Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

error when trying to lower .cu file, should I report this here or on LLVM repo issues? #403

Open
nyck33 opened this issue Apr 24, 2024 · 0 comments

Comments

@nyck33
Copy link

nyck33 commented Apr 24, 2024

I read this: #320 and tried to construct a command to lower my .cu file into LLVM IR.

The result:

nyck33@lenovo-gtx1650:/mnt/d/LLVM/Lean/LLVM$ cgeist cudaComputePortfolioRisk.cu --function=* -S --resource-dir=$LLVM_BUILD_DIR/lib/clang/18 --cuda-gpu-arch=sm_75 | polygeist-opt --convert-polygeist-to-llvm
warning: CUDA version 12.1 is only partially supported
warning: CUDA version 12.1 is only partially supported
polygeist-opt: /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/Support/Casting.h:566: decltype(auto) llvm::cast(const From&) [with To = mlir::detail::TypedValue<mlir::MemRefType>; From = mlir::Value]: Assertion `isa<To>(Val) && "cast<Ty>() argument of incompatible type!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: polygeist-opt --convert-polygeist-to-llvm
 #0 0x0000557978016ad8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:22
 #1 0x0000557978016ef4 PrintStackTraceSignalHandler(void*) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1
 #2 0x0000557978014341 llvm::sys::RunSignalHandlers() /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/lib/Support/Signals.cpp:105:20
 #3 0x0000557978016370 SignalHandler(int) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #4 0x00007f396dd06520 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #5 0x00007f396dd5a9fc pthread_kill (/usr/lib/x86_64-linux-gnu/libc.so.6+0x969fc)
 #6 0x00007f396dd06476 gsignal (/usr/lib/x86_64-linux-gnu/libc.so.6+0x42476)
 #7 0x00007f396dcec7f3 abort (/usr/lib/x86_64-linux-gnu/libc.so.6+0x287f3)
 #8 0x00007f396dcec71b (/usr/lib/x86_64-linux-gnu/libc.so.6+0x2871b)
 #9 0x00007f396dcfde96 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#10 0x0000557972f0f4fc decltype(auto) llvm::cast<mlir::detail::TypedValue<mlir::MemRefType>, mlir::Value>(mlir::Value const&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/Support/Casting.h:567:42
#11 0x0000557972ed23fa mlir::affine::AffineLoadOp::getMemref() /mnt/d/LLVM/NewPolygeistDir/llvm-project/build/tools/mlir/include/mlir/Dialect/Affine/IR/AffineOps.cpp.inc:1087:1
#12 0x0000557972ec3622 mlir::affine::AffineLoadOp::fold(mlir::affine::AffineLoadOpGenericAdaptor<llvm::ArrayRef<mlir::Attribute>>) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Dialect/Affine/IR/AffineOps.cpp:3178:31
#13 0x0000557972f7db4c mlir::LogicalResult mlir::Op<mlir::affine::AffineLoadOp, mlir::OpTrait::ZeroRegions, mlir::OpTrait::OneResult, mlir::OpTrait::OneTypedResult<mlir::Type>::Impl, mlir::OpTrait::ZeroSuccessors, mlir::OpTrait::AtLeastNOperands<1u>::Impl, mlir::OpTrait::OpInvariants, mlir::BytecodeOpInterface::Trait, mlir::affine::AffineReadOpInterface::Trait, mlir::affine::AffineMapAccessInterface::Trait, mlir::OpTrait::MemRefsNormalizable, mlir::MemoryEffectOpInterface::Trait>::foldSingleResultHook<mlir::affine::AffineLoadOp>(mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/include/mlir/IR/OpDefinition.h:1898:42
#14 0x0000557972f79e3a mlir::Op<mlir::affine::AffineLoadOp, mlir::OpTrait::ZeroRegions, mlir::OpTrait::OneResult, mlir::OpTrait::OneTypedResult<mlir::Type>::Impl, mlir::OpTrait::ZeroSuccessors, mlir::OpTrait::AtLeastNOperands<1u>::Impl, mlir::OpTrait::OpInvariants, mlir::BytecodeOpInterface::Trait, mlir::affine::AffineReadOpInterface::Trait, mlir::affine::AffineMapAccessInterface::Trait, mlir::OpTrait::MemRefsNormalizable, mlir::MemoryEffectOpInterface::Trait>::getFoldHookFn()::'lambda'(mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&)::operator()(mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) const /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/include/mlir/IR/OpDefinition.h:1872:14
#15 0x0000557972f88b3a mlir::LogicalResult llvm::detail::UniqueFunctionBase<mlir::LogicalResult, mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&>::CallImpl<mlir::Op<mlir::affine::AffineLoadOp, mlir::OpTrait::ZeroRegions, mlir::OpTrait::OneResult, mlir::OpTrait::OneTypedResult<mlir::Type>::Impl, mlir::OpTrait::ZeroSuccessors, mlir::OpTrait::AtLeastNOperands<1u>::Impl, mlir::OpTrait::OpInvariants, mlir::BytecodeOpInterface::Trait, mlir::affine::AffineReadOpInterface::Trait, mlir::affine::AffineMapAccessInterface::Trait, mlir::OpTrait::MemRefsNormalizable, mlir::MemoryEffectOpInterface::Trait>::getFoldHookFn()::'lambda'(mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) const>(void*, mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h:221:3
#16 0x0000557972f78c0e llvm::unique_function<mlir::LogicalResult (mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) const>::operator()(mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) const /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h:409:3
#17 0x0000557972f73292 mlir::RegisteredOperationName::Model<mlir::affine::AffineLoadOp>::foldHook(mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/include/mlir/IR/OperationSupport.h:539:41
#18 0x0000557977e2c40b mlir::OperationName::foldHook(mlir::Operation*, llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) const /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/include/mlir/IR/OperationSupport.h:267:3
#19 0x0000557977e238c9 mlir::Operation::fold(llvm::ArrayRef<mlir::Attribute>, llvm::SmallVectorImpl<mlir::OpFoldResult>&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/IR/Operation.cpp:614:16
#20 0x0000557977e23a65 mlir::Operation::fold(llvm::SmallVectorImpl<mlir::OpFoldResult>&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/IR/Operation.cpp:635:14
#21 0x0000557977c99e4d mlir::OpBuilder::tryFold(mlir::Operation*, llvm::SmallVectorImpl<mlir::Value>&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/IR/Builders.cpp:480:13
#22 0x00005579777c6c75 (anonymous namespace)::OperationLegalizer::legalizeWithFold(mlir::Operation*, mlir::ConversionPatternRewriter&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:1896:13   
#23 0x00005579777c693f (anonymous namespace)::OperationLegalizer::legalize(mlir::Operation*, mlir::ConversionPatternRewriter&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:1858:16
#24 0x00005579777c7f2f (anonymous namespace)::OperationLegalizer::legalizePatternCreatedOperations(mlir::ConversionPatternRewriter&, mlir::detail::ConversionPatternRewriterImpl&, (anonymous namespace)::RewriterState&, (anonymous namespace)::RewriterState&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:2083:15
#25 0x00005579777c79bb (anonymous namespace)::OperationLegalizer::legalizePatternResult(mlir::Operation*, mlir::Pattern const&, mlir::ConversionPatternRewriter&, (anonymous namespace)::RewriterState&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:2016:13
#26 0x00005579777c71fb (anonymous namespace)::OperationLegalizer::legalizeWithPattern(mlir::Operation*, mlir::ConversionPatternRewriter&)::'lambda1'(mlir::Pattern const&)::operator()(mlir::Pattern const&) const /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:1951:40
#27 0x00005579777d3eba mlir::LogicalResult llvm::function_ref<mlir::LogicalResult (mlir::Pattern const&)>::callback_fn<(anonymous namespace)::OperationLegalizer::legalizeWithPattern(mlir::Operation*, mlir::ConversionPatternRewriter&)::'lambda1'(mlir::Pattern const&)>(long, mlir::Pattern const&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:47:3
#28 0x00005579779fcb07 llvm::function_ref<mlir::LogicalResult (mlir::Pattern const&)>::operator()(mlir::Pattern const&) const /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:69:3
#29 0x00005579779f8ff6 mlir::PatternApplicator::matchAndRewrite(mlir::Operation*, mlir::PatternRewriter&, llvm::function_ref<bool (mlir::Pattern const&)>, llvm::function_ref<void (mlir::Pattern const&)>, llvm::function_ref<mlir::LogicalResult (mlir::Pattern const&)>)::'lambda'()::operator()() const /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Rewrite/PatternApplicator.cpp:214:55
#30 0x00005579779f98ad void llvm::function_ref<void ()>::callback_fn<mlir::PatternApplicator::matchAndRewrite(mlir::Operation*, mlir::PatternRewriter&, llvm::function_ref<bool (mlir::Pattern const&)>, llvm::function_ref<void (mlir::Pattern const&)>, llvm::function_ref<mlir::LogicalResult (mlir::Pattern const&)>)::'lambda'()>(long) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:46:40
#31 0x0000557976d73e54 llvm::function_ref<void ()>::operator()() const /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:62
#32 0x00005579779fcc55 void mlir::MLIRContext::executeAction<mlir::ApplyPatternAction, mlir::Pattern const&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pattern const&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3
#33 0x00005579779f965a mlir::PatternApplicator::matchAndRewrite(mlir::Operation*, mlir::PatternRewriter&, llvm::function_ref<bool (mlir::Pattern const&)>, llvm::function_ref<void (mlir::Pattern const&)>, llvm::function_ref<mlir::LogicalResult (mlir::Pattern const&)>) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Rewrite/PatternApplicator.cpp:227:5
#34 0x00005579777c7412 (anonymous namespace)::OperationLegalizer::legalizeWithPattern(mlir::Operation*, mlir::ConversionPatternRewriter&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:1959:36
#35 0x00005579777c69fd (anonymous namespace)::OperationLegalizer::legalize(mlir::Operation*, mlir::ConversionPatternRewriter&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:1867:16
#36 0x00005579777c913a (anonymous namespace)::OperationConverter::convert(mlir::ConversionPatternRewriter&, mlir::Operation*) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:2378:13
#37 0x00005579777c9633 (anonymous namespace)::OperationConverter::convertOperations(llvm::ArrayRef<mlir::Operation*>, llvm::function_ref<void (mlir::Diagnostic&)>) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:2432:15
#38 0x00005579777cf06a mlir::applyPartialConversion(llvm::ArrayRef<mlir::Operation*>, mlir::ConversionTarget const&, mlir::FrozenRewritePatternSet const&, llvm::DenseSet<mlir::Operation*, llvm::DenseMapInfo<mlir::Operation*, void>>*) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:3400:39
#39 0x00005579777cf0f8 mlir::applyPartialConversion(mlir::Operation*, mlir::ConversionTarget const&, mlir::FrozenRewritePatternSet const&, llvm::DenseSet<mlir::Operation*, llvm::DenseMapInfo<mlir::Operation*, void>>*) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Transforms/Utils/DialectConversion.cpp:3408:1
#40 0x00005579747bfa94 (anonymous namespace)::ConvertPolygeistToLLVMPass::convertModule(mlir::ModuleOp, bool) /mnt/d/LLVM/NewPolygeistDir/lib/polygeist/Passes/ConvertPolygeistToLLVM.cpp:2972:17
#41 0x00005579747c03a3 (anonymous namespace)::ConvertPolygeistToLLVMPass::runOnOperation() /mnt/d/LLVM/NewPolygeistDir/lib/polygeist/Passes/ConvertPolygeistToLLVM.cpp:3025:3
#42 0x0000557977adffcd mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::'lambda0'()::operator()() const /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Pass/Pass.cpp:500:57
#43 0x0000557977ae3caa void llvm::function_ref<void ()>::callback_fn<mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int)::'lambda0'()>(long) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:46:40
#44 0x0000557976d73e54 llvm::function_ref<void ()>::operator()() const /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:62
#45 0x0000557977ae9a2b void mlir::MLIRContext::executeAction<mlir::PassExecutionAction, mlir::Pass&>(llvm::function_ref<void ()>, llvm::ArrayRef<mlir::IRUnit>, mlir::Pass&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/include/mlir/IR/MLIRContext.h:276:3
#46 0x0000557977ae03ef mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Pass/Pass.cpp:509:23
#47 0x0000557977ae06ca mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Pass/Pass.cpp:569:15
#48 0x0000557977ae25f0 mlir::PassManager::runPasses(mlir::Operation*, mlir::AnalysisManager) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Pass/Pass.cpp:880:40
#49 0x0000557977ae2448 mlir::PassManager::run(mlir::Operation*) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Pass/Pass.cpp:860:69
#50 0x0000557974c65003 performActions(llvm::raw_ostream&, std::shared_ptr<llvm::SourceMgr> const&, mlir::MLIRContext*, mlir::MlirOptMainConfig const&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:376:13
#51 0x0000557974c655d7 processBuffer(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, mlir::MlirOptMainConfig const&, mlir::DialectRegistry&, llvm::ThreadPool*) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:431:26
#52 0x0000557974c65711 mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, mlir::DialectRegistry&, mlir::MlirOptMainConfig const&)::'lambda'(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)::operator()(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&) const /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:472:25
#53 0x0000557974c66850 mlir::LogicalResult llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>::callback_fn<mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, mlir::DialectRegistry&, mlir::MlirOptMainConfig const&)::'lambda'(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>(long, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45:52
#54 0x0000557977e8dd93 llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>::operator()(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&) const /mnt/d/LLVM/NewPolygeistDir/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68:12
#55 0x0000557977e8d65d mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>, llvm::raw_ostream&, bool, bool) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Support/ToolUtilities.cpp:28:30
#56 0x0000557974c658c6 mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, mlir::DialectRegistry&, mlir::MlirOptMainConfig const&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:475:31
#57 0x0000557974c65e83 mlir::MlirOptMain(int, char**, llvm::StringRef, mlir::DialectRegistry&) /mnt/d/LLVM/NewPolygeistDir/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:531:13
#58 0x0000557972e8e6e5 main /mnt/d/LLVM/NewPolygeistDir/tools/polygeist-opt/polygeist-opt.cpp:110:22
#59 0x00007f396dcedd90 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x29d90)
#60 0x00007f396dcede40 __libc_start_main (/usr/lib/x86_64-linux-gnu/libc.so.6+0x29e40)
#61 0x0000557972e8e0e5 _start (/mnt/d/LLVM/NewPolygeistDir/build/bin/polygeist-opt+0x41610e5)
Aborted

My .cu file:

#include <cuda.h>

#include <cuda_runtime.h>

// CUDA kernel to perform the matrix multiplication A * B
__global__ void matrixMultiply(double* A, double* B, double* C, int ARows, int ACols, int BCols) {
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    if (row < ARows && col < BCols) {
        double sum = 0.0;
        for (int k = 0; k < ACols; ++k) {
            sum += A[row * ACols + k] * B[k * BCols + col];
        }
        C[row * BCols + col] = sum;
    }
}

// Host function to initialize memory, call the kernels, and clean up
extern "C" void computeCovarianceMatrix(double* S, double* R, double* Sigma, int sRows, int sCols) {
    double *d_S, *d_R, *d_T, *d_Sigma;

    // Allocate Unified Memory – accessible from CPU or GPU
    cudaMallocManaged(&d_S, sRows * sCols * sizeof(double));
    cudaMallocManaged(&d_R, sCols * sCols * sizeof(double));
    cudaMallocManaged(&d_T, sRows * sCols * sizeof(double)); // Intermediate result
    cudaMallocManaged(&d_Sigma, sRows * sRows * sizeof(double));

    // Copy data into managed memory
    cudaMemcpy(d_S, S, sRows * sCols * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_R, R, sCols * sCols * sizeof(double), cudaMemcpyHostToDevice);

    dim3 threadsPerBlock(16, 16);
    dim3 blocksPerGrid1((sCols + 15) / 16, (sRows + 15) / 16);
    dim3 blocksPerGrid2((sRows + 15) / 16, (sRows + 15) / 16);

    // Perform S * R = T
    matrixMultiply<<<blocksPerGrid1, threadsPerBlock>>>(d_S, d_R, d_T, sRows, sCols, sCols);

    // Perform T * S^T = Sigma (assuming S is square and sCols == sRows)
    matrixMultiply<<<blocksPerGrid2, threadsPerBlock>>>(d_T, d_S, d_Sigma, sRows, sCols, sRows);

    // Wait for GPU to finish before accessing on host
    cudaDeviceSynchronize();

    // Copy the result matrix back to the host memory
    cudaMemcpy(Sigma, d_Sigma, sRows * sRows * sizeof(double), cudaMemcpyDeviceToHost);

    // Free the device memory
    cudaFree(d_S);
    cudaFree(d_R);
    cudaFree(d_T);
    cudaFree(d_Sigma);
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant