
which case can report "No configurations found." #1879

Open
feixuedudiao opened this issue Apr 26, 2024 · 9 comments

Comments

@feixuedudiao

Summary

Run the convolution example with the following code:
// Create execution dnnl::engine.
int isGpu = 1;
engine::kind cpu_kind = engine::kind::cpu;
engine::kind gpu_kind = engine::kind::gpu;

auto cpu_eng = engine(validate_device(cpu_kind), 0);
auto gpu_eng = engine(validate_device(gpu_kind), 0);

// Create dnnl::stream.
dnnl::stream engine_stream(isGpu ? gpu_eng : cpu_eng);

 // Source (src), weights, bias, and destination (dst) tensors
 // dimensions.
 memory::dims src_dims = {1, 32, 56, 96};
 memory::dims weights_dims = {1, 32, 3, 3};
 memory::dims bias_dims = {1};
 memory::dims dst_dims = {1, 1, 56, 96};

 // Strides, padding dimensions.
 memory::dims strides_dims = {1, 1};
 memory::dims padding_dims_l = {1, 1};
 memory::dims padding_dims_r = {1, 1};

 // Allocate buffers.
 std::vector<float> src_data(product(src_dims));
 std::vector<float> weights_data(product(weights_dims));
 std::vector<float> bias_data(1);
 std::vector<float> dst_data(product(dst_dims));

 // Initialize src, weights, and dst tensors.
 std::generate(src_data.begin(), src_data.end(), []() {
     static int i = 0;
     return std::cos(i++ / 10.f);
 });
 std::generate(weights_data.begin(), weights_data.end(), []() {
     static int i = 0;
     return std::sin(i++ * 2.f);
 });
 std::generate(bias_data.begin(), bias_data.end(), []() {
     static int i = 0;
     return std::tanh(float(i++));
 });

 // Create memory objects for tensor data (src, weights, dst). In this
 // example, NCHW layout is assumed for src and dst, and OIHW for weights.
 auto user_src_mem = memory({src_dims, dt::f32, tag::nchw}, isGpu ? gpu_eng : cpu_eng);
 auto user_weights_mem = memory({weights_dims, dt::f32, tag::oihw}, isGpu ? gpu_eng : cpu_eng);
 auto user_dst_mem = memory({dst_dims, dt::f32, tag::nchw}, isGpu ? gpu_eng : cpu_eng);
 auto user_bias_mem = memory({bias_dims, dt::f32, tag::a}, isGpu ? gpu_eng : cpu_eng);

 // Create memory descriptors with format_tag::any for the primitive. This
 // enables the convolution primitive to choose memory layouts for an
 // optimized primitive implementation, and these layouts may differ from the
 // ones provided by the user.
 auto conv_src_md = memory::desc(src_dims, dt::f16, tag::any);
 auto conv_weights_md = memory::desc(weights_dims, dt::f16, tag::any);
 auto conv_dst_md = memory::desc(dst_dims, dt::f16, tag::any);

 // Create memory descriptor for the input bias.
 auto user_bias_md = memory::desc(bias_dims, dt::f16, tag::a);


 // Write data to memory object's handle.
 write_to_dnnl_memory(src_data.data(), user_src_mem);
 write_to_dnnl_memory(weights_data.data(), user_weights_mem);
 write_to_dnnl_memory(bias_data.data(), user_bias_mem);

 // Create primitive post-ops (ReLU).
 const float alpha = 0.f;
 const float beta = 0.f;
 post_ops conv_ops;
 conv_ops.append_eltwise(algorithm::eltwise_relu, alpha, beta);
 primitive_attr conv_attr;
 conv_attr.set_post_ops(conv_ops);

 // Create primitive descriptor.
 auto conv_pd = convolution_forward::primitive_desc(isGpu ? gpu_eng : cpu_eng,
         prop_kind::forward_inference, algorithm::convolution_direct,
         conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
         strides_dims, padding_dims_l, padding_dims_r, conv_attr);

 // For now, assume that the src, weights, and dst memory layouts generated
 // by the primitive and the ones provided by the user are identical.
 auto conv_src_mem = user_src_mem;
 auto conv_weights_mem = user_weights_mem;
 auto conv_bias_mem = user_bias_mem;
 auto conv_dst_mem = user_dst_mem;

 // Reorder the data in case the src and weights memory layouts generated by
 // the primitive and the ones provided by the user are different. In this
 // case, we create additional memory objects with internal buffers that will
 // contain the reordered data. The data in dst will be reordered after the
 // convolution computation has finalized.
 if (conv_pd.src_desc() != user_src_mem.get_desc()) {
     conv_src_mem = memory(conv_pd.src_desc(), isGpu ? gpu_eng : cpu_eng);
     reorder(user_src_mem, conv_src_mem)
             .execute(engine_stream, user_src_mem, conv_src_mem);
 }

 if (conv_pd.weights_desc() != user_weights_mem.get_desc()) {
     conv_weights_mem = memory(conv_pd.weights_desc(), isGpu ? gpu_eng : cpu_eng);
     reorder(user_weights_mem, conv_weights_mem)
             .execute(engine_stream, user_weights_mem, conv_weights_mem);
 }

 if (conv_pd.bias_desc() != user_bias_mem.get_desc()) {
     conv_bias_mem = memory(conv_pd.bias_desc(), isGpu ? gpu_eng : cpu_eng);
     reorder(user_bias_mem, conv_bias_mem)
             .execute(engine_stream, user_bias_mem, conv_bias_mem);
 }

 if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
     conv_dst_mem = memory(conv_pd.dst_desc(), isGpu ? gpu_eng : cpu_eng);
 }

 // Create the primitive.
 auto conv_prim = convolution_forward(conv_pd);

 // Primitive arguments.
 std::unordered_map<int, memory> conv_args;
 conv_args.insert({DNNL_ARG_SRC, conv_src_mem});
 conv_args.insert({DNNL_ARG_WEIGHTS, conv_weights_mem});
 conv_args.insert({DNNL_ARG_BIAS, conv_bias_mem}); // use the (possibly reordered) bias
 conv_args.insert({DNNL_ARG_DST, conv_dst_mem});

 // Primitive execution: convolution with ReLU.
 conv_prim.execute(engine_stream, conv_args);

 // Reorder the data in case the dst memory descriptor generated by the
 // primitive and the one provided by the user are different.
 if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
     reorder(conv_dst_mem, user_dst_mem)
             .execute(engine_stream, conv_dst_mem, user_dst_mem);
 } else
     user_dst_mem = conv_dst_mem;

 // Wait for the computation to finalize.
 engine_stream.wait();

 // Read data from memory object's handle.
 read_from_dnnl_memory(dst_data.data(), user_dst_mem);

Environment

Windows 10; CPU: Intel Core i7-10700 @ 2.90 GHz; compiler: Visual Studio 2022; built with DNNL_GPU_RUNTIME=OCL

Version

v3.3.6

Problem

It reported "No configurations found" when the code is running "auto conv_prim = convolution_forward(conv_pd);",
????????. . .onednn_verbose,info,oneDNN v3.3.6 (commit N/A)
onednn_verbose,info,cpu,runtime:sequential,nthr:1
onednn_verbose,info,cpu,isa:Intel AVX2
onednn_verbose,info,gpu,runtime:OpenCL
onednn_verbose,info,gpu,engine,0,name:Intel(R) UHD Graphics 630,driver_version:31.0.101,binary_kernels:enabled
onednn_verbose,primitive,info,template:operation,engine,primitive,implementation,prop_kind,memory_descriptors,attributes,auxiliary,problem_desc,exec_time
onednn_verbose,primitive,exec,gpu,reorder,jit:ir,undef,src_f32::blocked:abcd::f0 dst_f16::blocked:aBcd16b::f0,,,1x32x56x96,0.7471
onednn_verbose,primitive,exec,gpu,reorder,jit:ir,undef,src_f32::blocked:abcd::f0 dst_f16:p:blocked:ABcd16b8a::f0,,,4x32x3x3,0.6235
onednn_verbose,primitive,exec,gpu,reorder,jit:ir,undef,src_f32::blocked:a::f0 dst_f16::blocked:a::f0,,,4,0.5051
Assertion !params_gen_.is_empty() failed at F:\Share\OneDNN\v3.3.6\oneDNN-3.3.6_no_verbose\src\gpu\jit\conv\tiler.cpp:2373
No configurations found.
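For reference, verbose output like the log above can also be enabled programmatically; a minimal sketch, assuming the dnnl::set_verbose() service function from dnnl.hpp (equivalent to setting the ONEDNN_VERBOSE environment variable):

#include "oneapi/dnnl/dnnl.hpp"

int main() {
    // 0 = disabled, 1 = primitive execution info, 2 = execution + creation info.
    dnnl::set_verbose(2);
    // ... create engines and primitives here; timing lines go to stdout ...
    return 0;
}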

Note

I tested the same case with data type bf16; it did not crash.
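Since bf16 works where f16 hits the assertion, one defensive pattern is to probe primitive descriptor creation and fall back to bf16 when the f16 configuration is reported as unimplemented. A minimal sketch reusing the names from the snippet above (gpu_eng, conv_attr, the *_dims variables); the make_conv_pd helper is illustrative, not part of oneDNN, and this catches a thrown dnnl::error, not a hard assert like the one in the log:

auto make_conv_pd = [&](memory::data_type dt_try) {
    // Let the primitive pick layouts for the probed data type.
    auto src_md = memory::desc(src_dims, dt_try, tag::any);
    auto wei_md = memory::desc(weights_dims, dt_try, tag::any);
    auto bia_md = memory::desc(bias_dims, dt_try, tag::a);
    auto dst_md = memory::desc(dst_dims, dt_try, tag::any);
    return convolution_forward::primitive_desc(gpu_eng,
            prop_kind::forward_inference, algorithm::convolution_direct,
            src_md, wei_md, bia_md, dst_md,
            strides_dims, padding_dims_l, padding_dims_r, conv_attr);
};

convolution_forward::primitive_desc conv_pd;
try {
    conv_pd = make_conv_pd(memory::data_type::f16);
} catch (const dnnl::error &e) {
    if (e.status != dnnl_unimplemented) throw;
    // f16 is unimplemented on this device; bf16 was observed to work.
    conv_pd = make_conv_pd(memory::data_type::bf16);
}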

@yehudaorel

@feixuedudiao can you verify your OpenCL driver version?

adding @kealan-barbieri, @echeresh for input
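For checking the driver version outside oneDNN, it can be queried directly from the OpenCL runtime. A minimal standalone sketch, assuming the OpenCL headers and an ICD loader are installed:

#include <CL/cl.h>
#include <cstdio>

int main() {
    cl_platform_id platform = nullptr;
    cl_uint num_platforms = 0;
    if (clGetPlatformIDs(1, &platform, &num_platforms) != CL_SUCCESS
            || num_platforms == 0)
        return 1;

    cl_device_id device = nullptr;
    if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr)
            != CL_SUCCESS)
        return 1;

    // CL_DRIVER_VERSION reports the driver string for the device.
    char version[256] = {0};
    clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(version), version, nullptr);
    std::printf("OpenCL driver version: %s\n", version);
    return 0;
}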

@kealan-barbieri
Contributor

This is due to limited support for gen9 platforms in the optimized conv implementation. I submitted a small PR to provide a workaround for such cases.

@feixuedudiao
Author

@yehudaorel. Thanks. The driver version is "31.0.101.2125".

@feixuedudiao
Author

feixuedudiao commented May 6, 2024

@kealan-barbieri Thank you, but how can I get this version of the code?

@vpirogov
Member

vpirogov commented May 7, 2024

@kealan-barbieri, @feixuedudiao, I'd like to reiterate that support of Intel(R) UHD Graphics 630 is discontinued.

@feixuedudiao
Author

feixuedudiao commented May 8, 2024

@vpirogov Thanks. I want to use oneDNN with fp16 or bf16 on HD 630 or lower-end GPUs; which version supports that?

@vpirogov
Member

vpirogov commented May 8, 2024

oneDNN v3.3.6 is the last version with HD 630 support.
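A related sanity check: a minimal sketch (not from this thread) that enumerates the engines a given oneDNN build can see, using the public dnnl::engine::get_count() query. Note that a device being visible does not imply every data type (e.g. f16) is supported on it:

#include <iostream>
#include "oneapi/dnnl/dnnl.hpp"

int main() {
    // Count the devices oneDNN can create engines on, per engine kind.
    size_t n_cpu = dnnl::engine::get_count(dnnl::engine::kind::cpu);
    size_t n_gpu = dnnl::engine::get_count(dnnl::engine::kind::gpu);
    std::cout << "CPU engines: " << n_cpu << ", GPU engines: " << n_gpu << "\n";

    if (n_gpu == 0) {
        std::cout << "No oneDNN GPU engine available on this system.\n";
        return 0;
    }
    dnnl::engine gpu_eng(dnnl::engine::kind::gpu, 0);
    std::cout << "GPU engine 0 created successfully.\n";
    return 0;
}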

@feixuedudiao
Author

feixuedudiao commented May 9, 2024

@vpirogov OK, thanks, I will try it.

@feixuedudiao
Author

feixuedudiao commented May 10, 2024

@vpirogov @kealan-barbieri Thank you for helping me. But when I test this problem with data type f16, src/weights/bias/dst dims of {1, 4, 224, 384}, {32, 4, 3, 3}, {32}, and {1, 32, 112, 192}, and the gpu engine kind, it does not crash at auto conv_prim = convolution_forward(conv_pd). I tested versions v3.3.6 and v3.3.4. The code is listed below:
/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

/// @example convolution.cpp
/// > Annotated version: @ref convolution_example_cpp
///
/// @page convolution_example_cpp_short
///
/// This C++ API example demonstrates how to create and execute a
/// [Convolution](@ref dev_guide_convolution) primitive in forward propagation
/// mode in two configurations - with and without groups.
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor;
/// - Primitive attributes with fused post-ops.
///
/// @page convolution_example_cpp Convolution Primitive Example
/// @copydetails convolution_example_cpp_short
///
/// @include convolution.cpp

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

#include "example_utils.hpp"
#include "oneapi/dnnl/dnnl.hpp"

using namespace dnnl;

using tag = memory::format_tag;
using dt = memory::data_type;

void convolution_example(dnnl::engine::kind engine_kind) {

// Create execution dnnl::engine.
dnnl::engine engine(engine_kind, 0);

// Create dnnl::stream.
dnnl::stream engine_stream(engine);

// Tensor dimensions.

// Source (src), weights, bias, and destination (dst) tensors
// dimensions.
/*
memory::dims src_dims = {N, IC, IH, IW};
memory::dims weights_dims = {OC, IC, KH, KW};
memory::dims bias_dims = {OC};
memory::dims dst_dims = {N, OC, OH, OW};

// Strides, padding dimensions.
memory::dims strides_dims = {SH, SW};
memory::dims padding_dims_l = {PH_L, PW_L};
memory::dims padding_dims_r = {PH_R, PW_R};

*/

memory::dims src_dims = {1, 4, 224, 384};
memory::dims weights_dims = {32, 4, 3, 3};
memory::dims bias_dims = {32};
memory::dims dst_dims = {1, 32, 112, 192};

// Strides, padding dimensions.
memory::dims strides_dims = {2, 2};
memory::dims padding_dims_l = {1, 1};
memory::dims padding_dims_r = {1, 1};

/*
memory::dims src_dims = {1, 32, 56, 96};
memory::dims weights_dims = {1, 32, 3, 3};
memory::dims bias_dims = {1};
memory::dims dst_dims = {1, 1, 56, 96};

// Strides, padding dimensions.
memory::dims strides_dims = {1, 1};
memory::dims padding_dims_l = {1, 1};
memory::dims padding_dims_r = {1, 1};

*/
// Allocate buffers.
std::vector<float> src_data(product(src_dims));
std::vector<float> weights_data(product(weights_dims));
std::vector<float> bias_data(product(bias_dims));
std::vector<float> dst_data(product(dst_dims));

// Initialize src, weights, and dst tensors.
std::generate(src_data.begin(), src_data.end(), []() {
    static int i = 0;
    return std::cos(i++ / 10.f);
});
std::generate(weights_data.begin(), weights_data.end(), []() {
    static int i = 0;
    return std::sin(i++ * 2.f);
});
std::generate(bias_data.begin(), bias_data.end(), []() {
    static int i = 0;
    return std::tanh(float(i++));
});


//memset(&src_data[0], 0, src_data.size() / 3 * sizeof(float));

// Create memory objects for tensor data (src, weights, dst). In this
// example, NCHW layout is assumed for src and dst, and OIHW for weights.
auto user_src_mem = memory({src_dims, dt::f32, tag::nchw}, engine);
auto user_weights_mem = memory({weights_dims, dt::f32, tag::oihw}, engine);
auto user_dst_mem = memory({dst_dims, dt::f32, tag::nchw}, engine);

std::vector<std::unordered_map<int, memory>> conv_args;
std::vector<primitive> convs;

// Create memory descriptors with format_tag::any for the primitive. This
// enables the convolution primitive to choose memory layouts for an
// optimized primitive implementation, and these layouts may differ from the
// ones provided by the user.
auto conv_src_md = memory::desc(src_dims, dt::f16, tag::any);
auto conv_weights_md = memory::desc(weights_dims, dt::f16, tag::any);
auto conv_dst_md = memory::desc(dst_dims, dt::f16, tag::any);

// Create memory descriptor and memory object for input bias.
auto user_bias_md = memory::desc(bias_dims, dt::f16, tag::a);
auto user_bias_mem = memory(user_bias_md, engine);

// Write data to memory object's handle.
write_to_dnnl_memory(src_data.data(), user_src_mem);
write_to_dnnl_memory(weights_data.data(), user_weights_mem);
write_to_dnnl_memory(bias_data.data(), user_bias_mem);

// Create primitive post-ops (ReLU).
const float alpha = 0.166667f;
const float beta = 0.5f;
post_ops conv_ops;
//conv_ops.append_eltwise(algorithm::eltwise_hardswish, alpha, beta);
primitive_attr conv_attr;
conv_attr.set_post_ops(conv_ops);

// Create primitive descriptor.
/*
auto conv_pd = convolution_forward::primitive_desc(engine,
        prop_kind::forward_training, algorithm::convolution_direct,
        conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
        strides_dims, padding_dims_l, padding_dims_r, conv_attr);

*/
try {
    convolution_forward::primitive_desc(engine,
            prop_kind::forward_inference, algorithm::convolution_direct,
            conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
            strides_dims, padding_dims_l, padding_dims_r, conv_attr);
} catch (error &e) {
    if (e.status == dnnl_unimplemented) {
        std::cout << "No f16 convolution implementation is available for this platform!"
                  << std::endl;
        return;
    }
}

auto conv_pd = convolution_forward::primitive_desc(engine,
        prop_kind::forward_inference, algorithm::convolution_direct,
        conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
        strides_dims, padding_dims_l, padding_dims_r, conv_attr);

// For now, assume that the src, weights, and dst memory layouts generated
// by the primitive and the ones provided by the user are identical.
auto conv_src_mem = user_src_mem;
auto conv_weights_mem = user_weights_mem;
auto conv_dst_mem = user_dst_mem;

// Reorder the data in case the src and weights memory layouts generated by
// the primitive and the ones provided by the user are different. In this
// case, we create additional memory objects with internal buffers that will
// contain the reordered data. The data in dst will be reordered after the
// convolution computation has finalized.
/*
if (conv_pd.src_desc() != user_src_mem.get_desc()) {
    conv_src_mem = memory(conv_pd.src_desc(), engine);
    auto rd = reorder(user_src_mem, conv_src_mem);
    convs.push_back(rd);
    conv_args.push_back({{DNNL_ARG_FROM, user_src_mem},
                       {DNNL_ARG_TO,  conv_src_mem}});
            //.execute(engine_stream, user_src_mem, conv_src_mem);
}
*/


if (conv_pd.weights_desc() != user_weights_mem.get_desc()) {
    conv_weights_mem = memory(conv_pd.weights_desc(), engine);
    reorder(user_weights_mem, conv_weights_mem)
            .execute(engine_stream, user_weights_mem, conv_weights_mem);
}

if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
    conv_dst_mem = memory(conv_pd.dst_desc(), engine);
}

// Create the primitive.
auto conv_prim = convolution_forward(conv_pd);

// Primitive arguments.

//conv_args.insert({DNNL_ARG_SRC, conv_src_mem});
//conv_args.insert({DNNL_ARG_WEIGHTS, conv_weights_mem});
//conv_args.insert({DNNL_ARG_BIAS, user_bias_mem});
//conv_args.insert({DNNL_ARG_DST, conv_dst_mem});
conv_args.push_back({{DNNL_ARG_SRC, conv_src_mem},
        {DNNL_ARG_WEIGHTS, conv_weights_mem},
        {DNNL_ARG_BIAS, user_bias_mem},
        {DNNL_ARG_DST, conv_dst_mem}});
// Primitive execution: convolution with ReLU.
//conv_prim.execute(engine_stream, conv_args);
convs.push_back(conv_prim);

auto hd_swish_pd
        = eltwise_forward::primitive_desc(engine, prop_kind::forward_inference,
                algorithm::eltwise_hardswish, conv_dst_mem.get_desc(),
                conv_dst_mem.get_desc(), alpha, beta);

convs.push_back(eltwise_forward(hd_swish_pd));
conv_args.push_back({{DNNL_ARG_SRC, conv_dst_mem},
        {DNNL_ARG_DST, conv_dst_mem}});

for(int i = 0; i < convs.size(); i++)
{
	convs.at(i).execute(engine_stream, conv_args.at(i));
	std::unordered_map<int,memory> dstVecMem = conv_args.at(i);
    std::string resPath = "conv_result_";
     resPath += std::to_string(i);

    resPath += std::to_string(i) + "_";
	int j = 0 ;
    for(auto it = dstVecMem.begin(); it != dstVecMem.end(); it++)
    {
       j = it->first;
       memory dst_mem = it->second;
       std::string fnlResPath = resPath   + std::to_string(j);

       std::ofstream resStream(fnlResPath + ".txt", std::ios::binary);
       int flag = resStream.is_open();
       if (!flag)
       {
           std::cout <<"failed to open file !"<< std::endl;
       }

       // Query the memory size directly through the C++ API.
       size_t memSz = dst_mem.get_desc().get_size();
       std::vector<float> resVecF(memSz / sizeof(float));
       read_from_dnnl_memory(resVecF.data(), dst_mem);

       for (auto v : resVecF) {
        resStream << v << std::endl;
       }
       resStream.close();

    }

}
// Reorder the data in case the dst memory descriptor generated by the
// primitive and the one provided by the user are different.
if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
    reorder(conv_dst_mem, user_dst_mem)
            .execute(engine_stream, conv_dst_mem, user_dst_mem);
} else
    user_dst_mem = conv_dst_mem;

// Wait for the computation to finalize.
engine_stream.wait();

// Read data from memory object's handle.
read_from_dnnl_memory(dst_data.data(), conv_dst_mem);

/*
std::string resPath = "result_";
std::ofstream resStream(resPath + ".txt", std::ios::binary);
int flag = resStream.is_open();
if (!flag)
{
std::cout <<"failed to open file !"<< std::endl;
}
for (auto v : dst_data) {
resStream << v << std::endl;
}
resStream.close();

std::string inPath = "input_";
std::ofstream inStream(inPath + ".txt", std::ios::binary);
flag = inStream.is_open();
if (!flag)
{
    std::cout <<"failed to open file !"<< std::endl;
}
for (auto v : src_data) {
    inStream << v << std::endl;
}
inStream.close();
*/

}

int main(int argc, char **argv) {
    engine::kind kind_gpu = engine::kind::gpu;
    convolution_example(kind_gpu);
    return 0;
}
