Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compilation error with a nested for and if blocks. #381

Open
ShAlireza opened this issue Apr 17, 2024 · 4 comments
Open

Compilation error with a nested for and if blocks. #381

ShAlireza opened this issue Apr 17, 2024 · 4 comments
Assignees
Labels
bug Something isn't working OpenCL

Comments

@ShAlireza
Copy link
Contributor


Describe the bug

The generated OpenCL code has a syntax error.

How To Reproduce

Following is my kernel in Java.

/**
 * Per-work-group scan kernel used as the reproducer for this issue.
 *
 * Each work-item copies two consecutive elements of {@code input} into a local
 * scratch array, the group then runs two passes over that array (a halving-stride
 * pass followed by a doubling-stride pass), and the results are written back to
 * the same positions of {@code input}. Between the passes, work-item 0 stores the
 * last scratch element into {@code sum} at the group's index and then zeroes it.
 * NOTE(review): this looks like the classic up-sweep/down-sweep exclusive scan —
 * confirm against the author's intent.
 *
 * @param context kernel context supplying the global/local/group indices, the
 *                local group size, local-memory allocation and local barriers
 * @param input   data to scan; overwritten in place with the per-group result
 * @param sum     receives one value per work-group (written at {@code context.groupIdx})
 */
public static void scan(KernelContext context, IntArray input, IntArray sum) {
    // Local scratch of 512 ints; indices up to n - 1 are used below, so this
    // requires n (= 2 * local group size) to be at most 512.
    int[] temp = context.allocateIntLocalArray(4 * 128);
    // Each work-item owns two adjacent elements: global positions gid2 and gid2 + 1.
    int gid2 = context.globalIdx << 1;
    int group = context.groupIdx;
    int item = context.localIdx;
    // Number of elements handled by one work-group (two per work-item).
    int n = context.localGroupSizeX << 1;

    // Stage this work-item's two elements into the local scratch array.
    temp[2 * item] = input.get(gid2);
    temp[2 * item + 1] = input.get(gid2 + 1);

    // Stride between the pair of elements combined in each step; doubles per iteration.
    int decale = 1;

    // First pass: d halves from n / 2 down to 1; only the first d work-items are active.
    for (int d = n >> 1; d > 0; d = d >> 1) {
        // All work-items must see the previous step's writes before reading.
        context.localBarrier();
        if (item < d) {
            int ai = decale * ((item << 1) + 1) - 1;
            int bi = decale * ((item << 1) + 2) - 1;
            temp[bi] += temp[ai];
        }
        decale = decale << 1;
    }

    // Work-item 0 publishes the last scratch element for this group, then clears it.
    if (item == 0) {
        sum.set(group, temp[n - 1]);
        temp[n - 1] = 0;
    }

    // Second pass: d doubles from 1 up to n / 2 while the stride halves again.
    for (int d = 1; d < n; d = d << 1) {
        decale = decale >> 1;
        context.localBarrier();
        if (item < d) {
            int ai = decale * ((item << 1) + 1) - 1;
            int bi = decale * ((item << 1) + 2) - 1;
            // Move temp[bi] into temp[ai] and add the old temp[ai] onto temp[bi].
            int t = temp[ai];
            temp[ai] = temp[bi];
            temp[bi] += t;
        }
    }
    // Make the final scratch contents visible to every work-item before write-back.
    context.localBarrier();

    // Write this work-item's two results back over the original input positions.
    input.set(gid2, temp[item << 1]);
    input.set(gid2 + 1, temp[(item << 1) + 1]);
}

Expected behavior

Successfully compile the code to OpenCL kernel without any error.

Computing system setup (please complete the following information):

  • OS: Ubuntu 22.04
  • v1.0.3

Additional context

This is the generated OpenCL code which is wrong.

#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
// OpenCL kernel emitted for the Java `scan` method, quoted verbatim from the report.
// NOTE(review): the braces in this listing do not balance -- there are nine '{'
// and ten '}' -- so the final '}' has no matching opener. That is consistent
// with the syntax error described in the issue.
__kernel void scan(__global long *_kernel_context, __constant uchar *_constant_region, __local uchar *_local_region, __global int *_atomics, __global uchar *input, __global uchar *sum)
{
  ulong ul_8, ul_43, ul_1, ul_0, ul_15;
  long l_14, l_13, l_42, l_41, l_7, l_6;
  int i_25, i_27, i_28, i_29, i_30, i_31, i_32, i_17, i_18, i_19, i_20, i_21, i_22, i_23, i_24, i_9, i_10, i_11, i_12, i_16, i_3, i_4, i_5, i_57, i_58, i_59, i_60, i_49, i_50, i_51, i_52, i_53, i_54, i_55, i_56, i_44, i_45, i_46, i_47, i_33, i_34, i_35, i_37, i_38, i_39, i_40;
  bool b_36, b_26, b_48;

  // BLOCK 0
  // Loads two input elements per work-item into the 512-int local array adi_2.
  // NOTE(review): element indices are offset by 6 (and 7) before the << 2 byte
  // scaling; presumably this skips an array header -- confirm.
  ul_0  =  (ulong) input;
  ul_1  =  (ulong) sum;
  __local int adi_2[512];
  i_3  =  get_global_id(0);
  i_4  =  i_3 << 1;
  i_5  =  i_4 + 6;
  l_6  =  (long) i_5;
  l_7  =  l_6 << 2;
  ul_8  =  ul_0 + l_7;
  i_9  =  *((__global int *) ul_8);
  i_10  =  get_local_id(0);
  i_11  =  i_10 << 1;
  adi_2[i_11]  =  i_9;
  i_12  =  i_4 + 7;
  l_13  =  (long) i_12;
  l_14  =  l_13 << 2;
  ul_15  =  ul_0 + l_14;
  i_16  =  *((__global int *) ul_15);
  i_17  =  i_11 + 1;
  adi_2[i_17]  =  i_16;
  i_18  =  i_11 + 2;
  i_19  =  get_local_size(0);
  i_20  =  i_19 << 1;
  i_21  =  i_20 >> 1;
  // BLOCK 1 MERGES [0 5 ]
  // First loop: i_22 is the stride (doubles each iteration), i_23 the active
  // work-item count (halves each iteration).
  i_22  =  1;
  i_23  =  i_21;
  for(;i_23 >= 1;)
  {
    // BLOCK 2
    barrier(CLK_LOCAL_MEM_FENCE);
    i_24  =  i_23 >> 1;
    i_25  =  i_22 << 1;
    b_26  =  i_10 < i_23;
    if(b_26)
    {
      // BLOCK 3
      i_27  =  i_18 * i_22;
      i_28  =  i_27 + -1;
      i_29  =  adi_2[i_28];
      i_30  =  i_22 * i_17;
      i_31  =  i_30 + -1;
      i_32  =  adi_2[i_31];
      i_33  =  i_29 + i_32;
      adi_2[i_28]  =  i_33;
    }  // B3
    else
    {
      // BLOCK 4
    }  // B4
    // BLOCK 5 MERGES [4 3 ]
    i_34  =  i_25;
    i_35  =  i_24;
    i_22  =  i_34;
    i_23  =  i_35;
  }  // B5
  // BLOCK 6
  // Work-item 0 stores the last local element into sum at the group index,
  // then zeroes that local element.
  b_36  =  i_10 == 0;
  if(b_36)
  {
    // BLOCK 7
    i_37  =  i_20 + -1;
    i_38  =  adi_2[i_37];
    i_39  =  get_group_id(0);
    i_40  =  i_39 + 6;
    l_41  =  (long) i_40;
    l_42  =  l_41 << 2;
    ul_43  =  ul_1 + l_42;
    *((__global int *) ul_43)  =  i_38;
    adi_2[i_37]  =  0;
  }  // B7
  else
  {
    // BLOCK 8
  }  // B8
  // BLOCK 9 MERGES [8 7 ]
  // BLOCK 10 MERGES [9 14 ]
  // Second loop: i_44 is the stride (halves each iteration), i_45 the active
  // work-item count (doubles each iteration).
  i_44  =  i_22;
  i_45  =  1;
  for(;i_45 < i_20;)
  {
    // BLOCK 11
    barrier(CLK_LOCAL_MEM_FENCE);
    i_46  =  i_45 << 1;
    i_47  =  i_44 >> 1;
    b_48  =  i_10 < i_45;
    if(b_48)
    {
      // BLOCK 12
      i_49  =  i_47 * i_17;
      i_50  =  i_49 + -1;
      i_51  =  adi_2[i_50];
      i_52  =  i_18 * i_47;
      i_53  =  i_52 + -1;
      i_54  =  adi_2[i_53];
      adi_2[i_50]  =  i_54;
      i_55  =  adi_2[i_53];
      i_56  =  i_51 + i_55;
      adi_2[i_53]  =  i_56;
    }  // B12
    else
    {
      // BLOCK 13
    }  // B13
    // BLOCK 14 MERGES [13 12 ]
    i_57  =  i_47;
    i_58  =  i_46;
    i_44  =  i_57;
    i_45  =  i_58;
  }  // B14
  // BLOCK 15
  // Write the two local results back to the global addresses they were loaded from.
  barrier(CLK_LOCAL_MEM_FENCE);
  i_59  =  adi_2[i_11];
  *((__global int *) ul_8)  =  i_59;
  i_60  =  adi_2[i_17];
  *((__global int *) ul_15)  =  i_60;
  return;
  // NOTE(review): every scope opened inside the kernel is already closed at this
  // point, so the next '}' closes the kernel body itself...
}  // B15
  // ...and the one after it is unmatched (the extra closing brace).
}  //  kernel

@jjfumero jjfumero added bug Something isn't working OpenCL labels Apr 17, 2024
@jjfumero
Copy link
Member

Thank you for the report. We will work on this.

@mikepapadim mikepapadim self-assigned this Apr 18, 2024
@mikepapadim
Copy link
Member

@ShAlireza can you also provide the exact input sizes with which you run the scan kernel? Thanks

@ShAlireza
Copy link
Contributor Author

Sure.
// 256 * 128 * 4 = 131072 input elements.
IntArray input = new IntArray(256 * 128 * 4);
// 256 slots; the kernel writes one value per work-group into sum.
IntArray sum = new IntArray(256);

And here is my GridScheduler:
// 131072 / 2 = 65536 work-items in total.
int totalLocalScanItems = 256 * 128 * 4 / 2;
// 65536 / 256 = 256 work-items per work-group.
int localItems = totalLocalScanItems / 256;

    // One-dimensional grid over all work-items, with the local size set in X only.
    WorkerGrid scanWorker = new WorkerGrid1D(totalLocalScanItems);
    scanWorker.setLocalWork(localItems, 1, 1);

@mikepapadim
Copy link
Member

Thank you @ShAlireza, I managed to reproduce it locally.
We will work on the issue and we will get back to you.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working OpenCL
Projects
Status: No status
Development

No branches or pull requests

3 participants