Workaround for 0 bytes allocation for SYCL device (tensorflow#42)

benoitsteiner · Feb 17, 2017 · 64f69a9 · 64f69a9
1 parent e224438
commit 64f69a9
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 4 deletions.
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
@@ -25,6 +25,9 @@ string SYCLAllocator::Name() { return "device:SYCL"; }
 
 void *SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
   assert(device_);
+  if(num_bytes == 0) {
+    return device_->allocate(1);
+  }
   auto p = device_->allocate(num_bytes);
   return p;
 }

diff --git a/tensorflow/core/kernels/reduction_ops_max.cc b/tensorflow/core/kernels/reduction_ops_max.cc
@@ -66,7 +66,7 @@ REGISTER_KERNEL_BUILDER(
           .TypeConstraint<int32>("Tidx")    \
           .HostMemory("reduction_indices"), \
       ReductionOp<SYCLDevice, type, Eigen::internal::MaxReducer<type>>);
-REGISTER_SYCL_KERNELS(float);
+// REGISTER_SYCL_KERNELS(float);
 #undef REGISTER_SYCL_KERNELS
 
 REGISTER_KERNEL_BUILDER(

diff --git a/tensorflow/core/kernels/stage_op.cc b/tensorflow/core/kernels/stage_op.cc
@@ -99,6 +99,9 @@ REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_CPU), StageOp);
 #if GOOGLE_CUDA
 REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_GPU), StageOp);
 #endif
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_SYCL), StageOp);
+#endif // TENSORFLOW_USE_SYCL
 
 class UnstageOp : public OpKernel {
  public:
@@ -126,5 +129,8 @@ REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_CPU), UnstageOp);
 #if GOOGLE_CUDA
 REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_GPU), UnstageOp);
 #endif
+#ifdef TENSORFLOW_USE_SYCL
+REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_SYCL), UnstageOp);
+#endif // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/python/kernel_tests/stage_op_test.py b/tensorflow/python/kernel_tests/stage_op_test.py
@@ -31,7 +31,7 @@ def testSimple(self):
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         v = 2. * (array_ops.zeros([1024, 1024]) + x)
-      with ops.device('/gpu:0'):
+      with ops.device(test.gpu_device_name()):
         stager = data_flow_ops.StagingArea([dtypes.float32])
         stage = stager.put([v])
         y = stager.get()
@@ -46,7 +46,7 @@ def testMultiple(self):
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         v = 2. * (array_ops.zeros([128, 128]) + x)
-      with ops.device('/gpu:0'):
+      with ops.device(test.gpu_device_name()):
         stager = data_flow_ops.StagingArea([dtypes.float32, dtypes.float32])
         stage = stager.put([x, v])
         z, y = stager.get()
@@ -62,7 +62,7 @@ def testDictionary(self):
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         v = 2. * (array_ops.zeros([128, 128]) + x)
-      with ops.device('/gpu:0'):
+      with ops.device(test.gpu_device_name()):
         stager = data_flow_ops.StagingArea(
             [dtypes.float32, dtypes.float32],
             shapes=[[], [128, 128]],