Skip to content

Commit

Permalink
Merge pull request tensorflow#25 from ROCmSoftwarePlatform/fix_fdividef
Browse files Browse the repository at this point in the history
fix hcc linking error caused by __fdividef
  • Loading branch information
whchung committed Jun 14, 2018
2 parents 3dcf79b + 412496e commit 5dafc62
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions tensorflow/core/kernels/fused_batch_norm_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ __global__ void InvVarianceToVarianceKernel(int nthreads, double epsilon,
int sample_size, T* variance) {
GPU_1D_KERNEL_LOOP(index, nthreads) {
T inv_var = variance[index];
#if GOOGLE_CUDA
T var = __fdividef(1, inv_var * inv_var) - T(epsilon);
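// TODO: __fdividef causes an hcc linking error on ROCm; the ROCm branch below
// uses plain division until this is fixed in ROCDL or LC.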
#elif TENSORFLOW_USE_ROCM
T var = 1 / (inv_var * inv_var) - T(epsilon);
#endif
// This is for Bessel's correction
var *= T(sample_size) / T((sample_size > 1) ? sample_size - 1 : 1);
variance[index] = (var > 0) ? var : 0;
Expand Down

0 comments on commit 5dafc62

Please sign in to comment.