Skip to content

Commit

Permalink
Merge pull request #434 from mwarusz/precision
Browse files Browse the repository at this point in the history
avoiding float to double promotion
  • Loading branch information
pdziekan committed Mar 19, 2019
2 parents a07596d + b79e7f3 commit 51b6f8b
Show file tree
Hide file tree
Showing 28 changed files with 233 additions and 195 deletions.
13 changes: 6 additions & 7 deletions libmpdata++/concurr/detail/sharedmem.hpp
Expand Up @@ -32,9 +32,8 @@ namespace libmpdataxx
static_assert(n_dims > 0, "n_dims <= 0");
static_assert(n_tlev > 0, "n_tlev <= 0");

// TODO: T_sumtype (perhaps worh using double even if summing floats?)
std::unique_ptr<blitz::Array<real_t, 1>> xtmtmp;
std::unique_ptr<blitz::Array<real_t, 1>> sumtmp;
std::unique_ptr<blitz::Array<double, 1>> sumtmp;

protected:

Expand Down Expand Up @@ -93,12 +92,12 @@ namespace libmpdataxx
throw std::runtime_error("number of subdomains greater than number of gridpoints");

if (n_dims != 1)
sumtmp.reset(new blitz::Array<real_t, 1>(grid_size[0]));
sumtmp.reset(new blitz::Array<double, 1>(grid_size[0]));
xtmtmp.reset(new blitz::Array<real_t, 1>(size));
}

/// @brief concurrency-aware summation of array elements
real_t sum(const arr_t &arr, const idx_t<n_dims> &ijk, const bool sum_khn)
double sum(const arr_t &arr, const idx_t<n_dims> &ijk, const bool sum_khn)
{
// doing a two-step sum to reduce numerical error
// and make parallel results reproducible
Expand All @@ -114,7 +113,7 @@ namespace libmpdataxx
(*sumtmp)(c) = blitz::sum(arr(slice_idx));
}
barrier();
real_t result;
double result;
if (sum_khn)
result = blitz::kahan_sum(*sumtmp);
else
Expand All @@ -124,7 +123,7 @@ namespace libmpdataxx
}

/// @brief concurrency-aware summation of a (element-wise) product of two arrays
real_t sum(const arr_t &arr1, const arr_t &arr2, const idx_t<n_dims> &ijk, const bool sum_khn)
double sum(const arr_t &arr1, const arr_t &arr2, const idx_t<n_dims> &ijk, const bool sum_khn)
{
// doing a two-step sum to reduce numerical error
// and make parallel results reproducible
Expand All @@ -140,7 +139,7 @@ namespace libmpdataxx
(*sumtmp)(c) = blitz::sum(arr1(slice_idx) * arr2(slice_idx));
}
barrier();
real_t result;
double result;
if (sum_khn)
result = blitz::kahan_sum(*sumtmp);
else
Expand Down
6 changes: 6 additions & 0 deletions libmpdata++/formulae/common.hpp
Expand Up @@ -13,6 +13,12 @@ namespace libmpdataxx
{
namespace formulae
{
// helper to cast floating literals to correct precision based on the blitz array underlaying type
template<class arr_t>
constexpr auto fconst(const double v)
{
return static_cast<typename arr_t::T_numtype>(v);
}
// overloads of abs/min/max/where that pick out the correct version based on ix_t
template<class ix_t, class arg_t>
forceinline_macro auto abs(const arg_t &a, typename std::enable_if<std::is_same<ix_t, int>::value>::type* = 0)
Expand Down
6 changes: 3 additions & 3 deletions libmpdata++/formulae/mpdata/formulae_mpdata_1d.hpp
Expand Up @@ -130,7 +130,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 1.0 / 24 *
- fconst<arr_1d_t>(1.0 / 24) *
(
4 * GC[0](i+h) * ndxx_psi<opts>(psi, i)
+ 2 * ndx_psi<opts>(psi, i) * ndx_GC0(GC[0], i)
Expand Down Expand Up @@ -170,9 +170,9 @@ namespace libmpdataxx
// spatial terms
+ div_3rd_spatial<opts, sptl_intrp>(psi, GC, G, i)
// mixed terms
+ 0.5 * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)
+ fconst<arr_1d_t>(0.5) * abs(GC[0](i+h)) * ndx_fdiv<opts>(psi, GC, G, i)
// temporal terms
+ 1.0 / 24 *
+ fconst<arr_1d_t>(1.0 / 24) *
(
- 8 * GC[0](i+h) * nfdiv_fdiv<opts>(psi, GC, G, i)
+ div_3rd_temporal<opts, tmprl_extrp>(psi, ndtt_GC, i)
Expand Down
10 changes: 5 additions & 5 deletions libmpdata++/formulae/mpdata/formulae_mpdata_2d.hpp
Expand Up @@ -145,7 +145,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 1.0 / 24 *
- fconst<arr_2d_t>(1.0 / 24) *
(
4 * GC[dim](pi<dim>(i+h, j)) * ndxx_psi<opts, dim>(psi, i, j)
+ 2 * ndx_psi<opts, dim>(psi, i, j) * ndx_GC0<dim>(GC[dim], i, j)
Expand Down Expand Up @@ -193,9 +193,9 @@ namespace libmpdataxx
// spatial terms
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j)
// mixed terms
+ 0.5 * abs(GC[dim](pi<dim>(i+h, j))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j)
+ fconst<arr_2d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j)
// temporal terms
+ 1.0 / 24 *
+ fconst<arr_2d_t>(1.0 / 24) *
(
- 8 * GC[dim](pi<dim>(i+h, j)) * nfdiv_fdiv<opts, dim>(psi_np1, GC, G, i, j)
+ div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j)
Expand Down Expand Up @@ -226,9 +226,9 @@ namespace libmpdataxx
// spatial terms
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j)
// mixed terms
- 0.5 * abs(GC[dim](pi<dim>(i+h, j))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j)
- fconst<arr_2d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j)
// temporal terms
+ 1.0 / 24 *
+ fconst<arr_2d_t>(1.0 / 24) *
(
+ 8 * GC[dim](pi<dim>(i+h, j)) * nfdiv_dt<opts, dim>(psi_np1, psi_n, GC, G, i, j)
+ 1 * ndtt_GC0<opts, dim>(psi_np1, ndtt_GC[dim], i, j)
Expand Down
10 changes: 5 additions & 5 deletions libmpdata++/formulae/mpdata/formulae_mpdata_3d.hpp
Expand Up @@ -154,7 +154,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 1.0 / 24 *
- fconst<arr_3d_t>(1.0 / 24) *
(
4 * GC[dim](pi<dim>(i+h, j, k)) * ndxx_psi<opts, dim>(psi, i, j, k)
+ 2 * ndx_psi<opts, dim>(psi, i, j, k) * ndx_GC0<dim>(GC[dim], i, j, k)
Expand Down Expand Up @@ -204,9 +204,9 @@ namespace libmpdataxx
// spatial terms
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j, k)
// mixed terms
+ 0.5 * abs(GC[dim](pi<dim>(i+h, j, k))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
+ fconst<arr_3d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j, k))) * ndx_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
// temporal terms
+ 1.0 / 24 *
+ fconst<arr_3d_t>(1.0 / 24) *
(
- 8 * GC[dim](pi<dim>(i+h, j, k)) * nfdiv_fdiv<opts, dim>(psi_np1, GC, G, i, j, k)
+ div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j, k)
Expand Down Expand Up @@ -238,9 +238,9 @@ namespace libmpdataxx
// spatial terms
+ div_3rd_spatial<opts, dim, sptl_intrp>(psi_np1, GC, G, i, j, k)
// mixed terms
- 0.5 * abs(GC[dim](pi<dim>(i+h, j, k))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j, k)
- fconst<arr_3d_t>(0.5) * abs(GC[dim](pi<dim>(i+h, j, k))) * ndtx_psi<opts, dim>(psi_np1, psi_n, i, j, k)
// temporal terms
+ 1.0 / 24 *
+ fconst<arr_3d_t>(1.0 / 24) *
(
+ 8 * GC[dim](pi<dim>(i+h, j, k)) * nfdiv_dt<opts, dim>(psi_np1, psi_n, GC, G, i, j, k)
+ div_3rd_temporal<opts, dim, tmprl_extrp>(psi_np1, ndtt_GC, i, j, k)
Expand Down
4 changes: 4 additions & 0 deletions libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp
Expand Up @@ -40,6 +40,10 @@ namespace libmpdataxx
using idxperm::pi;
using opts::opts_t;
using std::abs;

using blitz::pow2;
using blitz::pow3;
using blitz::pow4;

const int n_tlev = 2;

Expand Down
6 changes: 3 additions & 3 deletions libmpdata++/formulae/mpdata/formulae_mpdata_dfl_1d.hpp
Expand Up @@ -38,7 +38,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 0.5 * GC(i+h)
- fconst<arr_1d_t>(0.5) * GC(i+h)
/
(formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))
*
Expand All @@ -56,13 +56,13 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 0.5 * GC(i+h)
- fconst<arr_1d_t>(0.5) * GC(i+h)
/
(formulae::G<opts>(G, i+1) + formulae::G<opts>(G, i))
*
(GC((i+1)+h) - GC(i-h))
*
0.5 * (psi(i+1) + psi(i)) //to be compatible with iga formulation
fconst<arr_1d_t>(0.5) * (psi(i+1) + psi(i)) //to be compatible with iga formulation
);
}
} // namespace mpdata
Expand Down
4 changes: 2 additions & 2 deletions libmpdata++/formulae/mpdata/formulae_mpdata_dfl_2d.hpp
Expand Up @@ -42,7 +42,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 0.25 * GC[dim](pi<dim>(i+h, j))
- fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))
/
G_bar_x<opts, dim>(G, i, j)
*
Expand Down Expand Up @@ -74,7 +74,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 0.25 * GC[dim](pi<dim>(i+h, j))
- fconst<arr_2d_t>(0.25) * GC[dim](pi<dim>(i+h, j))
/
G_bar_x<opts, dim>(G, i, j)
*
Expand Down
4 changes: 2 additions & 2 deletions libmpdata++/formulae/mpdata/formulae_mpdata_dfl_3d.hpp
Expand Up @@ -44,7 +44,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 0.25 * GC[dim](pi<dim>(i+h, j, k))
- fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))
/
G_bar_x<opts, dim>(G, i, j, k)
*
Expand Down Expand Up @@ -84,7 +84,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
- 0.25 * GC[dim](pi<dim>(i+h, j, k))
- fconst<arr_3d_t>(0.25) * GC[dim](pi<dim>(i+h, j, k))
/
G_bar_x<opts, dim>(G, i, j, k)
*
Expand Down
2 changes: 1 addition & 1 deletion libmpdata++/formulae/mpdata/formulae_mpdata_hot_1d.hpp
Expand Up @@ -26,7 +26,7 @@ namespace libmpdataxx
return return_helper<ix_t>(
(
3 * GC(i+h) * abs(GC(i+h)) / G_bar_x<opts>(G, i)
- 2 * pow(GC(i+h), 3) / pow(G_bar_x<opts>(G, i), 2)
- 2 * pow3(GC(i+h)) / pow2(G_bar_x<opts>(G, i))
- GC(i+h)
) / 6
);
Expand Down
22 changes: 11 additions & 11 deletions libmpdata++/formulae/mpdata/formulae_mpdata_hot_2d.hpp
Expand Up @@ -29,7 +29,7 @@ namespace libmpdataxx
return return_helper<ix_t>(
(
3 * GC(pi<dim>(i+h, j)) * abs(GC(pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j)
- 2 * pow(GC(pi<dim>(i+h, j)), 3) / pow(G_bar_x<opts, dim>(G, i, j), 2)
- 2 * pow3(GC(pi<dim>(i+h, j))) / pow2(G_bar_x<opts, dim>(G, i, j))
- GC(pi<dim>(i+h, j))
) / 6
);
Expand All @@ -44,7 +44,7 @@ namespace libmpdataxx
)
{
return return_helper<ix_t>(
(abs(GC[dim](pi<dim>(i+h, j))) - 2 * pow(GC[dim](pi<dim>(i+h, j)), 2) / G_bar_x<opts, dim>(G, i, j))
(abs(GC[dim](pi<dim>(i+h, j))) - 2 * pow2(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j))
* GC1_bar_xy<dim>(GC[dim+1], i, j) / (2 * G_bar_x<opts, dim>(G, i, j))
);
}
Expand Down Expand Up @@ -90,9 +90,9 @@ namespace libmpdataxx
{
return return_helper<ix_t>(
(
6 * abs(GC(pi<dim>(i+h, j))) * pow(GC(pi<dim>(i+h, j)), 2) / pow(G_bar_x<opts, dim>(G, i, j), 2)
- 3 * pow(GC(pi<dim>(i+h, j)), 2) / G_bar_x<opts, dim>(G, i, j)
- 3 * pow(GC(pi<dim>(i+h, j)), 4) / pow(G_bar_x<opts, dim>(G, i, j), 3)
6 * abs(GC(pi<dim>(i+h, j))) * pow2(GC(pi<dim>(i+h, j))) / pow2(G_bar_x<opts, dim>(G, i, j))
- 3 * pow2(GC(pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j)
- 3 * pow4(GC(pi<dim>(i+h, j))) / pow3(G_bar_x<opts, dim>(G, i, j))
) / 2
);
}
Expand All @@ -107,9 +107,9 @@ namespace libmpdataxx
{
return return_helper<ix_t>(
3 *
(abs(GC[dim](pi<dim>(i+h, j))) - pow(GC[dim](pi<dim>(i+h, j)), 2) / G_bar_x<opts, dim>(G, i, j))
(abs(GC[dim](pi<dim>(i+h, j))) - pow2(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j))
* GC[dim](pi<dim>(i+h, j)) * GC1_bar_xy<dim>(GC[dim-1], i, j)
/ pow(G_bar_x<opts, dim>(G, i, j), 2)
/ pow2(G_bar_x<opts, dim>(G, i, j))
);
}

Expand All @@ -124,11 +124,11 @@ namespace libmpdataxx
return return_helper<ix_t>(
3 *
(
3 * abs(GC1_bar_xy<dim>(GC[dim-1], i, j)) * pow(GC[dim](pi<dim>(i+h, j)), 2) / G_bar_x<opts, dim>(G, i, j)
+ 3 * pow(GC1_bar_xy<dim>(GC[dim-1], i, j), 2) * abs(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j)
3 * abs(GC1_bar_xy<dim>(GC[dim-1], i, j)) * pow2(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j)
+ 3 * pow2(GC1_bar_xy<dim>(GC[dim-1], i, j)) * abs(GC[dim](pi<dim>(i+h, j))) / G_bar_x<opts, dim>(G, i, j)
- 2 * abs(GC1_bar_xy<dim>(GC[dim-1], i, j)) * abs(GC[dim](pi<dim>(i+h, j)))
- 9 * pow(GC1_bar_xy<dim>(GC[dim-1], i, j), 2) * pow(GC[dim](pi<dim>(i+h, j)), 2)
/ pow(G_bar_x<opts, dim>(G, i, j), 2)
- 9 * pow2(GC1_bar_xy<dim>(GC[dim-1], i, j)) * pow2(GC[dim](pi<dim>(i+h, j)))
/ pow2(G_bar_x<opts, dim>(G, i, j))
) / G_bar_x<opts, dim>(G, i, j) / 4
);
}
Expand Down
8 changes: 4 additions & 4 deletions libmpdata++/formulae/mpdata/formulae_mpdata_hot_3d.hpp
Expand Up @@ -30,7 +30,7 @@ namespace libmpdataxx
return return_helper<ix_t>(
(
3 * GC(pi<dim>(i+h, j, k)) * abs(GC(pi<dim>(i+h, j, k))) / G_bar_x<opts, dim>(G, i, j, k)
- 2 * pow(GC(pi<dim>(i+h, j, k)), 3) / pow(G_bar_x<opts, dim>(G, i, j, k), 2)
- 2 * pow3(GC(pi<dim>(i+h, j, k))) / pow2(G_bar_x<opts, dim>(G, i, j, k))
- GC(pi<dim>(i+h, j, k))
) / 6
);
Expand All @@ -48,7 +48,7 @@ namespace libmpdataxx
return return_helper<ix_t>(
(
abs(GC[dim](pi<dim>(i+h, j, k)))
- 2 * pow(GC[dim](pi<dim>(i+h, j, k)), 2) / G_bar_x<opts, dim>(G, i, j, k)
- 2 * pow2(GC[dim](pi<dim>(i+h, j, k))) / G_bar_x<opts, dim>(G, i, j, k)
) * GC1_bar_xy<dim>(GC[dim+1], i, j, k) / (2 * G_bar_x<opts, dim>(G, i, j, k))
);
}
Expand All @@ -65,7 +65,7 @@ namespace libmpdataxx
return return_helper<ix_t>(
(
abs(GC[dim](pi<dim>(i+h, j, k)))
- 2 * pow(GC[dim](pi<dim>(i+h, j, k)), 2) / G_bar_x<opts, dim>(G, i, j, k)
- 2 * pow2(GC[dim](pi<dim>(i+h, j, k))) / G_bar_x<opts, dim>(G, i, j, k)
) * GC2_bar_xz<dim>(GC[dim-1], i, j, k) / (2 * G_bar_x<opts, dim>(G, i, j, k))
);
}
Expand All @@ -81,7 +81,7 @@ namespace libmpdataxx
{
return return_helper<ix_t>(
-2 * GC[dim](pi<dim>(i+h, j, k)) * GC1_bar_xy<dim>(GC[dim+1], i, j, k) * GC2_bar_xz<dim>(GC[dim-1], i, j, k) / 3
/ pow(G_bar_x<opts, dim>(G, i, j, k), 2)
/ pow2(G_bar_x<opts, dim>(G, i, j, k))
);
}

Expand Down
2 changes: 1 addition & 1 deletion libmpdata++/formulae/mpdata/formulae_mpdata_psi_3d.hpp
Expand Up @@ -933,7 +933,7 @@ namespace libmpdataxx
{
static_assert(!opts::isset(opts, opts::abs), "abs & iga options are mutually exclusive");
return return_helper<ix_t>(
0.5 * (psi_np1(pi<d>(i+1, j, k)) - psi_n(pi<d>(i+1, j, k)) + psi_np1(pi<d>(i, j, k)) - psi_n(pi<d>(i, j, k)))
fconst<arr_3d_t>(0.5) * (psi_np1(pi<d>(i+1, j, k)) - psi_n(pi<d>(i+1, j, k)) + psi_np1(pi<d>(i, j, k)) - psi_n(pi<d>(i, j, k)))
);
}
} // namespace mpdata
Expand Down
16 changes: 8 additions & 8 deletions libmpdata++/formulae/nabla_formulae.hpp
Expand Up @@ -30,7 +30,7 @@ namespace libmpdataxx
(
x(i+1) -
x(i-1)
) / dx / 2.
) / dx / 2
);
}

Expand All @@ -47,7 +47,7 @@ namespace libmpdataxx
(
x(pi<d>(i+1, j)) -
x(pi<d>(i-1, j))
) / dx / 2.
) / dx / 2
);
}

Expand All @@ -65,7 +65,7 @@ namespace libmpdataxx
(
x(pi<d>(i+1, j, k)) -
x(pi<d>(i-1, j, k))
) / dx / 2.
) / dx / 2
);
}

Expand Down Expand Up @@ -174,9 +174,9 @@ namespace libmpdataxx
)
{
return blitz::safeToReturn(
(v[0](ijk[0]+1, ijk[1]) - v[0](ijk[0]-1, ijk[1])) / dijk[0] / 2.
(v[0](ijk[0]+1, ijk[1]) - v[0](ijk[0]-1, ijk[1])) / dijk[0] / 2
+
(v[1](ijk[0], ijk[1]+1) - v[1](ijk[0], ijk[1]-1)) / dijk[1] / 2.
(v[1](ijk[0], ijk[1]+1) - v[1](ijk[0], ijk[1]-1)) / dijk[1] / 2
);
}

Expand All @@ -190,11 +190,11 @@ namespace libmpdataxx
)
{
return blitz::safeToReturn(
(v[0](ijk[0]+1, ijk[1], ijk[2]) - v[0](ijk[0]-1, ijk[1], ijk[2])) / dijk[0] / 2.
(v[0](ijk[0]+1, ijk[1], ijk[2]) - v[0](ijk[0]-1, ijk[1], ijk[2])) / dijk[0] / 2
+
(v[1](ijk[0], ijk[1]+1, ijk[2]) - v[1](ijk[0], ijk[1]-1, ijk[2])) / dijk[1] / 2.
(v[1](ijk[0], ijk[1]+1, ijk[2]) - v[1](ijk[0], ijk[1]-1, ijk[2])) / dijk[1] / 2
+
(v[2](ijk[0], ijk[1], ijk[2]+1) - v[2](ijk[0], ijk[1], ijk[2]-1)) / dijk[2] / 2.
(v[2](ijk[0], ijk[1], ijk[2]+1) - v[2](ijk[0], ijk[1], ijk[2]-1)) / dijk[2] / 2
);
}
} // namespace nabla_op
Expand Down

0 comments on commit 51b6f8b

Please sign in to comment.