Skip to content

Commit f34411a

Browse files
authored
v1.1.0 update (#7)
* updates for v1.1.0 * minor revision * Makefile revision
1 parent 20919d4 commit f34411a

24 files changed

+146
-46
lines changed

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
# FCFC Change Log
22

3+
## [1.1.0] – 2025-06-11
4+
5+
- Fixed an early-return bug affecting tiny catalogs.
6+
- Corrected separation bins of the saved projected correlation data ([PR #4](pull/4)).
7+
- Revised the normalization factor for weighted pair counts ([PR #3](pull/3)).
8+
- Added a `Makefile` for benchmark code compilation.
9+
- Fixed a macro typo reported in [Issue #2](issues/2).
10+
- Updated the FCFC paper reference in the README.
11+
312
## [1.0.1] – 2022-06-26
413

514
- Refactored the implementation of MPI and OpenMP parallelism.
615
- Optimized MPI communications with custom structs, and fixed the deadlock due to multiple broadcasts.
7-

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,4 @@ FCFC_2PT_BOX: src/fcfc/2pt_box
7373
$(C_CMPLR) $(CFLAGS) -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
7474

7575
clean:
76-
rm $(TARGETS)
76+
rm -f $(TARGETS)

README.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ This program is compliant with the ISO C99 and IEEE POSIX.1-2008 standards, and
3232

3333
FCFC is written by Cheng Zhao (&#36213;&#25104;), and is distributed under the [MIT license](LICENSE.txt). If you use this program in research work that results in publications, please cite the following paper:
3434

35-
> Zhao et al. 2020, [arXiv:2007.08997](https://ui.adsabs.harvard.edu/abs/2020arXiv200708997Z/abstract)
35+
> Zhao, C., 2023, [&ldquo;Fast correlation function calculator: A high-performance pair-counting toolkit&rdquo;](https://doi.org/10.1051/0004-6361/202346015), *Astronomy and Astrophysics*, 672, A83. \[[arXiv:2301.12557](https://arxiv.org/abs/2301.12557)\] \[[ADS Abstract](https://ui.adsabs.harvard.edu/abs/2023A%26A...672A..83Z)\]
3636
3737
<sub>[\[TOC\]](#table-of-contents)</sub>
3838

@@ -81,12 +81,12 @@ The list of available command line options can be consulted using the `-h` or `-
8181

8282
The components and their corresponding configuration parameters are listed below:
8383

84-
| Component | Description | Configuration parameters |
85-
|:------------:|-----------------------------------------------------------------|:--------------------------------------:|
86-
| FCFC_2PT | Compute 2PCF for survey-like data | [FCFC_2PT.md](doc/FCFC_2PT.md) |
87-
| FCFC_2PT_BOX | Compute 2PCF for periodic simulation boxes<sup>[*](#tab1)</sup> | [FCFC_2PT_BOX.md](doc/FCFC_2PT_BOX.md) |
84+
| Component | Description | Configuration parameters |
85+
|:--------------:|------------------------------------------------------------------|:----------------------------------------:|
86+
| FCFC\_2PT | Compute 2PCF for survey-like data | [FCFC\_2PT.md](doc/FCFC_2PT.md) |
87+
| FCFC\_2PT\_BOX | Compute 2PCF for periodic simulation boxes<sup>[\*](#tab1)</sup> | [FCFC\_2PT\_BOX.md](doc/FCFC_2PT_BOX.md) |
8888

89-
<span id="tab1">*: treat the 3<sup>rd</sup> dimension (*z*-direction) as the line of sight</span>
89+
<span id="tab1">\*: treat the 3<sup>rd</sup> dimension (*z*-direction) as the line of sight</span>
9090

9191
<sub>[\[TOC\]](#table-of-contents)</sub>
9292

@@ -98,4 +98,3 @@ This program benefits from the following open-source projects:
9898
- [https://github.com/swenson/sort](https://github.com/swenson/sort)
9999

100100
<sub>[\[TOC\]](#table-of-contents)</sub>
101-

benchmark/Makefile

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
CC = gcc
2+
CFLAGS = -std=c99 -D_POSIX_C_SOURCE=200809 -Wall -O3 -march=native
3+
LIBS = -lm
4+
INCL = -Icommon
5+
SRCS = $(wildcard common/*.c)
6+
CFLAGS += -DBENCHMARK_TIMING
7+
8+
TARGETS = BENCHMARK_struct_serial BENCHMARK_struct_avx BENCHMARK_struct_single_serial BENCHMARK_struct_single_avx BENCHMARK_hist_serial BENCHMARK_hist_avx BENCHMARK_hist_avx2 BENCHMARK_hist_single_serial BENCHMARK_hist_single_avx BENCHMARK_hist_single_avx2
9+
10+
# Benchmarks with AVX512
11+
#TARGETS += BENCHMARK_struct_avx512 BENCHMARK_struct_single_avx512 BENCHMARK_hist_avx512 BENCHMARK_hist_single_avx512
12+
13+
# Benchmarks for OpenMP strategy
14+
#TARGETS += BENCHMARK_struct_omp_dynamic BENCHMARK_struct_omp_static
15+
16+
17+
.PHONY: all
18+
all: $(TARGETS)
19+
20+
BENCHMARK_struct_serial: struct
21+
$(CC) $(CFLAGS) -DBENCHMARK_TREE_PREALLOC -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
22+
23+
BENCHMARK_struct_avx: struct
24+
$(CC) $(CFLAGS) -DBENCHMARK_TREE_PREALLOC -DWITH_AVX -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
25+
26+
BENCHMARK_struct_avx512: struct
27+
$(CC) $(CFLAGS) -DBENCHMARK_TREE_PREALLOC -DWITH_AVX512 -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
28+
29+
BENCHMARK_struct_single_serial: struct
30+
$(CC) $(CFLAGS) -DBENCHMARK_TREE_PREALLOC -DSINGLE_PREC -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
31+
32+
BENCHMARK_struct_single_avx: struct
33+
$(CC) $(CFLAGS) -DBENCHMARK_TREE_PREALLOC -DSINGLE_PREC -DWITH_AVX -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
34+
35+
BENCHMARK_struct_single_avx512: struct
36+
$(CC) $(CFLAGS) -DBENCHMARK_TREE_PREALLOC -DSINGLE_PREC -DWITH_AVX512 -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
37+
38+
39+
BENCHMARK_hist_serial: histogram
40+
$(CC) $(CFLAGS) -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
41+
42+
BENCHMARK_hist_avx: histogram
43+
$(CC) $(CFLAGS) -DWITH_AVX -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
44+
45+
BENCHMARK_hist_avx2: histogram
46+
$(CC) $(CFLAGS) -DWITH_AVX2 -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
47+
48+
BENCHMARK_hist_avx512: histogram
49+
$(CC) $(CFLAGS) -DWITH_AVX512 -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
50+
51+
BENCHMARK_hist_single_serial: histogram
52+
$(CC) $(CFLAGS) -DSINGLE_PREC -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
53+
54+
BENCHMARK_hist_single_avx: histogram
55+
$(CC) $(CFLAGS) -DSINGLE_PREC -DWITH_AVX -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
56+
57+
BENCHMARK_hist_single_avx2: histogram
58+
$(CC) $(CFLAGS) -DSINGLE_PREC -DWITH_AVX2 -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
59+
60+
BENCHMARK_hist_single_avx512: histogram
61+
$(CC) $(CFLAGS) -DSINGLE_PREC -DWITH_AVX512 -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
62+
63+
64+
BENCHMARK_struct_omp_dynamic: struct
65+
$(CC) $(CFLAGS) -DBENCHMARK_TIMING -DBENCHMARK_OMP_DYNAMIC -fopenmp -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
66+
67+
BENCHMARK_struct_omp_static: struct
68+
$(CC) $(CFLAGS) -DBENCHMARK_TIMING -DBENCHMARK_OMP_STATIC -fopenmp -o $@ $(SRCS) $(wildcard $</*.c) $(LIBS) $(INCL) -I$<
69+
70+
71+
.PHONY: clean
72+
clean:
73+
rm -f $(TARGETS)

benchmark/common/define_comm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
typedef float real;
5050
#define REAL_TOL 1e-6 /* tolerance for float number comparison */
5151
#ifdef FLT_EPSILON
52-
#define REAL_EPS REAL_EPS
52+
#define REAL_EPS FLT_EPSILON
5353
#else
5454
#define REAL_EPS 1e-7
5555
#endif

benchmark/histogram/benchmark.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,16 @@ int main(int argc, char *argv[]) {
226226
printf(FMT_KEY(METHOD) ": FastLog2 (order %d)\n\n",
227227
method - BENCHMARK_HIST_FASTLOG2_START);
228228
}
229-
else printf(FMT_KEY(METHOD) ": %s\n\n",
229+
else printf(FMT_KEY(METHOD)
230+
#if BENCHMARK_SIMD == BENCHMARK_SIMD_NONE
231+
": %s (no SIMD)\n\n",
232+
#elif BENCHMARK_SIMD == BENCHMARK_SIMD_AVX
233+
": %s (AVX)\n\n",
234+
#elif BENCHMARK_SIMD == BENCHMARK_SIMD_AVX2
235+
": %s (AVX2)\n\n",
236+
#elif BENCHMARK_SIMD == BENCHMARK_SIMD_AVX512
237+
": %s (AVX512)\n\n",
238+
#endif
230239
method >= 0 ? benchmark_method_name[method] : "hybrid lookup table");
231240

232241
/* Evaluate the distance histogram. */

doc/logo/plot_fcfc_logo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def export(self, filename, dpi=300):
159159
import matplotlib.patches as mpatches
160160

161161
# Create the figure.
162-
fig = plt.figure(figsize=(float(self.width)/dpi,float(self.height)/dpi), \
162+
plt.figure(figsize=(float(self.width)/dpi,float(self.height)/dpi), \
163163
dpi=dpi, frameon=False)
164164
ax = plt.axes([0,0,1,1])
165165
ax.set_axis_off()

options.mk

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
# * MPICC is used when WITH_MPI == T.
44
CC := gcc
55
MPICC := mpicc
6-
CFLAGS = -O3 -march=native -flto
6+
CFLAGS = -O3 -march=native
7+
8+
# LTO option. Recommended flags for different compilers:
9+
# gcc: -flto=auto, intel: -ipo, clang: -flto
10+
CFLAGS += -flto=auto
711

812
# Set `WITH_MPI := T` to enable MPI.
913
WITH_MPI :=

src/fcfc/2pt/build_tree.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,12 +264,17 @@ void *tree_create(const CONF *conf, CF *cf, const int idx
264264

265265
/* Process weights */
266266
if (conf->has_wt[idx]) {
267-
double sumw = 0;
267+
double sumw, sumw2;
268+
sumw = sumw2 = 0;
268269
#ifdef OMP
269-
#pragma omp parallel for reduction(+:sumw) default(none) shared(data)
270+
#pragma omp parallel for reduction(+:sumw,sumw2) default(none) shared(data)
270271
#endif
271-
for (size_t i = 0; i < data->n; i++) sumw += data->w[i];
272+
for (size_t i = 0; i < data->n; i++) {
273+
sumw += data->w[i];
274+
sumw2 += data->w[i] * data->w[i];
275+
}
272276
data->wt = sumw;
277+
data->w2 = sumw2;
273278
}
274279
else if (cf->cat_wt[idx]) {
275280
#if FCFC_SIMD == FCFC_SIMD_NONE
@@ -285,7 +290,7 @@ void *tree_create(const CONF *conf, CF *cf, const int idx
285290
#pragma omp parallel for default(none) shared(data)
286291
#endif
287292
for (size_t i = 0; i < data->n; i++) data->w[i] = 1;
288-
data->wt = (double) data->n;
293+
data->wt = data->w2 = (double) data->n;
289294
}
290295

291296
/* Construct the tree. */

src/fcfc/2pt/eval_cf.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,11 @@
1212

1313
#include "eval_cf.h"
1414
#include "count_func.h"
15-
#include "read_file.h"
1615
#include "read_res.h"
1716
#include "save_res.h"
1817
#include "legpoly.h"
1918
#include "build_tree.h"
2019
#include <stdlib.h>
21-
#include <math.h>
2220

2321
#ifdef OMP
2422
#include <omp.h>
@@ -125,7 +123,8 @@ static int eval_pairs(const CONF *conf, CF *cf
125123
/* Double auto pairs. */
126124
if (cf->wt[i]) {
127125
for (size_t k = 0; k < cf->ntot; k++) cf->cnt[i][k].d *= 2;
128-
cf->norm[i] = cf->data[cat[0]].wt * (cf->data[cat[0]].wt - 1);
126+
cf->norm[i] = cf->data[cat[0]].wt * cf->data[cat[0]].wt
127+
- cf->data[cat[0]].w2;
129128
}
130129
else {
131130
for (size_t k = 0; k < cf->ntot; k++) cf->cnt[i][k].i *= 2;

0 commit comments

Comments
 (0)