Skip to content

Commit

Permalink
Extend gpu profiling support for rocm
Browse files Browse the repository at this point in the history
  • Loading branch information
eromero-vlc committed Dec 5, 2023
1 parent f693af4 commit f08356a
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
3 changes: 3 additions & 0 deletions Link_flags
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ ifeq ($(origin LIBS), undefined)
endif
# ROCm backend: link hipSPARSE, hipBLAS and the HIP runtime from $(ROCMDIR)/lib
ifeq ($(PRIMME_WITH_HIPBLAS),yes)
LIBS += -L$(ROCMDIR)/lib -lhipsparse -lhipblas -lamdhip64
# With GPU profiling enabled the code calls roctxRangePush/roctxRangePop,
# so the roctx library has to be linked as well
ifeq ($(PRIMME_WITH_GPU_PROFILING), yes)
LIBS += -lroctx64
endif
endif
ifndef MKLROOT
LIBS += -llapack -lblas -lm
Expand Down
7 changes: 5 additions & 2 deletions Make_flags
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,22 @@ CLANG ?= clang
# MAGMA support defaults to yes only if MAGMADIR is defined (its origin is not "undefined")
PRIMME_WITH_MAGMA ?= $(if $(findstring undefined,$(origin MAGMADIR)),no,yes)
# CUDADIR ?= /usr/local/cuda
# CUDA support defaults to yes only if CUDADIR is defined (its origin is not "undefined")
PRIMME_WITH_CUDA ?= $(if $(findstring undefined,$(origin CUDADIR)),no,yes)
# Set PRIMME_WITH_GPU_PROFILING to yes to mark GPU kernels with the invoking
# PRIMME function; when yes, -DPRIMME_PROFILE_NV is added to CFLAGS further down
PRIMME_WITH_GPU_PROFILING ?= no
# CUDA backend: add the CUDA include path and -DPRIMME_WITH_CUBLAS, plus the
# MAGMA include path and -DPRIMME_WITH_MAGMA when MAGMA is also requested.
# Profiling marks are enabled with PRIMME_WITH_GPU_PROFILING=yes, not by
# editing this block.
ifeq ($(PRIMME_WITH_CUDA), yes)
    ifeq ($(PRIMME_WITH_MAGMA), yes)
        override CFLAGS += -I$(MAGMADIR)/include -DPRIMME_WITH_MAGMA
    endif
    override CFLAGS += -I$(CUDADIR)/include -DPRIMME_WITH_CUBLAS
endif
# ROCMDIR ?= $(shell hipconfig -R)
# hipBLAS support defaults to yes only if ROCMDIR is defined (its origin is not "undefined")
PRIMME_WITH_HIPBLAS ?= $(if $(findstring undefined,$(origin ROCMDIR)),no,yes)
ifeq ($(PRIMME_WITH_HIPBLAS), yes)
    # Query hipconfig once at parse time; leaving $(shell ...) inside CFLAGS
    # would re-run it on every expansion when CFLAGS is recursively expanded
    PRIMME_HIPCONFIG_CFLAGS := $(shell hipconfig -C)
    override CFLAGS += $(PRIMME_HIPCONFIG_CFLAGS) -I$(ROCMDIR)/include -DPRIMME_WITH_HIPBLAS
endif
# GPU profiling: PRIMME_PROFILE_NV turns on the range push/pop macros in
# src/include/common.h. That header raises #error if the macro is set and
# no supported GPU backend is enabled.
ifeq ($(PRIMME_WITH_GPU_PROFILING), yes)
override CFLAGS += -DPRIMME_PROFILE_NV
endif

#---------------------------------------------------------------
# IBM architectures
Expand Down
21 changes: 21 additions & 0 deletions src/include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,30 @@ static inline const char *__compose_function_name(const char *path,
#endif

#ifdef PRIMME_PROFILE_NV
/* Select the range-marking API that matches the enabled GPU backend.
 * CUDA builds are compiled with -DPRIMME_WITH_CUBLAS, so that macro is
 * accepted in addition to PRIMME_WITH_CUDA. */
#if defined(PRIMME_WITH_CUDA) || defined(PRIMME_WITH_CUBLAS)
#include <nvToolsExt.h>
/* Open an NVTX range labeled with the string CALL */
#define PROFILE_NV_BEGIN(CALL) nvtxRangePush(CALL);
/* Close the most recently opened NVTX range */
#define PROFILE_NV_END nvtxRangePop();
#elif defined(PRIMME_WITH_HIPBLAS)
#include <roctracer/roctx.h>
/* Open a roctx range labeled with a sanitized copy of the string CALL.
 * The copy is built in a 100-byte local buffer: at most the first 99
 * characters of CALL are scanned, double quotes are dropped, and the result
 * is always null-terminated. CALL is evaluated exactly once. */
#define PROFILE_NV_BEGIN(CALL) \
   { \
      /* Limit the call to 100 characters and remove double quotes */ \
      /* (they break the JSON output) */ \
      char _aux_str__[100]; \
      const char *_str = (CALL); \
      for (int i = 0, j = 0; i < 100; ++i) { \
         if (_str[i] == 0 || i == 99) { \
            _aux_str__[j] = 0; \
            break; \
         } else if (_str[i] != '"') \
            _aux_str__[j++] = _str[i]; \
      } \
      roctxRangePush(_aux_str__); \
   }
/* Close the most recently opened roctx range */
#define PROFILE_NV_END roctxRangePop();
#else
#error "Unsupported GPU system for profiling"
#endif
#else
#define PROFILE_NV_BEGIN(CALL)
#define PROFILE_NV_END
Expand Down

0 comments on commit f08356a

Please sign in to comment.