Skip to content

Commit

Permalink
Add stream operations to accelerator components
Browse files Browse the repository at this point in the history
- Stream-based alloc and free
- Stream-based memmove
- Wait for stream to complete

Also, enable querying for number of devices and memory bandwidth.
These operations are needed for offloading operations to devices.

Co-authored-by: Phuong Nguyen <phuong.nguyen@icl.utk.edu>
Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
  • Loading branch information
devreal and Phuong Nguyen committed Feb 25, 2024
1 parent 980eb50 commit 1c4d11b
Show file tree
Hide file tree
Showing 9 changed files with 747 additions and 94 deletions.
106 changes: 105 additions & 1 deletion opal/mca/accelerator/accelerator.h
Expand Up @@ -5,6 +5,9 @@
* Copyright (c) Amazon.com, Inc. or its affiliates.
* All Rights reserved.
* Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
* Copyright (c) 2024 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -193,7 +196,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
* @param[IN] dev_id Associated device for the event or
* MCA_ACCELERATOR_NO_DEVICE_ID
* @param[OUT] event Event to create
* @param[IN] enable_ipc support inter-process tracking of the event
* @param[IN] enable_ipc support inter-process tracking of the event
*
* @return OPAL_SUCCESS or error status on failure.
*/
Expand Down Expand Up @@ -310,6 +313,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
opal_accelerator_transfer_type_t type);


/**
* Copies size bytes asynchronously from src to dest. The memory regions of
* dest and src may overlap. The caller can optionally specify the transfer
* type to avoid pointer detection for performance. The copy is enqueued into
* the provided stream and is not guaranteed to be complete upon return.
*
* NOTE(review): src and dest presumably have to remain valid until the
* operation has completed on the stream -- confirm with the component
* implementations.
*
* @param[IN] dest_dev_id Associated device to copy to or
* MCA_ACCELERATOR_NO_DEVICE_ID
* @param[IN] src_dev_id Associated device to copy from or
* MCA_ACCELERATOR_NO_DEVICE_ID
* @param[IN] dest Destination to copy memory to
* @param[IN] src Source to copy memory from
* @param[IN] size Size of memory to copy
* @param[IN] stream Stream into which the asynchronous move is enqueued
* @param[IN] type Transfer type field for performance.
* Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
* if the caller is unsure of the transfer direction.
*
* @return OPAL_SUCCESS or error status on failure
*/
typedef int (*opal_accelerator_base_module_memmove_async_fn_t)(
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
opal_accelerator_stream_t *stream, opal_accelerator_transfer_type_t type);

/**
* Allocates size bytes memory from the device and sets ptr to the
* pointer of the allocated memory. The memory is not initialized.
Expand Down Expand Up @@ -340,6 +368,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
typedef int (*opal_accelerator_base_module_mem_release_fn_t)(
int dev_id, void *ptr);


/**
* Allocates size bytes of memory from the device and sets ptr to the
* pointer of the allocated memory. The memory is not initialized.
* The allocation request is placed into the stream object.
* The memory must not be used before this operation has completed
* on the stream.
*
* @param[IN] dev_id Associated device for the allocation or
* MCA_ACCELERATOR_NO_DEVICE_ID
* @param[OUT] ptr Returns pointer to allocated memory
* @param[IN] size Size of memory to allocate
* @param[IN] stream Stream into which to insert the allocation request
*
* @return OPAL_SUCCESS or error status on failure
*/
typedef int (*opal_accelerator_base_module_mem_alloc_stream_fn_t)(
int dev_id, void **ptr, size_t size, opal_accelerator_stream_t *stream);

/**
* Frees the memory space pointed to by ptr, which must have been returned
* by a previous call to opal_accelerator_base_module_mem_alloc_stream_fn_t().
* If the function is called on a ptr that has already been freed,
* undefined behavior occurs. If ptr is NULL, no operation is performed,
* and the function returns OPAL_SUCCESS.
* The release of the memory is inserted into the stream and occurs after
* all previous operations on that stream have completed.
*
* @param[IN] dev_id Associated device for the allocation or
* MCA_ACCELERATOR_NO_DEVICE_ID
* @param[IN] ptr Pointer to free
* @param[IN] stream Stream into which to insert the free operation
*
* @return OPAL_SUCCESS or error status on failure
*/
typedef int (*opal_accelerator_base_module_mem_release_stream_fn_t)(
int dev_id, void *ptr, opal_accelerator_stream_t *stream);



/**
* Retrieves the base address and/or size of a memory allocation of the
* device.
Expand Down Expand Up @@ -557,6 +625,35 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
typedef int (*opal_accelerator_base_module_get_buffer_id_fn_t)(
int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);

/**
* Wait for the completion of all operations inserted into the stream.
*
* @param[IN] stream The stream to wait for.
*
* @return OPAL_SUCCESS or error status on failure
*/
typedef int (*opal_accelerator_base_module_wait_stream_fn_t)(opal_accelerator_stream_t *stream);

/**
* Get the number of devices available.
*
* @param[OUT] num_devices Number of devices.
*
* @return OPAL_SUCCESS or error status on failure
*/
typedef int (*opal_accelerator_base_module_get_num_devices_fn_t)(int *num_devices);

/**
* Get the memory bandwidth of the device.
*
* NOTE(review): the unit of the returned bandwidth is not specified
* here -- confirm against the component implementations and document it.
*
* @param[IN] device The device to query.
* @param[OUT] bw The returned bandwidth for the device.
*
* @return OPAL_SUCCESS or error status on failure
*/
typedef int (*opal_accelerator_base_module_get_mem_bw_fn_t)(int device, float *bw);


/*
* the standard public API data structure
*/
Expand All @@ -572,10 +669,13 @@ typedef struct {

opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async;
opal_accelerator_base_module_memcpy_fn_t mem_copy;
opal_accelerator_base_module_memmove_async_fn_t mem_move_async;
opal_accelerator_base_module_memmove_fn_t mem_move;

opal_accelerator_base_module_mem_alloc_fn_t mem_alloc;
opal_accelerator_base_module_mem_release_fn_t mem_release;
opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream;
opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream;
opal_accelerator_base_module_get_address_range_fn_t get_address_range;

opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled;
Expand All @@ -595,6 +695,10 @@ typedef struct {
opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer;

opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id;

opal_accelerator_base_module_wait_stream_fn_t wait_stream;
opal_accelerator_base_module_get_num_devices_fn_t num_devices;
opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw;
} opal_accelerator_base_module_t;

/**
Expand Down

0 comments on commit 1c4d11b

Please sign in to comment.