Skip to content

Commit

Permalink
linux/dax: add CXLDevice info attribute(s) in DAX and NUMA nodes
Browse files Browse the repository at this point in the history
There can be multiple entries if the region is interleaved.
Might be better to merge into a single info attr? We'll see.

This uses "memregion" identifiers (regionX) to match dax devices
and CXL devices.

The corresponding Linux code (CXL volatile regions) is planned for Linux 6.3.

Refs open-mpi#554

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
  • Loading branch information
bgoglin committed Feb 20, 2023
1 parent 2cb74cb commit 5d0f33f
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 2 deletions.
5 changes: 5 additions & 0 deletions doc/hwloc.doxy
Expand Up @@ -2123,6 +2123,11 @@ and GID #1 of port #3.
These info attributes are attached to objects specified in parentheses.

<dl>
<dt>CXLDevice (NUMA Nodes or DAX Memory OS devices)</dt>
<dd>The PCI/CXL bus ID of a device whose CXL Type-3 memory is exposed here.
There may be multiple instances of this attribute if the memories of multiple
devices are interleaved.
</dd>
<dt>DAXDevice (NUMA Nodes)</dt>
<dd>The name of the Linux DAX device that was used to expose a non-volatile
memory region as a volatile NUMA node.
Expand Down
77 changes: 75 additions & 2 deletions hwloc/topology-linux.c
Expand Up @@ -3689,6 +3689,69 @@ read_node_mscaches(struct hwloc_topology *topology,
return 0;
}

/*
 * Annotate an object with the CXL device(s) backing a CXL memory region.
 *
 * For i = 0, 1, ... the file /sys/bus/cxl/devices/<regionname>/target<i>
 * is read to get a decoder name. The decoder symlink is resolved to find
 * its "endpointX" component, then the endpointX/uport symlink is resolved.
 * The last PCI bus ID ("domain:bus:dev.func") found after the "/pci<root>/"
 * component of that uport path is added to obj as a "CXLDevice" info
 * attribute. One attribute is added per target for which a bus ID is found.
 *
 * obj:        object receiving the "CXLDevice" info attributes.
 * regionname: name of the region directory under /sys/bus/cxl/devices/.
 * root_fd:    passed unchanged to hwloc_read_path_by_length() and
 *             hwloc_readlink() (presumably the filesystem root descriptor;
 *             confirm against those helpers).
 *
 * The enumeration stops at the first target that cannot be read or resolved.
 * Always returns 0.
 */
static int
annotate_cxl_dax(hwloc_obj_t obj, const char *regionname, int root_fd)
{
  char path[300];
  unsigned i;

  for(i=0; ; i++) {
    char decoder[20]; /* "decoderX.Y" */
    char decoderpath[256], *endpoint;
    char uportpath[256], *pcirootbus, *pcibdf;
    unsigned pcidomain, pcibus, pcidevice, pcifunc;
    char *slash, *end;
    int err;

    /* read the i-th decoder name from file target<i> */
    snprintf(path, sizeof(path), "/sys/bus/cxl/devices/%s/target%u", regionname, i);
    if (hwloc_read_path_by_length(path, decoder, sizeof(decoder), root_fd) < 0)
      break;
    end = strchr(decoder, '\n');
    if (end)
      *end = '\0';
    hwloc_debug("hwloc/dax/cxl: found decoder `%s' for region `%s' target#%u\n", decoder, regionname, i);

    /* get the endpoint symlink which ends with "/portT/endpointX/decoderY.X/" */
    snprintf(path, sizeof(path), "/sys/bus/cxl/devices/%s", decoder);
    err = hwloc_readlink(path, decoderpath, sizeof(decoderpath)-1, root_fd);
    if (err < 0)
      break;
    /* Whether hwloc_readlink() terminates the buffer is not visible here
     * (POSIX readlink() does not), so terminate explicitly before using
     * string functions. One byte was reserved above for this.
     */
    decoderpath[err] = '\0';
    endpoint = strstr(decoderpath, "endpoint");
    if (!endpoint)
      break;
    slash = strchr(endpoint, '/');
    if (!slash)
      break;
    *slash = '\0';
    hwloc_debug("hwloc/dax/cxl: found endpoint `%s'\n", endpoint);

    /* get the PCI in the endpointX/uport symlink "../../../pci<busid>/<BDFs>../memX" */
    snprintf(path, sizeof(path), "/sys/bus/cxl/devices/%s/uport", endpoint);
    err = hwloc_readlink(path, uportpath, sizeof(uportpath)-1, root_fd);
    if (err < 0)
      break;
    uportpath[err] = '\0'; /* same defensive termination as above */
    hwloc_debug("hwloc/dax/cxl: lookind for BDF at the end of uport `%s'\n", uportpath);
    pcirootbus = strstr(uportpath, "/pci");
    if (!pcirootbus)
      break;
    /* Skip the root bus component "/pci<domain>:<bus>". Search for its
     * closing '/' instead of assuming a fixed 11-character width, so that
     * a short link target is never read past its end and domains wider
     * than 4 hex digits still work.
     */
    slash = strchr(pcirootbus + 4, '/');
    if (!slash)
      break;
    /* Walk the consecutive "/<domain>:<bus>:<dev>.<func>" components and
     * remember the last one. %n reports how many characters were really
     * consumed, hence slash never moves beyond the string terminator.
     */
    pcibdf = NULL;
    for(;;) {
      int consumed = 0;
      if (sscanf(slash, "/%x:%x:%x.%x%n", &pcidomain, &pcibus, &pcidevice, &pcifunc, &consumed) != 4
          || consumed <= 0)
        break;
      pcibdf = slash+1;
      slash += consumed;
    }
    /* cut whatever follows the last bus ID (e.g. "/memX") */
    *slash = '\0';
    if (pcibdf) {
      hwloc_obj_add_info(obj, "CXLDevice", pcibdf);
    }
  }

  return 0;
}

static int
dax_is_kmem(const char *name, int fsroot_fd)
{
Expand Down Expand Up @@ -3744,6 +3807,16 @@ annotate_dax_parent(hwloc_obj_t obj, const char *name, int fsroot_fd)

hwloc_obj_add_info(obj, "DAXParent", begin);

/* check if the region comes from CXL */
begin = strstr(begin, "/region");
if (begin) {
begin++;
end = strchr(begin, '/');
if (end)
*end = '\0';
annotate_cxl_dax(obj, begin, fsroot_fd);
}

/*
* Note:
* "ndbus" or "ndctl" in the path should be enough since these are specifically for NVDIMMs.
Expand Down Expand Up @@ -6641,12 +6714,12 @@ hwloc_linuxfs_lookup_cxlmem(struct hwloc_backend *backend, unsigned osdev_flags)
hwloc_obj_t obj, parent;

if (strncmp(dirent->d_name, "mem", 3))
continue;
continue;

snprintf(path, sizeof(path), "/sys/bus/cxl/devices/%s", dirent->d_name);
parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, osdev_flags | HWLOC_LINUXFS_OSDEV_FLAG_UNDER_BUS | HWLOC_LINUXFS_OSDEV_FLAG_USE_PARENT_ATTRS);
if (!parent)
continue;
continue;

obj = hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_MEMORY, dirent->d_name);

Expand Down

0 comments on commit 5d0f33f

Please sign in to comment.