Skip to content

Commit

Permalink
Merge pull request #37187 from mantidproject/37013-new-idaaas-data-ca…
Browse files Browse the repository at this point in the history
…che-index

New IDAaaS data cache index file loading
  • Loading branch information
robertapplin committed May 10, 2024
2 parents b837b6b + 593879d commit 240c7a6
Show file tree
Hide file tree
Showing 14 changed files with 457 additions and 18 deletions.
3 changes: 3 additions & 0 deletions Framework/API/CMakeLists.txt
Expand Up @@ -80,6 +80,7 @@ set(SRC_FILES
src/IPeakFunction.cpp
src/IPeaksWorkspace.cpp
src/IPowderDiffPeakFunction.cpp
src/ISISInstrumentDataCache.cpp
src/ISpectrum.cpp
src/ITableWorkspace.cpp
src/ImmutableCompositeFunction.cpp
Expand Down Expand Up @@ -263,6 +264,7 @@ set(INC_FILES
inc/MantidAPI/IPeaksWorkspace.h
inc/MantidAPI/IPeaksWorkspace_fwd.h
inc/MantidAPI/IPowderDiffPeakFunction.h
inc/MantidAPI/ISISInstrumentDataCache.h
inc/MantidAPI/IPreview.h
inc/MantidAPI/ISpectrum.h
inc/MantidAPI/ISplittersWorkspace.h
Expand Down Expand Up @@ -423,6 +425,7 @@ set(TEST_FILES
ILatticeFunctionTest.h
IMDWorkspaceTest.h
IPreviewTest.h
ISISInstrumentDataCacheTest.h
ISpectrumTest.h
ImmutableCompositeFunctionTest.h
ImplicitFunctionParserFactoryTest.h
Expand Down
9 changes: 6 additions & 3 deletions Framework/API/inc/MantidAPI/FileFinder.h
Expand Up @@ -36,6 +36,7 @@ number
class MANTID_API_DLL FileFinderImpl {
public:
std::string getFullPath(const std::string &filename, const bool ignoreDirs = false) const;
std::string extractAllowedSuffix(std::string &userString) const;
const API::Result<std::string> getPath(const std::vector<IArchiveSearch_sptr> &archs,
const std::set<std::string> &filenames,
const std::vector<std::string> &exts) const;
Expand All @@ -49,10 +50,11 @@ class MANTID_API_DLL FileFinderImpl {
std::vector<std::string> findRuns(const std::string &hintstr, const std::vector<std::string> &exts = {},
const bool useExtsOnly = false) const;
/// DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
const Kernel::InstrumentInfo getInstrument(const std::string &hint) const;
const Kernel::InstrumentInfo getInstrument(const std::string &hint, const bool returnDefaultIfNotFound = true) const;
/// DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
std::string getExtension(const std::string &filename, const std::vector<std::string> &exts) const;
void getUniqueExtensions(const std::vector<std::string> &extensionsToAdd, std::vector<std::string> &uniqueExts) const;
std::pair<std::string, std::string> toInstrumentAndNumber(const std::string &hint) const;

private:
friend struct Mantid::Kernel::CreateUsingNew<FileFinderImpl>;
Expand All @@ -67,8 +69,9 @@ class MANTID_API_DLL FileFinderImpl {
FileFinderImpl &operator=(const FileFinderImpl &);
/// A method that returns error messages if the provided runs are invalid
std::string validateRuns(const std::string &searchText) const;
std::string extractAllowedSuffix(std::string &userString) const;
std::pair<std::string, std::string> toInstrumentAndNumber(const std::string &hint) const;
const API::Result<std::string> getISISInstrumentDataCachePath(const std::string &cachePathToSearch,
const std::set<std::string> &filenames,
const std::vector<std::string> &exts) const;
const API::Result<std::string> getArchivePath(const std::vector<IArchiveSearch_sptr> &archs,
const std::set<std::string> &filenames,
const std::vector<std::string> &exts) const;
Expand Down
29 changes: 29 additions & 0 deletions Framework/API/inc/MantidAPI/ISISInstrumentDataCache.h
@@ -0,0 +1,29 @@
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright &copy; 2024 ISIS Rutherford Appleton Laboratory UKRI,
// NScD Oak Ridge National Laboratory, European Spallation Source,
// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
// SPDX - License - Identifier: GPL - 3.0 +
#pragma once

//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidAPI/DllConfig.h"
#include <string>

namespace Mantid {
namespace API {

class MANTID_API_DLL ISISInstrumentDataCache {
public:
ISISInstrumentDataCache(const std::string &path) : m_dataCachePath(path) {}
std::string getFileParentDirectoryPath(const std::string &filename) const;

private:
std::pair<std::string, std::string> validateInstrumentAndNumber(const std::string &filename) const;
std::pair<std::string, std::string> splitIntoInstrumentAndNumber(const std::string &filename) const;
std::string m_dataCachePath;
};
} // namespace API
} // namespace Mantid
87 changes: 78 additions & 9 deletions Framework/API/src/FileFinder.cpp
Expand Up @@ -11,6 +11,7 @@
#include "MantidAPI/ArchiveSearchFactory.h"
#include "MantidAPI/FrameworkManager.h"
#include "MantidAPI/IArchiveSearch.h"
#include "MantidAPI/ISISInstrumentDataCache.h"
#include "MantidKernel/ConfigService.h"
#include "MantidKernel/Exception.h"
#include "MantidKernel/FacilityInfo.h"
Expand All @@ -30,6 +31,9 @@

#include <boost/algorithm/string.hpp>

#include <filesystem>
#include <json/value.h>

namespace {
/// static logger object
Mantid::Kernel::Logger g_log("FileFinder");
Expand Down Expand Up @@ -132,9 +136,11 @@ std::string FileFinderImpl::extractAllowedSuffix(std::string &userString) const
* Return the InstrumentInfo as determined from the hint.
*
* @param hint :: The name hint.
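* @param returnDefaultIfNotFound :: If true (the default), fall back to the default instrument when the
* instrument named in the hint is not found; if false, the Kernel::Exception::NotFoundError is rethrown instead.
* @return The instrument determined from the hint. The default instrument is returned when the hint is empty
* or starts with a digit, or when the lookup fails and returnDefaultIfNotFound is true.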
*/
const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint) const {
const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint,
const bool returnDefaultIfNotFound) const {
if ((!hint.empty()) && (!isdigit(hint[0]))) {
string instrName(hint);
Poco::Path path(instrName);
Expand All @@ -158,22 +164,25 @@ const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint) c
{
const auto it = std::find_if(instrName.begin(), instrName.end(), isdigit);
const auto nChars = std::distance(instrName.begin(), it);
instrName = instrName.substr(0, nChars);
instrName.resize(nChars);
}

// go backwards looking for the instrument name to end - gets around
// delimiters
if (!instrName.empty()) {
const auto it = std::find_if(instrName.rbegin(), instrName.rend(), isalpha);
const auto nChars = std::distance(it, instrName.rend());
instrName = instrName.substr(0, nChars);
instrName.resize(nChars);
}
}
try {
const Kernel::InstrumentInfo instrument = Kernel::ConfigService::Instance().getInstrument(instrName);
return instrument;
} catch (Kernel::Exception::NotFoundError &e) {
g_log.debug() << e.what() << "\n";
if (!returnDefaultIfNotFound) {
throw e;
}
}
}
return Kernel::ConfigService::Instance().getInstrument();
Expand All @@ -185,7 +194,7 @@ const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint) c
* @return A pair of instrument name and run number
*/
std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const std::string &hint) const {
// g_log.debug() << "toInstrumentAndNumber(" << hint << ")\n";
g_log.debug() << "toInstrumentAndNumber(" << hint << ")\n";
std::string instrPart;
std::string runPart;

Expand Down Expand Up @@ -254,8 +263,6 @@ std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const
* too long
*/
std::string FileFinderImpl::makeFileName(const std::string &hint, const Kernel::InstrumentInfo &instrument) const {
// g_log.debug() << "makeFileName(" << hint << ", " << instrument.shortName()
// << ")\n";
if (hint.empty())
return "";

Expand Down Expand Up @@ -305,7 +312,7 @@ std::string FileFinderImpl::getExtension(const std::string &filename, const std:
std::string extension = toUpper(ext);
if (extension.rfind('*') == extension.size() - 1) // there is a wildcard at play
{
extension = extension.substr(0, extension.rfind('*'));
extension.resize(extension.rfind('*'));
}

std::size_t found = toUpper(filename).rfind(extension);
Expand Down Expand Up @@ -645,6 +652,52 @@ std::vector<std::string> FileFinderImpl::findRuns(const std::string &hintstr, co
return res;
}

/**
 * Search the ISIS instrument data cache for the first of the given files that exists.
 *
 * For each candidate file name the parent directory is looked up through
 * API::ISISInstrumentDataCache, and each extension is then tried in turn inside that directory.
 *
 * @param cachePathToSearch :: Root directory of the data cache.
 * @param filenames :: Candidate file names (without extension) to look for.
 * @param exts :: Extensions appended, in order, to each candidate file name.
 * @return A Result holding the full path of the first file found. If nothing is found, or a lookup
 * fails, the Result holds an empty path together with the error text accumulated so far. A failed
 * directory lookup or file system error ends the search immediately.
 */
const API::Result<std::string>
FileFinderImpl::getISISInstrumentDataCachePath(const std::string &cachePathToSearch,
                                               const std::set<std::string> &filenames,
                                               const std::vector<std::string> &exts) const {
  const API::ISISInstrumentDataCache dataCache(cachePathToSearch);
  std::string errors;

  for (const auto &filename : filenames) {
    std::string parentDirPath;

    try {
      parentDirPath = dataCache.getFileParentDirectoryPath(filename);
    } catch (const std::invalid_argument &e) {
      errors += "Data cache: " + std::string(e.what());
      return API::Result<std::string>("", errors);
    } catch (const Json::Exception &e) {
      errors += "Data cache: Failed parsing to JSON: " + std::string(e.what()) +
                " Error likely due to accessing instrument index file while it was being updated on IDAaaS.";
      return API::Result<std::string>("", errors);
    }

    // Both the directory probe and the per-file probes can throw (e.g. permission denied), so they
    // share one guard; previously only the per-file probe was protected.
    try {
      if (!std::filesystem::exists(parentDirPath)) {
        errors += "Data cache: Directory not found: " + parentDirPath;
        return API::Result<std::string>("", errors);
      }

      for (const auto &ext : exts) {
        const std::filesystem::path filePath(parentDirPath + '/' + filename + ext);
        if (std::filesystem::exists(filePath)) {
          return API::Result<std::string>(filePath.string());
        }
      }
    } catch (const std::filesystem::filesystem_error &e) {
      errors += "Data cache: " + std::string(e.what());
      return API::Result<std::string>("", errors);
    }

    errors += "Data cache: " + filename + " not found in " + parentDirPath;
  }
  return API::Result<std::string>("", errors);
}

/**
* Return the path to the file found in archive
* @param archs :: A list of archives to search
Expand Down Expand Up @@ -741,8 +794,25 @@ const API::Result<std::string> FileFinderImpl::getPath(const std::vector<IArchiv
}
}

// Search data cache
string errors;
std::filesystem::path cachePathToSearch(Kernel::ConfigService::Instance().getString("datacachesearch.directory"));
// Only expect to find path to data cache on IDAaaS
if (std::filesystem::exists(cachePathToSearch)) {

API::Result<std::string> cacheFilePath =
getISISInstrumentDataCachePath(cachePathToSearch.string(), filenames, exts);

if (cacheFilePath) {
return cacheFilePath;
}
errors += cacheFilePath.errors();

} else {
errors += "Could not find data cache directory: " + cachePathToSearch.string();
}

// Search the archive
string errors = "";
if (!archs.empty()) {
g_log.debug() << "Search the archives\n";
const auto archivePath = getArchivePath(archs, filenames, exts);
Expand All @@ -758,7 +828,6 @@ const API::Result<std::string> FileFinderImpl::getPath(const std::vector<IArchiv
errors += archivePath.errors();

} // archs

return API::Result<std::string>("", errors);
}

Expand Down
81 changes: 81 additions & 0 deletions Framework/API/src/ISISInstrumentDataCache.cpp
@@ -0,0 +1,81 @@
#include "MantidAPI/ISISInstrumentDataCache.h"
#include "MantidAPI/FileFinder.h"
#include "MantidKernel/ConfigService.h"
#include "MantidKernel/Exception.h"
#include "MantidKernel/InstrumentInfo.h"
#include "MantidKernel/Logger.h"

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>

#include <json/reader.h>

namespace {
Mantid::Kernel::Logger g_log("ISISInstrumentDataCache");
} // namespace

std::string Mantid::API::ISISInstrumentDataCache::getFileParentDirectoryPath(const std::string &fileName) const {
g_log.debug() << "ISISInstrumentDataCache::getFileParentDirectoryPath(" << fileName << ")" << std::endl;

auto [instrName, runNumber] = validateInstrumentAndNumber(fileName);

// Open index json file
std::string jsonPath = m_dataCachePath + "/" + instrName + "/" + instrName + "_index.json";
std::ifstream ifstrm{jsonPath};
if (!ifstrm) {
throw std::invalid_argument("Could not open index file: " + jsonPath);
}

// Read directory path from json file
Json::Value json;
ifstrm >> json;
std::string relativePath = json[runNumber].asString();

if (relativePath.empty()) {
throw std::invalid_argument("Run number " + runNumber + " not found for instrument " + instrName + ".");
}

std::string dirPath = m_dataCachePath + "/" + instrName + "/" + relativePath;

g_log.debug() << "Opened instrument index file: " << jsonPath << ". Found path to search: " << dirPath << "."
<< std::endl;
return dirPath;
}

std::pair<std::string, std::string>
Mantid::API::ISISInstrumentDataCache::validateInstrumentAndNumber(const std::string &fileName) const {

// Check if suffix eg. -add is present in filename
std::string fileNameCopy = fileName;
std::string suffix = FileFinder::Instance().extractAllowedSuffix(fileNameCopy);
if (!suffix.empty()) {
throw std::invalid_argument("Unsuported format: Suffix detected: " + suffix);
}

auto [instrName, runNumber] = splitIntoInstrumentAndNumber(fileName);

if (runNumber.empty() || !std::all_of(runNumber.begin(), runNumber.end(), ::isdigit)) { // Check run number
throw std::invalid_argument("Filename not in correct format.");
}
runNumber.erase(0, runNumber.find_first_not_of('0')); // Remove padding zeros

try { // Expand instrument name
instrName = FileFinder::Instance().getInstrument(instrName, false).name();
} catch (const Kernel::Exception::NotFoundError &) {
throw std::invalid_argument("Instrument name not recognized.");
}

return std::pair(instrName, runNumber);
}

/**
 * Split a file name into a leading instrument part and a trailing run-number part.
 *
 * The run number is the longest run of decimal digits at the end of the name; everything before
 * it is the instrument part, which is converted to upper case. No validation is done here, so
 * either part may be empty.
 *
 * @param fileName :: File name made of an instrument name followed by a run number.
 * @return A pair of (upper-cased instrument part, trailing digits).
 */
std::pair<std::string, std::string>
Mantid::API::ISISInstrumentDataCache::splitIntoInstrumentAndNumber(const std::string &fileName) const {

  // The <cctype> functions are undefined for negative values, so always go through unsigned char
  const auto isDigit = [](const unsigned char c) { return std::isdigit(c) != 0; };
  const auto toUpper = [](const unsigned char c) { return static_cast<char>(std::toupper(c)); };

  // Find the last non-digit as the instrument name can contain numbers
  const auto lastNonDigit = std::find_if_not(fileName.rbegin(), fileName.rend(), isDigit);
  const auto instrLength = static_cast<std::size_t>(std::distance(lastNonDigit, fileName.rend()));

  // Only the instrument prefix needs upper-casing; the run number is digits only
  std::string instrName = fileName.substr(0, instrLength);
  std::transform(instrName.begin(), instrName.end(), instrName.begin(), toUpper);

  return {std::move(instrName), fileName.substr(instrLength)};
}

0 comments on commit 240c7a6

Please sign in to comment.