Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New IDAaaS data cache index file loading #37187

Merged
merged 40 commits into from May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
6525f4e
Simple barebones for searching new index files
GuiMacielPereira Mar 28, 2024
7ea885b
Added setUp and tearDown to tests.
GuiMacielPereira Mar 28, 2024
15b0e4c
Changed implementation from DataHandling to API
GuiMacielPereira Mar 28, 2024
d957d59
Rough sketch of implementation after looking for data archive
GuiMacielPereira Mar 28, 2024
51709aa
Add cache search to the file finder.
GuiMacielPereira Apr 2, 2024
3eb656a
Changing function to handle expanded instr dirs
GuiMacielPereira Apr 3, 2024
f6fbef1
Add functionality to getInstrument() function
GuiMacielPereira Apr 4, 2024
d314e24
Added test for delimiter in filename
GuiMacielPereira Apr 4, 2024
1bfc329
Added handling of bad inputs
GuiMacielPereira Apr 4, 2024
363af4c
Added unit tests to FileFinder
GuiMacielPereira Apr 9, 2024
2a52597
Added exception for missing read permissions
GuiMacielPereira Apr 18, 2024
d14df70
Added unit test for folder without permissions
GuiMacielPereira Apr 19, 2024
1a189a0
Removed unnecessary changes
GuiMacielPereira Apr 19, 2024
9bbac0c
Added documentation for new mantid property
GuiMacielPereira Apr 19, 2024
914dbaa
Fixed cppcheck warnings
GuiMacielPereira Apr 22, 2024
39c4f3f
Cleaned exceptions catching
GuiMacielPereira Apr 23, 2024
ba4a50d
Fixed failing unit test
GuiMacielPereira Apr 23, 2024
209676e
Update Framework/API/inc/MantidAPI/ISISInstrDataCache.h
GuiMacielPereira Apr 24, 2024
78359ae
Change argument flag in getInstrument
GuiMacielPereira Apr 24, 2024
8ca5dbc
Updated doc string
GuiMacielPereira Apr 24, 2024
f826c08
Removed unused includes
GuiMacielPereira Apr 24, 2024
210e7f7
Removed another unused include
GuiMacielPereira Apr 24, 2024
d5cb094
Made error message clearer
GuiMacielPereira Apr 24, 2024
d710d64
Changed to const auto types
GuiMacielPereira Apr 24, 2024
024bff9
Split big function into smaller functions
GuiMacielPereira Apr 24, 2024
542df97
Removed unused old setUp()
GuiMacielPereira Apr 24, 2024
3c375a8
Added comment
GuiMacielPereira Apr 24, 2024
d3ac714
Reformatted strings and removed unnecessary ;
GuiMacielPereira Apr 24, 2024
ca07ee3
Included comment for clarification
GuiMacielPereira Apr 24, 2024
346cd6c
Renamed classes and files to more explicit descriptions
GuiMacielPereira Apr 24, 2024
47319fc
Fixed cppcheck and added unit tests
GuiMacielPereira Apr 25, 2024
f09f326
Added release note (+ section for Data Handling)
GuiMacielPereira Apr 29, 2024
c5d73a2
Cleaned error handling for permission denied
GuiMacielPereira May 9, 2024
46101db
Added catch for error from parsing json
GuiMacielPereira May 9, 2024
1d03a2e
Fixed include error + cppcheck suppressions
GuiMacielPereira May 9, 2024
3ca6d6d
Corrected unit test
GuiMacielPereira May 9, 2024
5046005
Removed unused includes + added const to methods
GuiMacielPereira May 10, 2024
8a4ed15
Pass bool by value
GuiMacielPereira May 10, 2024
4869c7a
Fix Cppcheck
GuiMacielPereira May 10, 2024
593879d
Passed string by const & + added unit test
GuiMacielPereira May 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions Framework/API/CMakeLists.txt
Expand Up @@ -80,6 +80,7 @@ set(SRC_FILES
src/IPeakFunction.cpp
src/IPeaksWorkspace.cpp
src/IPowderDiffPeakFunction.cpp
src/ISISInstrumentDataCache.cpp
src/ISpectrum.cpp
src/ITableWorkspace.cpp
src/ImmutableCompositeFunction.cpp
Expand Down Expand Up @@ -263,6 +264,7 @@ set(INC_FILES
inc/MantidAPI/IPeaksWorkspace.h
inc/MantidAPI/IPeaksWorkspace_fwd.h
inc/MantidAPI/IPowderDiffPeakFunction.h
inc/MantidAPI/ISISInstrumentDataCache.h
inc/MantidAPI/IPreview.h
inc/MantidAPI/ISpectrum.h
inc/MantidAPI/ISplittersWorkspace.h
Expand Down Expand Up @@ -423,6 +425,7 @@ set(TEST_FILES
ILatticeFunctionTest.h
IMDWorkspaceTest.h
IPreviewTest.h
ISISInstrumentDataCacheTest.h
ISpectrumTest.h
ImmutableCompositeFunctionTest.h
ImplicitFunctionParserFactoryTest.h
Expand Down
9 changes: 6 additions & 3 deletions Framework/API/inc/MantidAPI/FileFinder.h
Expand Up @@ -36,6 +36,7 @@ number
class MANTID_API_DLL FileFinderImpl {
public:
std::string getFullPath(const std::string &filename, const bool ignoreDirs = false) const;
std::string extractAllowedSuffix(std::string &userString) const;
const API::Result<std::string> getPath(const std::vector<IArchiveSearch_sptr> &archs,
const std::set<std::string> &filenames,
const std::vector<std::string> &exts) const;
Expand All @@ -49,10 +50,11 @@ class MANTID_API_DLL FileFinderImpl {
std::vector<std::string> findRuns(const std::string &hintstr, const std::vector<std::string> &exts = {},
const bool useExtsOnly = false) const;
/// DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
const Kernel::InstrumentInfo getInstrument(const std::string &hint) const;
const Kernel::InstrumentInfo getInstrument(const std::string &hint, const bool returnDefaultIfNotFound = true) const;
/// DO NOT USE! MADE PUBLIC FOR TESTING ONLY.
std::string getExtension(const std::string &filename, const std::vector<std::string> &exts) const;
void getUniqueExtensions(const std::vector<std::string> &extensionsToAdd, std::vector<std::string> &uniqueExts) const;
std::pair<std::string, std::string> toInstrumentAndNumber(const std::string &hint) const;

private:
friend struct Mantid::Kernel::CreateUsingNew<FileFinderImpl>;
Expand All @@ -67,8 +69,9 @@ class MANTID_API_DLL FileFinderImpl {
FileFinderImpl &operator=(const FileFinderImpl &);
/// A method that returns error messages if the provided runs are invalid
std::string validateRuns(const std::string &searchText) const;
std::string extractAllowedSuffix(std::string &userString) const;
std::pair<std::string, std::string> toInstrumentAndNumber(const std::string &hint) const;
const API::Result<std::string> getISISInstrumentDataCachePath(const std::string &cachePathToSearch,
const std::set<std::string> &filenames,
const std::vector<std::string> &exts) const;
const API::Result<std::string> getArchivePath(const std::vector<IArchiveSearch_sptr> &archs,
const std::set<std::string> &filenames,
const std::vector<std::string> &exts) const;
Expand Down
29 changes: 29 additions & 0 deletions Framework/API/inc/MantidAPI/ISISInstrumentDataCache.h
@@ -0,0 +1,29 @@
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright &copy; 2024 ISIS Rutherford Appleton Laboratory UKRI,
// NScD Oak Ridge National Laboratory, European Spallation Source,
// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
// SPDX - License - Identifier: GPL - 3.0 +
#pragma once

//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidAPI/DllConfig.h"
#include <string>

namespace Mantid {
namespace API {

/**
 * Looks up where a run file lives inside an ISIS instrument data cache by
 * reading the per-instrument JSON index file stored under the cache root.
 */
class MANTID_API_DLL ISISInstrumentDataCache {
public:
/// @param path :: Root directory of the data cache; stored as given and used as the prefix of every path built.
ISISInstrumentDataCache(const std::string &path) : m_dataCachePath(path) {}
/// Return the directory expected to contain the given run, built as
/// <cache root>/<instrument>/<relative path read from <instrument>_index.json>.
/// @param filename :: Instrument name followed by a run number, without suffix or extension.
/// @throws std::invalid_argument if the filename is malformed, the instrument is not
/// recognised, the index file cannot be opened, or the run is not listed in the index.
std::string getFileParentDirectoryPath(const std::string &filename) const;

private:
/// Reject filenames with a suffix or a non-numeric run number, strip the run number's
/// padding zeros and expand the instrument name; returns (instrument name, run number).
std::pair<std::string, std::string> validateInstrumentAndNumber(const std::string &filename) const;
/// Split the filename at its last non-digit character into (upper-cased instrument part, trailing digits).
std::pair<std::string, std::string> splitIntoInstrumentAndNumber(const std::string &filename) const;
/// Root directory of the data cache, as passed to the constructor.
std::string m_dataCachePath;
};
} // namespace API
} // namespace Mantid
87 changes: 78 additions & 9 deletions Framework/API/src/FileFinder.cpp
Expand Up @@ -11,6 +11,7 @@
#include "MantidAPI/ArchiveSearchFactory.h"
#include "MantidAPI/FrameworkManager.h"
#include "MantidAPI/IArchiveSearch.h"
#include "MantidAPI/ISISInstrumentDataCache.h"
#include "MantidKernel/ConfigService.h"
#include "MantidKernel/Exception.h"
#include "MantidKernel/FacilityInfo.h"
Expand All @@ -30,6 +31,9 @@

#include <boost/algorithm/string.hpp>

#include <filesystem>
#include <json/value.h>

namespace {
/// static logger object
Mantid::Kernel::Logger g_log("FileFinder");
Expand Down Expand Up @@ -132,9 +136,11 @@ std::string FileFinderImpl::extractAllowedSuffix(std::string &userString) const
* Return the InstrumentInfo as determined from the hint.
*
* @param hint :: The name hint.
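* @param returnDefaultIfNotFound :: If true (the default), fall back to the default instrument when the hint
* cannot be matched to a known instrument; if false, the NotFoundError is rethrown instead.
* @return The instrument matching the hint, or the default instrument if it cannot be determined.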
*/
const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint) const {
const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint,
const bool returnDefaultIfNotFound) const {
if ((!hint.empty()) && (!isdigit(hint[0]))) {
string instrName(hint);
Poco::Path path(instrName);
Expand All @@ -158,22 +164,25 @@ const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint) c
{
const auto it = std::find_if(instrName.begin(), instrName.end(), isdigit);
const auto nChars = std::distance(instrName.begin(), it);
instrName = instrName.substr(0, nChars);
instrName.resize(nChars);
}

// go backwards looking for the instrument name to end - gets around
// delimiters
if (!instrName.empty()) {
const auto it = std::find_if(instrName.rbegin(), instrName.rend(), isalpha);
const auto nChars = std::distance(it, instrName.rend());
instrName = instrName.substr(0, nChars);
instrName.resize(nChars);
}
}
try {
const Kernel::InstrumentInfo instrument = Kernel::ConfigService::Instance().getInstrument(instrName);
return instrument;
} catch (Kernel::Exception::NotFoundError &e) {
g_log.debug() << e.what() << "\n";
if (!returnDefaultIfNotFound) {
throw e;
}
}
}
return Kernel::ConfigService::Instance().getInstrument();
Expand All @@ -185,7 +194,7 @@ const Kernel::InstrumentInfo FileFinderImpl::getInstrument(const string &hint) c
* @return A pair of instrument name and run number
*/
std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const std::string &hint) const {
// g_log.debug() << "toInstrumentAndNumber(" << hint << ")\n";
g_log.debug() << "toInstrumentAndNumber(" << hint << ")\n";
jhaigh0 marked this conversation as resolved.
Show resolved Hide resolved
std::string instrPart;
std::string runPart;

Expand Down Expand Up @@ -254,8 +263,6 @@ std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const
* too long
*/
std::string FileFinderImpl::makeFileName(const std::string &hint, const Kernel::InstrumentInfo &instrument) const {
// g_log.debug() << "makeFileName(" << hint << ", " << instrument.shortName()
// << ")\n";
if (hint.empty())
return "";

Expand Down Expand Up @@ -305,7 +312,7 @@ std::string FileFinderImpl::getExtension(const std::string &filename, const std:
std::string extension = toUpper(ext);
if (extension.rfind('*') == extension.size() - 1) // there is a wildcard at play
{
extension = extension.substr(0, extension.rfind('*'));
extension.resize(extension.rfind('*'));
}

std::size_t found = toUpper(filename).rfind(extension);
Expand Down Expand Up @@ -645,6 +652,52 @@ std::vector<std::string> FileFinderImpl::findRuns(const std::string &hintstr, co
return res;
}

/**
 * Search the ISIS instrument data cache for the first requested file that exists.
 *
 * For each filename the cache index is consulted (through ISISInstrumentDataCache)
 * to obtain the directory expected to hold the run; every extension is then tried
 * in turn inside that directory.
 *
 * @param cachePathToSearch :: Root directory of the instrument data cache
 * @param filenames :: Candidate file names (without extension) to look up
 * @param exts :: Extensions appended to each file name, tried in the given order
 * @return A Result holding the full path of the first file found; otherwise an
 *         empty path together with the error messages collected so far. Failures
 *         other than "file not present" (bad filename, unreadable or unparsable
 *         index, missing directory, filesystem error) end the search immediately.
 */
const API::Result<std::string>
FileFinderImpl::getISISInstrumentDataCachePath(const std::string &cachePathToSearch,
                                               const std::set<std::string> &filenames,
                                               const std::vector<std::string> &exts) const {
  std::string errors;
  const API::ISISInstrumentDataCache dataCache(cachePathToSearch);

  for (const auto &filename : filenames) {

    std::string parentDirPath;

    try {
      parentDirPath = dataCache.getFileParentDirectoryPath(filename);

    } catch (const std::invalid_argument &e) {
      errors += "Data cache: " + std::string(e.what());
      return API::Result<std::string>("", errors);

    } catch (const Json::Exception &e) {
      errors += "Data cache: Failed parsing to JSON: " + std::string(e.what()) +
                " Error likely due to accessing instrument index file while it was being updated on IDAaaS.";
      return API::Result<std::string>("", errors);
    }

    // std::filesystem::exists throws filesystem_error on OS-level failures such as
    // permission denied, so the directory check is guarded as well as the file checks.
    try {
      if (!std::filesystem::exists(parentDirPath)) {
        errors += "Data cache: Directory not found: " + parentDirPath;
        return API::Result<std::string>("", errors);
      }

      for (const auto &ext : exts) {
        const std::filesystem::path filePath(parentDirPath + '/' + filename + ext);
        if (std::filesystem::exists(filePath)) {
          return API::Result<std::string>(filePath.string());
        }
      }
    } catch (const std::filesystem::filesystem_error &e) {
      errors += "Data cache: " + std::string(e.what());
      return API::Result<std::string>("", errors);
    }

    errors += "Data cache: " + filename + " not found in " + parentDirPath;
  }
  return API::Result<std::string>("", errors);
}

/**
* Return the path to the file found in archive
* @param archs :: A list of archives to search
Expand Down Expand Up @@ -741,8 +794,25 @@ const API::Result<std::string> FileFinderImpl::getPath(const std::vector<IArchiv
}
}

// Search data cache
string errors;
std::filesystem::path cachePathToSearch(Kernel::ConfigService::Instance().getString("datacachesearch.directory"));
// Only expect to find path to data cache on IDAaaS
if (std::filesystem::exists(cachePathToSearch)) {

API::Result<std::string> cacheFilePath =
getISISInstrumentDataCachePath(cachePathToSearch.string(), filenames, exts);

if (cacheFilePath) {
return cacheFilePath;
}
errors += cacheFilePath.errors();

} else {
errors += "Could not find data cache directory: " + cachePathToSearch.string();
}
jhaigh0 marked this conversation as resolved.
Show resolved Hide resolved

// Search the archive
string errors = "";
if (!archs.empty()) {
g_log.debug() << "Search the archives\n";
const auto archivePath = getArchivePath(archs, filenames, exts);
Expand All @@ -758,7 +828,6 @@ const API::Result<std::string> FileFinderImpl::getPath(const std::vector<IArchiv
errors += archivePath.errors();

} // archs

return API::Result<std::string>("", errors);
}

Expand Down
81 changes: 81 additions & 0 deletions Framework/API/src/ISISInstrumentDataCache.cpp
@@ -0,0 +1,81 @@
#include "MantidAPI/ISISInstrumentDataCache.h"
#include "MantidAPI/FileFinder.h"
#include "MantidKernel/ConfigService.h"
#include "MantidKernel/Exception.h"
#include "MantidKernel/InstrumentInfo.h"
#include "MantidKernel/Logger.h"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <functional>
#include <iterator>
#include <json/reader.h>
#include <stdexcept>
#include <string>
#include <utility>

namespace {
Mantid::Kernel::Logger g_log("ISISInstrumentDataCache");
} // namespace

std::string Mantid::API::ISISInstrumentDataCache::getFileParentDirectoryPath(const std::string &fileName) const {
g_log.debug() << "ISISInstrumentDataCache::getFileParentDirectoryPath(" << fileName << ")" << std::endl;

auto [instrName, runNumber] = validateInstrumentAndNumber(fileName);

// Open index json file
std::string jsonPath = m_dataCachePath + "/" + instrName + "/" + instrName + "_index.json";
std::ifstream ifstrm{jsonPath};
if (!ifstrm) {
throw std::invalid_argument("Could not open index file: " + jsonPath);
}

// Read directory path from json file
Json::Value json;
ifstrm >> json;
std::string relativePath = json[runNumber].asString();

if (relativePath.empty()) {
throw std::invalid_argument("Run number " + runNumber + " not found for instrument " + instrName + ".");
}

std::string dirPath = m_dataCachePath + "/" + instrName + "/" + relativePath;

g_log.debug() << "Opened instrument index file: " << jsonPath << ". Found path to search: " << dirPath << "."
<< std::endl;
return dirPath;
}

/**
 * Validate a filename of the form <instrument><run number> and normalise its parts.
 *
 * @param fileName :: Instrument name followed by the run number, without suffix
 * @return Pair of (instrument name as reported by the instrument lookup,
 *         run number with padding zeros removed)
 * @throws std::invalid_argument if an allowed suffix (eg. -add) is present, the
 *         run number is missing or not purely numeric, or the instrument is unknown
 */
std::pair<std::string, std::string>
Mantid::API::ISISInstrumentDataCache::validateInstrumentAndNumber(const std::string &fileName) const {

  // Check if suffix eg. -add is present in filename (the helper modifies its argument, so work on a copy)
  std::string fileNameCopy = fileName;
  const std::string suffix = FileFinder::Instance().extractAllowedSuffix(fileNameCopy);
  if (!suffix.empty()) {
    throw std::invalid_argument("Unsuported format: Suffix detected: " + suffix);
  }

  auto [instrName, runNumber] = splitIntoInstrumentAndNumber(fileName);

  // <cctype> classifiers are only defined for values representable as unsigned char,
  // so take the character as unsigned char instead of passing plain char through.
  const auto isDigit = [](const unsigned char c) { return std::isdigit(c) != 0; };
  if (runNumber.empty() || !std::all_of(runNumber.cbegin(), runNumber.cend(), isDigit)) { // Check run number
    throw std::invalid_argument("Filename not in correct format.");
  }

  // Remove padding zeros. For an all-zero run number find_first_not_of returns npos,
  // which would erase the whole string, so keep the final digit in that case.
  const auto firstSignificant = runNumber.find_first_not_of('0');
  runNumber.erase(0, firstSignificant == std::string::npos ? runNumber.size() - 1 : firstSignificant);

  try { // Expand instrument name
    instrName = FileFinder::Instance().getInstrument(instrName, false).name();
  } catch (const Kernel::Exception::NotFoundError &) {
    throw std::invalid_argument("Instrument name not recognized.");
  }

  return std::pair(instrName, runNumber);
}

/**
 * Split a filename into its instrument part and its trailing run number.
 *
 * The split point is just after the last non-digit character, because the
 * instrument name itself can contain numbers.
 *
 * @param fileName :: Instrument name followed by the run number
 * @return Pair of (upper-cased text up to and including the last non-digit,
 *         trailing digits); either part may be empty
 */
std::pair<std::string, std::string>
Mantid::API::ISISInstrumentDataCache::splitIntoInstrumentAndNumber(const std::string &fileName) const {

  // <cctype> functions are only defined for values representable as unsigned char,
  // so the lambdas take the character as unsigned char rather than plain char.
  const auto isNotDigit = [](const unsigned char c) { return std::isdigit(c) == 0; };

  // Find the last non-digit as the instrument name can contain numbers
  const auto lastNonDigit = std::find_if(fileName.crbegin(), fileName.crend(), isNotDigit);
  const auto prefixLength = static_cast<std::string::size_type>(std::distance(lastNonDigit, fileName.crend()));

  std::string runNumber = fileName.substr(prefixLength);

  std::string instrName = fileName.substr(0, prefixLength);
  std::transform(instrName.begin(), instrName.end(), instrName.begin(),
                 [](const unsigned char c) { return static_cast<char>(std::toupper(c)); });

  return {std::move(instrName), std::move(runNumber)};
}