Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fetcher cache cleanups #10465

Merged
merged 4 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
160 changes: 76 additions & 84 deletions src/libfetchers/cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ namespace nix::fetchers {
static const char * schema = R"sql(

create table if not exists Cache (
input text not null,
info text not null,
path text not null,
immutable integer not null,
domain text not null,
key text not null,
value text not null,
timestamp integer not null,
primary key (input)
primary key (domain, key)
);
)sql";

Expand All @@ -28,7 +27,7 @@ struct CacheImpl : Cache
struct State
{
SQLite db;
SQLiteStmt add, lookup;
SQLiteStmt upsert, lookup;
};

Sync<State> _state;
Expand All @@ -37,136 +36,129 @@ struct CacheImpl : Cache
{
auto state(_state.lock());

auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite";
auto dbPath = getCacheDir() + "/nix/fetcher-cache-v2.sqlite";
createDirs(dirOf(dbPath));

state->db = SQLite(dbPath);
state->db.isCache();
state->db.exec(schema);

state->add.create(state->db,
"insert or replace into Cache(input, info, path, immutable, timestamp) values (?, ?, ?, ?, ?)");
state->upsert.create(state->db,
"insert or replace into Cache(domain, key, value, timestamp) values (?, ?, ?, ?)");

state->lookup.create(state->db,
"select info, path, immutable, timestamp from Cache where input = ?");
"select value, timestamp from Cache where domain = ? and key = ?");
}

void upsert(
const Attrs & inAttrs,
const Attrs & infoAttrs) override
const Key & key,
const Attrs & value) override
{
_state.lock()->add.use()
(attrsToJSON(inAttrs).dump())
(attrsToJSON(infoAttrs).dump())
("") // no path
(false)
_state.lock()->upsert.use()
(key.first)
(attrsToJSON(key.second).dump())
(attrsToJSON(value).dump())
(time(0)).exec();
}

std::optional<Attrs> lookup(const Attrs & inAttrs) override
std::optional<Attrs> lookup(
const Key & key) override
{
if (auto res = lookupExpired(inAttrs))
return std::move(res->infoAttrs);
if (auto res = lookupExpired(key))
return std::move(res->value);
return {};
}

std::optional<Attrs> lookupWithTTL(const Attrs & inAttrs) override
std::optional<Attrs> lookupWithTTL(
const Key & key) override
{
if (auto res = lookupExpired(inAttrs)) {
if (auto res = lookupExpired(key)) {
if (!res->expired)
return std::move(res->infoAttrs);
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
return std::move(res->value);
debug("ignoring expired cache entry '%s:%s'",
key.first, attrsToJSON(key.second).dump());
}
return {};
}

std::optional<Result2> lookupExpired(const Attrs & inAttrs) override
std::optional<Result> lookupExpired(
const Key & key) override
{
auto state(_state.lock());

auto inAttrsJSON = attrsToJSON(inAttrs).dump();
auto keyJSON = attrsToJSON(key.second).dump();

auto stmt(state->lookup.use()(inAttrsJSON));
auto stmt(state->lookup.use()(key.first)(keyJSON));
if (!stmt.next()) {
debug("did not find cache entry for '%s'", inAttrsJSON);
debug("did not find cache entry for '%s:%s'", key.first, keyJSON);
return {};
}

auto infoJSON = stmt.getStr(0);
auto locked = stmt.getInt(2) != 0;
auto timestamp = stmt.getInt(3);
auto valueJSON = stmt.getStr(0);
auto timestamp = stmt.getInt(1);

debug("using cache entry '%s' -> '%s'", inAttrsJSON, infoJSON);
debug("using cache entry '%s:%s' -> '%s'", key.first, keyJSON, valueJSON);

return Result2 {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
.infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)),
return Result {
.expired = settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0),
.value = jsonToAttrs(nlohmann::json::parse(valueJSON)),
};
}

void add(
void upsert(
Key key,
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
const StorePath & storePath,
bool locked) override
Attrs value,
const StorePath & storePath)
{
_state.lock()->add.use()
(attrsToJSON(inAttrs).dump())
(attrsToJSON(infoAttrs).dump())
(store.printStorePath(storePath))
(locked)
(time(0)).exec();
}
/* Add the store prefix to the cache key to handle multiple
store prefixes. */
key.second.insert_or_assign("store", store.storeDir);

std::optional<std::pair<Attrs, StorePath>> lookup(
Store & store,
const Attrs & inAttrs) override
{
if (auto res = lookupExpired(store, inAttrs)) {
if (!res->expired)
return std::make_pair(std::move(res->infoAttrs), std::move(res->storePath));
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
}
return {};
value.insert_or_assign("storePath", (std::string) storePath.to_string());

upsert(key, value);
}

std::optional<Result> lookupExpired(
Store & store,
const Attrs & inAttrs) override
std::optional<ResultWithStorePath> lookupStorePath(
Key key,
Store & store) override
{
auto state(_state.lock());
key.second.insert_or_assign("store", store.storeDir);

auto inAttrsJSON = attrsToJSON(inAttrs).dump();
auto res = lookupExpired(key);
if (!res) return std::nullopt;

auto stmt(state->lookup.use()(inAttrsJSON));
if (!stmt.next()) {
debug("did not find cache entry for '%s'", inAttrsJSON);
return {};
}
auto storePathS = getStrAttr(res->value, "storePath");
res->value.erase("storePath");

auto infoJSON = stmt.getStr(0);
auto storePath = store.parseStorePath(stmt.getStr(1));
auto locked = stmt.getInt(2) != 0;
auto timestamp = stmt.getInt(3);
ResultWithStorePath res2(*res, StorePath(storePathS));

store.addTempRoot(storePath);
if (!store.isValidPath(storePath)) {
store.addTempRoot(res2.storePath);
if (!store.isValidPath(res2.storePath)) {
// FIXME: we could try to substitute 'storePath'.
debug("ignoring disappeared cache entry '%s'", inAttrsJSON);
return {};
debug("ignoring disappeared cache entry '%s:%s' -> '%s'",
key.first,
attrsToJSON(key.second).dump(),
store.printStorePath(res2.storePath));
return std::nullopt;
}

debug("using cache entry '%s' -> '%s', '%s'",
inAttrsJSON, infoJSON, store.printStorePath(storePath));
debug("using cache entry '%s:%s' -> '%s', '%s'",
key.first,
attrsToJSON(key.second).dump(),
attrsToJSON(res2.value).dump(),
store.printStorePath(res2.storePath));

return Result {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
.infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)),
.storePath = std::move(storePath)
};
return res2;
}

std::optional<ResultWithStorePath> lookupStorePathWithTTL(
Key key,
Store & store) override
{
auto res = lookupStorePath(std::move(key), store);
return res && !res->expired ? res : std::nullopt;
}
};

Expand Down
71 changes: 45 additions & 26 deletions src/libfetchers/cache.hh
Original file line number Diff line number Diff line change
Expand Up @@ -15,61 +15,80 @@ struct Cache
virtual ~Cache() { }

/**
* Add a value to the cache. The cache is an arbitrary mapping of
* Attrs to Attrs.
* A domain is a partition of the key/value cache for a particular
* purpose, e.g. git revision to revcount.
*/
using Domain = std::string_view;

/**
* A cache key is a domain and an arbitrary set of attributes.
*/
using Key = std::pair<Domain, Attrs>;

/**
* Add a key/value pair to the cache.
*/
virtual void upsert(
const Attrs & inAttrs,
const Attrs & infoAttrs) = 0;
const Key & key,
const Attrs & value) = 0;

/**
* Look up a key with infinite TTL.
*/
virtual std::optional<Attrs> lookup(
const Attrs & inAttrs) = 0;
const Key & key) = 0;

/**
* Look up a key. Return nothing if its TTL has exceeded
* `settings.tarballTTL`.
*/
virtual std::optional<Attrs> lookupWithTTL(
const Attrs & inAttrs) = 0;
const Key & key) = 0;

struct Result2
struct Result
{
bool expired = false;
Attrs infoAttrs;
Attrs value;
};

/**
* Look up a key. Return a bool denoting whether its TTL has
* exceeded `settings.tarballTTL`.
*/
virtual std::optional<Result2> lookupExpired(
const Attrs & inAttrs) = 0;

/* Old cache for things that have a store path. */
virtual void add(
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
const StorePath & storePath,
bool locked) = 0;
virtual std::optional<Result> lookupExpired(
const Key & key) = 0;

virtual std::optional<std::pair<Attrs, StorePath>> lookup(
/**
* Insert a cache entry that has a store path associated with
* it. Such cache entries are always considered stale if the
* associated store path is invalid.
*/
virtual void upsert(
Key key,
Store & store,
const Attrs & inAttrs) = 0;
Attrs value,
const StorePath & storePath) = 0;

struct Result
struct ResultWithStorePath : Result
{
bool expired = false;
Attrs infoAttrs;
StorePath storePath;
};

virtual std::optional<Result> lookupExpired(
Store & store,
const Attrs & inAttrs) = 0;
/**
* Look up a store path in the cache. The returned store path will
* be valid, but it may be expired.
*/
virtual std::optional<ResultWithStorePath> lookupStorePath(
Key key,
Store & store) = 0;

/**
* Look up a store path in the cache. Return nothing if its TTL
* has exceeded `settings.tarballTTL`.
*/
virtual std::optional<ResultWithStorePath> lookupStorePathWithTTL(
Key key,
Store & store) = 0;
};

ref<Cache> getCache();
Expand Down
15 changes: 6 additions & 9 deletions src/libfetchers/fetch-to-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,18 @@ StorePath fetchToStore(
// FIXME: add an optimisation for the case where the accessor is
// an FSInputAccessor pointing to a store path.

std::optional<fetchers::Attrs> cacheKey;
std::optional<fetchers::Cache::Key> cacheKey;

if (!filter && path.accessor->fingerprint) {
cacheKey = fetchers::Attrs{
{"_what", "fetchToStore"},
{"store", store.storeDir},
cacheKey = fetchers::Cache::Key{"fetchToStore", {
{"name", std::string{name}},
{"fingerprint", *path.accessor->fingerprint},
{"method", std::string{method.render()}},
{"path", path.path.abs()}
};
if (auto res = fetchers::getCache()->lookup(store, *cacheKey)) {
}};
if (auto res = fetchers::getCache()->lookupStorePath(*cacheKey, store)) {
debug("store path cache hit for '%s'", path);
return res->second;
return res->storePath;
}
} else
debug("source path '%s' is uncacheable", path);
Expand All @@ -47,10 +45,9 @@ StorePath fetchToStore(
name, *path.accessor, path.path, method, HashAlgorithm::SHA256, {}, filter2, repair);

if (cacheKey && mode == FetchMode::Copy)
fetchers::getCache()->add(store, *cacheKey, {}, storePath, true);
fetchers::getCache()->upsert(*cacheKey, store, {}, storePath);

return storePath;
}


}
2 changes: 1 addition & 1 deletion src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
{
auto accessor = getAccessor(treeHash, false);

fetchers::Attrs cacheKey({{"_what", "treeHashToNarHash"}, {"treeHash", treeHash.gitRev()}});
fetchers::Cache::Key cacheKey{"treeHashToNarHash", {{"treeHash", treeHash.gitRev()}}};

if (auto res = fetchers::getCache()->lookup(cacheKey))
return Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256);
Expand Down
4 changes: 2 additions & 2 deletions src/libfetchers/github.cc
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ struct GitArchiveInputScheme : InputScheme

auto cache = getCache();

Attrs treeHashKey{{"_what", "gitRevToTreeHash"}, {"rev", rev->gitRev()}};
Attrs lastModifiedKey{{"_what", "gitRevToLastModified"}, {"rev", rev->gitRev()}};
Cache::Key treeHashKey{"gitRevToTreeHash", {{"rev", rev->gitRev()}}};
Cache::Key lastModifiedKey{"gitRevToLastModified", {{"rev", rev->gitRev()}}};

if (auto treeHashAttrs = cache->lookup(treeHashKey)) {
if (auto lastModifiedAttrs = cache->lookup(lastModifiedKey)) {
Expand Down