
Git clone cache #451

Closed · wants to merge 10 commits
19 changes: 19 additions & 0 deletions lib/client/index.js
@@ -4,6 +4,7 @@ const socket = require('./socket');
const relay = require('../relay');
const logger = require('../log');
const version = require('../version');
const promBundle = require('express-prom-bundle');

module.exports = ({ port = null, config = {}, filters = {} }) => {
logger.info({ version }, 'running in client mode');
@@ -25,6 +26,24 @@ module.exports = ({ port = null, config = {}, filters = {} }) => {
// start the local webserver to listen for relay requests
const { app, server } = require('../webserver')(config, port);

// This is gross, but the tests run both servers in the same VM so without this we get an error about duplicate metrics...
if (!process.env.TAP && process.env.NODE_ENV !== 'test') {
// basic prometheus metrics
const metricsMiddleware = promBundle({
buckets: [0.5, 1, 2, 5, 10, 30, 60, 120, 300],
includeMethod: true,
includePath: false,
metricsPath: '/metrics',
promClient: {
collectDefaultMetrics: {
timeout: 3000,
},
},
});

app.use(metricsMiddleware);
}

// IMPORTANT: defined before relay (`app.all('/*', ...`)
app.get(config.brokerHealthcheckPath || '/healthcheck', (req, res) => {
// healthcheck state depends on websocket connection status
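A note on the guard above: rather than skipping metrics entirely under test, prom-client's global registry can be reset between test runs. A minimal sketch of that alternative, assuming a tap/jest-style setup hook (not part of this diff):

const promClient = require('prom-client');

// Test setup: clear the default registry so constructing the middleware twice
// in one process does not throw "A metric with the name ... has already been registered".
promClient.register.clear();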
287 changes: 287 additions & 0 deletions lib/gh-git-clone.js
@@ -0,0 +1,287 @@
const config = require('./config');
const fs = require('fs');
const { spawn } = require('child_process');
const NodeCache = require('node-cache');
const path = require('path');
const os = require('os');
const logger = require('./log');
const { incrementCacheHit, incrementCacheMiss } = require('./metrics');

const inflight = new Map();
// The output directory is tracked in two stores: gitCaches should only hold
// completed clones, but requests arriving while a clone is still in flight
// need the output directory too, so it also lives in outputDirs
const outputDirs = new Map();
const gitCaches = new NodeCache({
stdTTL: parseInt(config.cacheExpiry) || 300, // 5 minutes
checkperiod: parseInt(config.cacheCheckPeriod) || 60, // 1 min
useClones: false,
});

gitCaches.on('expired', (key, value) => {
logger.debug(
{ cacheKey: key, path: value },
'cached clone of git repo has expired - removing',
);
fs.rm(value, { recursive: true, force: true }, (err) => {
// Under load this key can be overwritten by a new clone before this callback fires
if (outputDirs.get(key) === value) {
outputDirs.delete(key);
}
if (err) {
logger.error(
{ cacheKey: key, path: value, err },
'unable to delete cached GitHub checkout',
);
} else {
logger.debug(
{ cacheKey: key, path: value, err },
'successfully deleted cache',
);
}
});
});

class GitHubCache {
#logContext;
#path;
#filePathMatcher;
#treeRefMatcher;
#protocol;
#origin;

#org;
#repo;
#filePath;
#ref;
#repoUrl;
#cacheKey;

constructor(logContext, requestPath, authorization) {
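// filePathExtractor matches Contents API paths such as
//   https://<host>/repos/:org/:repo/contents/:path?ref=:ref
// treeRefExtractor matches Git Trees API paths such as
//   https://<host>/repos/:org/:repo/git/trees/:ref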
const filePathExtractor =
/(?<protocol>https?).*\/(?:repos\/)?(?<org>[^/]+)\/(?<repo>[^/]+)\/contents\/(?<path>[^?]*)(?:\?ref=(?<ref>.+))?/g;
const treeRefExtractor =
/(?<protocol>https?).*\/repos\/(?<org>[^/]+)\/(?<repo>[^/]+)\/git\/trees\/(?<ref>.+)/gi;
this.#logContext = logContext;
this.#path = requestPath;
this.#filePathMatcher = filePathExtractor.exec(requestPath);
this.#treeRefMatcher = treeRefExtractor.exec(requestPath);

if (this.#filePathMatcher?.groups) {
this.#origin = config.github;
this.#protocol = this.#filePathMatcher.groups.protocol;
this.#org = this.#filePathMatcher.groups.org;
this.#repo = this.#filePathMatcher.groups.repo;
this.#filePath = this.#filePathMatcher.groups.path.replaceAll(
/%2f/gi,
'/',
);
this.#ref = this.#filePathMatcher.groups.ref || 'master';

this.#repoUrl = `${this.#protocol}://${authorization}@${this.#origin}/${
this.#org
}/${this.#repo}.git`;
this.#cacheKey = `${this.#repoUrl}#${this.#ref}`;

this.#logContext = {
...logContext,
protocol: this.#protocol,
origin: this.#origin,
org: this.#org,
repo: this.#repo,
filePath: this.#filePath,
repoUrl: this.#repoUrl,
ref: this.#ref,
cacheKey: this.#cacheKey,
};
} else if (this.#treeRefMatcher?.groups) {
this.#origin = config.github;
this.#protocol = this.#treeRefMatcher.groups.protocol;
this.#org = this.#treeRefMatcher.groups.org;
this.#repo = this.#treeRefMatcher.groups.repo;
this.#ref = this.#treeRefMatcher.groups.ref || 'master';

this.#repoUrl = `${this.#protocol}://${authorization}@${this.#origin}/${
this.#org
}/${this.#repo}.git`;
this.#cacheKey = `${this.#repoUrl}#${this.#ref}`;

this.#logContext = {
...logContext,
protocol: this.#protocol,
origin: this.#origin,
org: this.#org,
repo: this.#repo,
repoUrl: this.#repoUrl,
ref: this.#ref,
cacheKey: this.#cacheKey,
};
}
}

static enabled() {
return config.useGitHubCloneCache === 'true';
}

pathSupported() {
return this.#filePathMatcher?.groups;
}

pathTriggersCaching() {
return this.#treeRefMatcher?.groups;
}

loadCache() {
if (gitCaches.has(this.#cacheKey)) {
logger.trace(
this.#logContext,
'git clone cache already exists - ignoring',
);
return;
}

if (inflight.has(this.#cacheKey)) {
logger.debug(
this.#logContext,
'existing in-flight git clone found - ignoring',
);
return;
}

const baseDir = config.gitHubCloneDirectory || os.tmpdir();
const outputDir = fs.mkdtempSync(path.join(baseDir, 'broker-snyk-client'));
outputDirs.set(this.#cacheKey, outputDir);
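// Clone the repository and check out the requested ref in one shell invocation.
// GIT_SSL_NO_VERIFY disables TLS verification, presumably to tolerate GitHub
// Enterprise instances behind self-signed certificates.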
const task = spawn('/bin/sh', [
'-c',
`GIT_SSL_NO_VERIFY=true git clone ${
this.#repoUrl
} ${outputDir} && cd ${outputDir} && git checkout ${this.#ref}`,
]);
inflight.set(this.#cacheKey, task);

let stdout = '';
let stderr = '';
task.stdout.on('data', (data) => (stdout += data.toString()));
task.stderr.on('data', (data) => (stderr += data.toString()));
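// handle() attaches an extra 'exit' listener for every request that arrives
// while this clone is still in flight, so lift the default 10-listener cap.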
task.setMaxListeners(0);

task.on('exit', (code) => {
inflight.delete(this.#cacheKey);

if (code) {
logger.error(
{ ...this.#logContext, code, stdout, stderr },
'received error performing git clone',
);
return;
}

gitCaches.set(this.#cacheKey, outputDir);
});
}

handle(hitCallback, missCallback, errorCallback) {
if (gitCaches.has(this.#cacheKey)) {
logger.trace(
this.#logContext,
'git clone cache found, looking up request',
);
this.#checkFsCache(
gitCaches.get(this.#cacheKey),
hitCallback,
missCallback,
errorCallback,
);
return true;
}

if (inflight.has(this.#cacheKey)) {
logger.debug(
this.#logContext,
'existing in-flight git clone found, adding listener',
);
inflight.get(this.#cacheKey).on('exit', (code) => {
if (code) {
logger.debug(
{ ...this.#logContext, code },
'non-zero error code for additional listener - returning error',
);
errorCallback(code);
} else {
logger.debug(this.#logContext, 'clone succeeded');
this.#checkFsCache(
outputDirs.get(this.#cacheKey),
hitCallback,
missCallback,
errorCallback,
);
}
});
return true;
}
return false;
}

#checkFsCache(outputDir, hitCallback, missCallback, errorCallback) {
if (!outputDir || !this.#filePath) {
incrementCacheMiss();
logger.error(
{ ...this.#logContext, outputDir },
'outputDir or filePath is undefined',
);
if (errorCallback) {
errorCallback({
message:
'unexpected internal error - one or more variables undefined when they should be defined',
});
} else {
logger.error(
this.#logContext,
'errorCallback is undefined - something has gone very wrong',
);
console.trace();
}
return;
}
const pathOnDisk = path.join(outputDir, this.#filePath);
if (fs.existsSync(pathOnDisk)) {
incrementCacheHit();
logger.trace(this.#logContext, 'filePath found in cache');
fs.readFile(pathOnDisk, (err, data) => {
if (err) {
errorCallback(err);
} else {
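// Synthesise a GitHub Contents API-shaped response. Fields that only the
// real API knows (sha, url, html_url, git_url) cannot be recovered from a
// plain checkout, so they are left null/empty.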
const response = JSON.stringify({
name: path.basename(pathOnDisk),
path: this.#filePath,
sha: null,
size: data.length,
url: null,
html_url: null,
git_url: null,
type: 'file',
content: data.toString('base64'),
encoding: 'base64',
_links: [],
});

hitCallback({
status: 200,
headers: {
'Content-Type': 'application/json; charset=utf-8',
'Content-Length': response.length,
},
body: response,
});
}
});
} else {
incrementCacheMiss();
logger.trace(this.#logContext, 'filePath not found in cache');
missCallback();
}
}
}

module.exports = {
GitHubCache,
};
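For context, a relay handler would be expected to consult the cache before proxying a request to GitHub. A minimal sketch of that wiring, with hypothetical handler and proxy names (the actual integration lives elsewhere in the broker codebase):

const { GitHubCache } = require('./gh-git-clone');

function maybeServeFromCache(logContext, req, res, proxyToGitHub) {
  if (!GitHubCache.enabled()) return proxyToGitHub();
  const cache = new GitHubCache(logContext, req.url, req.headers.authorization);
  // tree lookups warm the cache for the file requests that follow them
  if (cache.pathTriggersCaching()) cache.loadCache();
  if (
    cache.pathSupported() &&
    cache.handle(
      (r) => res.status(r.status).set(r.headers).send(r.body), // hit: serve from disk
      () => proxyToGitHub(), // miss: fall through to the real API
      () => proxyToGitHub(), // error: fail open
    )
  ) {
    return;
  }
  proxyToGitHub();
}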
2 changes: 2 additions & 0 deletions lib/log.js
@@ -151,6 +151,8 @@ const log = bunyan.createLogger({
url: sanitise,
httpUrl: sanitise,
ioUrl: sanitise,
repoUrl: sanitise,
cacheKey: sanitise,
headers: sanitiseHeaders,
err: serialiseError,
error: serialiseError,
20 changes: 20 additions & 0 deletions lib/metrics.js
@@ -28,6 +28,16 @@ const unableToSizeResponseCounter = new Counter({
help: 'A count of the number of times broker server was unable to size a response',
});

const cacheHitCounter = new Counter({
name: 'broker_github_clone_cache_hit_count',
help: 'Number of requests that looked in a cloned cache of a GitHub repository and found the desired file',
});

const cacheMissCounter = new Counter({
name: 'broker_github_clone_cache_miss_count',
help: 'Number of requests that looked in a cloned cache of a GitHub repository and did not find the desired file',
});

function incrementSocketConnectionGauge() {
socketConnectionGauge.inc(1);
}
@@ -44,9 +54,19 @@ function incrementUnableToSizeResponse() {
unableToSizeResponseCounter.inc(1);
}

function incrementCacheHit() {
cacheHitCounter.inc(1);
}

function incrementCacheMiss() {
cacheMissCounter.inc(1);
}

module.exports = {
incrementSocketConnectionGauge,
decrementSocketConnectionGauge,
observeResponseSize,
incrementUnableToSizeResponse,
incrementCacheHit,
incrementCacheMiss,
};
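With the two counters above, cache effectiveness can be tracked in Prometheus; for example, the rolling hit ratio is rate(broker_github_clone_cache_hit_count[5m]) / (rate(broker_github_clone_cache_hit_count[5m]) + rate(broker_github_clone_cache_miss_count[5m])).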