Skip to content

Commit

Permalink
Revised the hide/ignore-referrer option to filter by hostname directly.
Browse files Browse the repository at this point in the history
This still allows for the continued use of wildcards if desired. e.g.,

--ignore-referrer=wiki.google.com will ignore:
 http://wiki.google.com/
 https://wiki.google.com/
 https://wiki.google.com
 http://wiki.google.com

OR

--ignore-referrer=*wiki.google.c* will work the same way.

Fixes #2562
  • Loading branch information
allinurl committed Apr 7, 2024
1 parent dc0f9d1 commit 5cb1ff7
Showing 1 changed file with 64 additions and 38 deletions.
102 changes: 64 additions & 38 deletions src/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,37 @@ wc_match (const char *wc, char *str) {
return 0;
}

/**
 * Extracts the hostname part from a given URL.
 *
 * An optional leading "scheme://" is skipped and the result runs up to the
 * first '/', '?' or '#' (or to the end of the string when none is present).
 * Userinfo and port, if any, are kept as part of the returned string.
 *
 * On error (url is NULL), NULL is returned.
 * On success, a dynamically allocated string containing the hostname is
 * returned; the caller is responsible for freeing it.
 */
static char *
extract_hostname (const char *url) {
  const char *start = NULL, *sep = NULL;
  char *hostname = NULL;
  size_t len = 0;

  if (url == NULL)
    return NULL;

  /* Only honour "://" when it terminates the scheme, i.e. when its first
   * slash is also the first delimiter in the string. Otherwise a URL embedded
   * in the path or query (e.g. "host/r?u=http://x") would be mistaken for
   * the scheme separator. */
  start = url;
  sep = strstr (url, "://");
  if (sep != NULL && strcspn (url, "/?#") == (size_t) (sep - url) + 1)
    start = sep + 3;

  /* the authority ends at the path, query or fragment, whichever is first */
  len = strcspn (start, "/?#");

  hostname = xmalloc (len + 1);
  /* length is known, so a plain bounded copy plus explicit terminator */
  memcpy (hostname, start, len);
  hostname[len] = '\0';

  return hostname;
}

/* Generic routine to extract all groups from a string given a POSIX regex.
*
* If no match found or error, NULL is returned.
Expand Down Expand Up @@ -262,66 +293,61 @@ regex_extract_string (const char *str, const char *regex, int max_groups, char c
return ret == 0 ? dest : NULL;
}

/* Determine if the given host needs to be ignored given the list of
* referrers to ignore.
*
* On error, or the referrer is not found, 0 is returned
* On success, or if the host needs to be ignored, 1 is returned */
int
ignore_referer (const char *host) {
char *needle = NULL;
static int
handle_referer (const char *host, const char **referers, int referer_idx) {
char *needle = NULL, *hostname = NULL;
int i, ignore = 0;

if (conf.ignore_referer_idx == 0)
if (referer_idx == 0)
return 0;

if (host == NULL || *host == '\0')
return 0;

needle = xstrdup (host);
for (i = 0; i < conf.ignore_referer_idx; ++i) {
if (conf.ignore_referers[i] == NULL || *conf.ignore_referers[i] == '\0')
for (i = 0; i < referer_idx; ++i) {
if (referers[i] == NULL || *referers[i] == '\0')
continue;

if (wc_match (conf.ignore_referers[i], needle)) {
ignore = 1;
goto out;
if (strchr (referers[i], '*') != NULL || strchr (referers[i], '?') != NULL) {
if (wc_match (referers[i], needle)) {
ignore = 1;
goto out;
}
} else {
hostname = extract_hostname (host);
if (strcmp (referers[i], hostname) == 0) {
ignore = 1;
free (hostname);
goto out;
}
free (hostname);
}
}

out:
free (needle);

return ignore;
}

/* Determine whether the given referrer host needs to be ignored.
*
* Thin wrapper: forwards host to handle_referer() together with the
* configured list of referrers to ignore (conf.ignore_referers) and the
* number of entries in it (conf.ignore_referer_idx).
*
* On error, or if the referrer is not found, 0 is returned.
* If the host needs to be ignored, 1 is returned. */
int
ignore_referer (const char *host) {
return handle_referer (host, conf.ignore_referers, conf.ignore_referer_idx);
}

/* Determine if the given host needs to be hidden given the list of
* referrers to hide.
*
* On error, or the referrer is not found, 0 is returned
* On success, or if the host needs to be hidden, 1 is returned */
int
hide_referer (const char *host) {
char *needle = NULL;
int i, ignore = 0;

if (conf.hide_referer_idx == 0)
return 0;
if (host == NULL || *host == '\0')
return 0;

needle = xstrdup (host);
for (i = 0; i < conf.hide_referer_idx; ++i) {
if (conf.hide_referers[i] == NULL || *conf.hide_referers[i] == '\0')
continue;

if (wc_match (conf.hide_referers[i], needle)) {
ignore = 1;
goto out;
}
}
out:
free (needle);

return ignore;
return handle_referer (host, conf.hide_referers, conf.hide_referer_idx);
}

/* Determine if the given ip is within a range of IPs.
Expand Down Expand Up @@ -585,7 +611,7 @@ parse_tz_specifier (const char *str, const char *fmt, struct tm *tm) {
return 0;
}

/* try to parse timezone offset else bail early, +/-0500*/
/* try to parse timezone offset else bail early, +/-0500 */
if ((*end != '+' && *end != '-') || strlen (end) < 4)
return 1;

Expand Down

0 comments on commit 5cb1ff7

Please sign in to comment.