Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

E-mail address canonicalization additions #761

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ NickServ
- Blame a specific channel when a NickServ `REGAIN` fails due to a channel ban
- NickServ `RETURN` now enables the `HIDEMAIL` flag if the email was changed
(unless the flag is unset by default)
- New module `nickserv/canonmail` to allow services administrators to test
the configuration of their email canonicalization setup

IRCds
-----
Expand All @@ -209,6 +211,8 @@ Misc
- Make the OperServ `MODLIST` command available to everyone
- Document the `special:authenticated` privilege
- Add a Turkish translation
- New module `misc/canon_domains` to canonicalize various domains associated
with a given email service provider; see the example configuration for details

Build System
------------
Expand Down
48 changes: 48 additions & 0 deletions dist/atheme.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ loadmodule "crypto/pbkdf2"; /* Verify-only, see prev. */
* Core components nickserv/main
* Nickname access lists nickserv/access
* Bad e-mail address blocking nickserv/badmail
* E-mail canonicalization testing nickserv/canonmail
* CertFP fingerprint management nickserv/cert
* DROP command nickserv/drop
* Nickname enforcement nickserv/enforce
Expand Down Expand Up @@ -360,6 +361,7 @@ loadmodule "crypto/pbkdf2"; /* Verify-only, see prev. */
loadmodule "nickserv/main";
#loadmodule "nickserv/access";
loadmodule "nickserv/badmail";
#loadmodule "nickserv/canonmail";
#loadmodule "nickserv/cert";
loadmodule "nickserv/drop";
#loadmodule "nickserv/enforce";
Expand Down Expand Up @@ -939,8 +941,21 @@ loadmodule "groupserv/set_url";
* with portal software and other useful things. To enable it, load this
* module, and uncomment the httpd { } block towards the bottom of the config.
*
* There are also modules that provide e-mail address canonicalization, thus
* allowing per-email registration limits to be applied to the "canonical"
* form of an address; this includes e.g. stripping out a +suffix or
* considering multiple domains of a single provider as if they were the same.
*
* Of these, the canon_gmail module strips +suffixes and dots from GMail
* localparts and requires no configuration. The canon_domains module has its
* own configuration block; see further below for documentation.
*
* Email domain canonicalization misc/canon_domains
* GMail localpart canonicalization misc/canon_gmail
* HTTP Server misc/httpd
*/
#loadmodule "misc/canon_domains";
#loadmodule "misc/canon_gmail";
#loadmodule "misc/httpd";


Expand Down Expand Up @@ -2367,6 +2382,39 @@ proxyscan {



/* Email domain canonicalizer configuration.
*
* This canonicalizer maps all domains associated with an email service
* provider onto a single one (the shortest domain listed for that provider)
* for canonicalization purposes.
*/
canon_domains {

/* (*) provider
*
* Each provider block contains a list of domains that will be considered
* the same for registration limit purposes.
*
* You can configure any number of providers; however, any given domain
* can only be listed once. (Services will warn you about a configuration
* that contains duplicates, as there is no meaningful way to handle them.)
*
* Note that there is currently no support for any sort of wildcard
* entries; domains can only be listed individually.
*/
provider {
"atheme.org";
"atheme.services";
};

provider {
"example.org";
"example.net";
"example.com";
};
};



/* HTTP server configuration.
*
* The HTTP server in Services is used for serving XMLRPC requests. It can
Expand Down
14 changes: 14 additions & 0 deletions help/default/nickserv/canonmail
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Help for CANONMAIL:

CANONMAIL takes an email address and looks up the
canonicalized form.

This is an internal representation of the email address
used in registration limit checks; two addresses that
canonicalize to the same string will be considered the
same address for purposes of such limits.

Syntax: CANONMAIL <email>

Example:
/msg &nick& CANONMAIL ilbelkyr@atheme.org
1 change: 1 addition & 0 deletions modules/misc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ include ../../extra.mk

MODULE = misc
SRCS = \
canon_domains.c \
canon_gmail.c \
httpd.c

Expand Down
145 changes: 145 additions & 0 deletions modules/misc/canon_domains.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* SPDX-License-Identifier: ISC
* SPDX-URL: https://spdx.org/licenses/ISC.html
*
* Copyright (C) 2012 Marien Zwart
* Copyright (C) 2018, 2021 Atheme Development Group (https://atheme.github.io/)
*
* Canonicalize multiple email domains to treat them as one.
*/

#include <atheme.h>

// Maps each configured email domain to the canonical domain of its provider.
// The tree is created with strcasecanon as its key canonicalizer; values are
// sstrdup()'d strings owned by the map and released by
// canon_domain_map_destroy_cb() when the map is destroyed.
static mowgli_patricia_t *canon_domain_map;

/*
 * Email canonicalizer callback.
 *
 * Looks up the domain part of `email` (everything after the first '@') in
 * canon_domain_map and, when a mapping exists, overwrites the domain part in
 * place with the mapped canonical domain. Addresses without an '@' and
 * addresses whose domain is not configured are left untouched.
 *
 * user_data is unused.
 */
static void
email_canonicalize_domains(char email[static (EMAILLEN + 1)],
                           void ATHEME_VATTR_UNUSED *const restrict user_data)
{
	char *const at_sign = strchr(email, '@');

	if (at_sign == NULL)
		return;

	// The domain starts immediately after the '@'
	char *const domain = at_sign + 1;
	const char *const canon_domain = mowgli_patricia_retrieve(canon_domain_map, domain);

	if (canon_domain == NULL)
		return;

	// The configuration handler always selects the shortest domain of a
	// provider as the canonical one, so the result should never outgrow
	// the buffer; verify regardless before copying into it
	return_if_fail((domain - email + strlen(canon_domain)) <= EMAILLEN);

	// Log before the copy below: `domain` points into `email`, so both
	// still show the original address here
	slog(LG_DEBUG, "%s: '%s' -> '%s' for address '%s'", MOWGLI_FUNC_NAME, domain, canon_domain, email);

	strcpy(domain, canon_domain);
}

/*
 * Configuration handler for the top-level canon_domains { } block.
 *
 * Each provider { } sub-block lists domains that are to be treated as one.
 * All domains of a block that are not already present in canon_domain_map
 * are mapped to the shortest such domain (the first one listed wins a tie),
 * so that the in-place rewrite done by email_canonicalize_domains() never
 * has to lengthen an address.
 *
 * Problems are reported as configuration warnings and the offending entry
 * is skipped. Always returns 0.
 */
static int
c_canon_domains(mowgli_config_file_entry_t *ce)
{
	if (ce->entries == NULL)
	{
		conf_report_warning(ce, "no parameter for configuration option");
		return 0;
	}

	mowgli_config_file_entry_t *group;

	MOWGLI_ITER_FOREACH(group, ce->entries)
	{
		if (strcasecmp("PROVIDER", group->varname) != 0)
		{
			conf_report_warning(group, "Invalid configuration option");
			continue;
		}

		if (group->entries == NULL)
		{
			conf_report_warning(group, "no parameter for configuration option");
			continue;
		}

		mowgli_config_file_entry_t *domain_ce;

		const char *shortest = NULL;
		size_t shortest_len = SIZE_MAX;

		// First pass: find the shortest domain of this provider.
		MOWGLI_ITER_FOREACH(domain_ce, group->entries)
		{
			// Domains already claimed (by an earlier provider) must not become
			// the canonical form of this one. No warning here; the second pass
			// reports every duplicate.
			if (mowgli_patricia_retrieve(canon_domain_map, domain_ce->varname))
				continue;

			const size_t len = strlen(domain_ce->varname);

			if (len < shortest_len)
			{
				shortest = domain_ce->varname;
				shortest_len = len;
			}
		}

		// Second pass: map every new domain of this provider to the shortest one.
		MOWGLI_ITER_FOREACH(domain_ce, group->entries)
		{
			// This also catches a domain repeated within the same provider block:
			// its first occurrence has been added by an earlier iteration of
			// this very loop.
			if (mowgli_patricia_retrieve(canon_domain_map, domain_ce->varname))
			{
				// Report against the duplicated entry itself rather than the
				// enclosing provider block, so that the warning identifies
				// which domain is at fault.
				conf_report_warning(domain_ce, "duplicate entry for domain");
				continue;
			}

			if (!shortest)
			{
				// Unreachable unless the two passes disagree about duplicates
				slog(LG_ERROR, "%s: passed duplicate check but no shortest domain found (BUG)", MOWGLI_FUNC_NAME);
				break;
			}

			// The map owns its values; each entry gets a private copy
			mowgli_patricia_add(canon_domain_map, domain_ce->varname, sstrdup(shortest));
		}
	}

	return 0;
}

// Per-element destructor handed to mowgli_patricia_destroy(): releases the
// sstrdup()'d canonical domain stored as the element's value.
static void
canon_domain_map_destroy_cb(const char ATHEME_VATTR_UNUSED *key,
                            void *data,
                            void ATHEME_VATTR_UNUSED *privdata)
{
	sfree(data);
}

// config_purge hook: discard every configured mapping (freeing the stored
// strings) and start over with an empty map.
static void
on_config_purge(void ATHEME_VATTR_UNUSED *hdata)
{
	mowgli_patricia_destroy(canon_domain_map, canon_domain_map_destroy_cb, NULL);

	canon_domain_map = mowgli_patricia_create(strcasecanon);
}

// Module load: create the (initially empty) domain map, then hook up the
// config purge handler, the CANON_DOMAINS configuration block handler and
// the email canonicalizer callback.
static void
mod_init(struct module ATHEME_VATTR_UNUSED *const restrict m)
{
	canon_domain_map = mowgli_patricia_create(strcasecanon);

	hook_add_config_purge(on_config_purge);
	add_top_conf("CANON_DOMAINS", c_canon_domains);
	register_email_canonicalizer(email_canonicalize_domains, NULL);
}

// Module unload: detach everything mod_init() registered, then free the
// domain map together with the strings it owns.
static void
mod_deinit(const enum module_unload_intent ATHEME_VATTR_UNUSED intent)
{
	unregister_email_canonicalizer(email_canonicalize_domains, NULL);
	hook_del_config_purge(on_config_purge);
	del_top_conf("CANON_DOMAINS");

	mowgli_patricia_destroy(canon_domain_map, canon_domain_map_destroy_cb, NULL);
}

// Module declaration for misc/canon_domains.
// NOTE(review): presumably wires up mod_init()/mod_deinit() above by name and
// marks the module as unloadable at runtime -- confirm against the macro definition.
SIMPLE_DECLARE_MODULE_V1("misc/canon_domains", MODULE_UNLOAD_CAPABILITY_OK)
1 change: 1 addition & 0 deletions modules/nickserv/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ MODULE = nickserv
SRCS = \
access.c \
badmail.c \
canonmail.c \
cert.c \
drop.c \
enforce.c \
Expand Down
53 changes: 53 additions & 0 deletions modules/nickserv/canonmail.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* SPDX-License-Identifier: ISC
* SPDX-URL: https://spdx.org/licenses/ISC.html
*
* Copyright (C) 2021 Atheme Development Group (https://atheme.github.io/)
*
* Returns the canonicalized form of an email address.
*/

#include <atheme.h>

static void
ns_cmd_canonmail(struct sourceinfo *si, int parc, char *parv[])
{
char *email = parv[0];

if (!email)
{
command_fail(si, fault_needmoreparams, STR_INSUFFICIENT_PARAMS, "CANONMAIL");
command_fail(si, fault_needmoreparams, _("Usage: CANONMAIL <email>"));
return;
}

stringref email_canonical = canonicalize_email(email);

command_success_string(si, email_canonical, _("Email address \2%s\2 canonicalizes to \2%s\2"), email, email_canonical);

strshare_unref(email_canonical);
}

// Descriptor for the CANONMAIL command; bound to the "nickserv" service in
// mod_init() and unbound again in mod_deinit(). It accepts at most one
// parameter (the email address), names PRIV_USER_AUSPEX as its access
// requirement, and points at the nickserv/canonmail help file.
static struct command ns_canonmail = {
.name = "CANONMAIL",
.desc = N_("Displays the canonicalized form of an e-mail address."),
.access = PRIV_USER_AUSPEX,
.maxparc = 1,
.cmd = &ns_cmd_canonmail,
.help = { .path = "nickserv/canonmail" },
};

// Module load: make CANONMAIL available on the "nickserv" service.
static void
mod_init(struct module *const restrict m)
{
// NOTE(review): the macro is used without a trailing semicolon and
// presumably bails out of mod_init() when nickserv/main cannot be
// loaded -- confirm against its definition.
MODULE_TRY_REQUEST_DEPENDENCY(m, "nickserv/main")
service_named_bind_command("nickserv", &ns_canonmail);
}

// Module unload: remove the CANONMAIL command bound by mod_init().
static void
mod_deinit(const enum module_unload_intent ATHEME_VATTR_UNUSED intent)
{
service_named_unbind_command("nickserv", &ns_canonmail);
}

// Module declaration for nickserv/canonmail. The macro invocation takes no
// trailing semicolon, as in misc/canon_domains: it is assumed to expand to a
// complete definition, so an extra ';' would leave an empty declaration at
// file scope.
SIMPLE_DECLARE_MODULE_V1("nickserv/canonmail", MODULE_UNLOAD_CAPABILITY_OK)