diff --git a/tests/cli_tests.py b/tests/cli_tests.py
index 7632850b..9d27e1f9 100644
--- a/tests/cli_tests.py
+++ b/tests/cli_tests.py
@@ -18,6 +18,7 @@ from trafilatura import cli, cli_utils, settings, spider
 from trafilatura.downloads import add_to_compressed_dict, fetch_url
 from trafilatura.filters import LANGID_FLAG
+from trafilatura.settings import args_to_extractor
 
 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
 
 RESOURCES_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources')
@@ -193,7 +194,7 @@ def test_sysoutput():
     result = 'DADIDA'
     cli_utils.write_result(result, args)
     # process with backup directory and no counter
-    options = cli_utils._args_to_extractor(args)
+    options = args_to_extractor(args)
     assert cli_utils.process_result('DADIDA', args, None, options) is None
     # test keeping dir structure
     testargs = ['', '-i', 'myinputdir/', '-o', 'test/', '--keep-dirs']
@@ -333,7 +334,7 @@ def test_file_processing():
     args.input_dir = RESOURCES_DIR
     cli_utils.file_processing_pipeline(args)
     # test manually
-    options = cli_utils._args_to_extractor(args)
+    options = args_to_extractor(args)
     for f in cli_utils.generate_filelist(args.input_dir):
         cli_utils.file_processing(f, args, options=options)
 
@@ -346,7 +347,7 @@ def test_cli_config_file():
     with open(os.path.join(RESOURCES_DIR, 'httpbin_sample.html'), 'r', encoding="utf-8") as f:
         teststring = f.read()
     args.config_file = os.path.join(RESOURCES_DIR, args.config_file)
-    options = cli_utils._args_to_extractor(args)
+    options = args_to_extractor(args)
     assert cli.examine(teststring, args, options=options) is None
 
 
diff --git a/tests/downloads_tests.py b/tests/downloads_tests.py
index 03652a2e..9d1c8066 100644
--- a/tests/downloads_tests.py
+++ b/tests/downloads_tests.py
@@ -24,10 +24,9 @@
 from courlan import UrlStore
 
 from trafilatura.cli import parse_args
-from trafilatura.cli_utils import (_args_to_extractor,
-                                   download_queue_processing,
+from trafilatura.cli_utils import (download_queue_processing,
                                    url_processing_pipeline)
-from trafilatura.core import extract
+from trafilatura.core import Extractor, extract
 import trafilatura.downloads
 from trafilatura.downloads import (DEFAULT_HEADERS, USER_AGENT, Response,
                                    _determine_headers, _handle_response,
@@ -36,7 +35,7 @@
                                    _urllib3_is_live_page, add_to_compressed_dict,
                                    fetch_url, is_live_page, load_download_buffer)
-from trafilatura.settings import DEFAULT_CONFIG, use_config
+from trafilatura.settings import DEFAULT_CONFIG, args_to_extractor, use_config
 from trafilatura.utils import decode_file, decode_response, load_html
 
 
 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
@@ -48,6 +47,8 @@
 RESOURCES_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources')
 UA_CONFIG = use_config(filename=os.path.join(RESOURCES_DIR, 'newsettings.cfg'))
 
+DEFAULT_OPTS = Extractor(config=DEFAULT_CONFIG)
+
 
 def _reset_downloads_global_objects():
     """
@@ -101,8 +102,8 @@ def test_fetch():
     if pycurl is not None:
         response1 = _send_pycurl_request('https://httpbun.com/status/200', True, True, DEFAULT_CONFIG)
         assert response1.headers["x-powered-by"].startswith("httpbun")
-        assert _handle_response(url, response1, False, DEFAULT_CONFIG).data == _handle_response(url, response, False, DEFAULT_CONFIG).data
-        assert _handle_response(url, response1, True, DEFAULT_CONFIG) == _handle_response(url, response, True, DEFAULT_CONFIG)
+        assert _handle_response(url, response1, False, DEFAULT_OPTS).data == _handle_response(url, response, False, DEFAULT_OPTS).data
+        assert _handle_response(url, response1, True, DEFAULT_OPTS) == _handle_response(url, response, True, DEFAULT_OPTS)
     # response object
     # too large response object
     data = ""
@@ -111,14 +112,14 @@ def test_fetch():
     response = Response(data, status, url)
     # too large
     response.data = b'ABC'*10000000
-    assert _handle_response(response.url, response, False, DEFAULT_CONFIG) is None
+    assert _handle_response(response.url, response, False, DEFAULT_OPTS) is None
     # too small
     response.data = b'ABC'
-    assert _handle_response(response.url, response, False, DEFAULT_CONFIG) is None
+    assert _handle_response(response.url, response, False, DEFAULT_OPTS) is None
     # straight handling of response object
     with open(os.path.join(RESOURCES_DIR, 'utf8.html'), 'rb') as filehandle:
         response.data = filehandle.read()
-    assert _handle_response(response.url, response, False, DEFAULT_CONFIG) is not None
+    assert _handle_response(response.url, response, False, DEFAULT_OPTS) is not None
     assert load_html(response) is not None
     # nothing to see here
     assert extract(response, url=response.url, config=ZERO_CONFIG) is None
@@ -198,7 +199,7 @@ def test_queue():
     url_store = add_to_compressed_dict(inputurls)
     args.archived = True
     args.config_file = os.path.join(RESOURCES_DIR, 'newsettings.cfg')
-    options = _args_to_extractor(args)
+    options = args_to_extractor(args)
     options.config['DEFAULT']['SLEEP_TIME'] = '0.2'
     results = download_queue_processing(url_store, args, None, options)
     assert len(results[0]) == 5 and results[1] is None
diff --git a/trafilatura/cli_utils.py b/trafilatura/cli_utils.py
index cea883da..5d8f8d4a 100644
--- a/trafilatura/cli_utils.py
+++ b/trafilatura/cli_utils.py
@@ -20,17 +20,18 @@
 from trafilatura import spider
 
 from .baseline import html2txt
-from .core import Extractor, extract
+from .core import extract
 from .downloads import (add_to_compressed_dict, buffered_downloads,
                         load_download_buffer)
 from .feeds import find_feed_urls
 from .filters import LANGID_FLAG, language_classifier
 from .hashing import generate_hash_filename
 from .meta import reset_caches
-from .settings import FILENAME_LEN, MAX_FILES_PER_DIRECTORY, use_config
+from .settings import FILENAME_LEN, MAX_FILES_PER_DIRECTORY, args_to_extractor
 from .sitemaps import sitemap_search
 from .utils import URL_BLACKLIST_REGEX, make_chunks
 
+
 LOGGER = logging.getLogger(__name__)
 
 random.seed(345)  # make generated file names reproducible
@@ -217,7 +218,7 @@ def download_queue_processing(url_store, args, counter, options):
     while url_store.done is False:
         bufferlist, url_store = load_download_buffer(url_store, options.config.getfloat('DEFAULT', 'SLEEP_TIME'))
         # process downloads
-        for url, result in buffered_downloads(bufferlist, args.parallel):
+        for url, result in buffered_downloads(bufferlist, args.parallel, options=options):
             # handle result
             if result is not None:
                 options.url = url
@@ -235,12 +236,12 @@ def cli_discovery(args):
     if args.list:
         url_store.reset()
 
-    config = use_config(filename=args.config_file)
+    options = args_to_extractor(args)
     func = partial(
         find_feed_urls if args.feed else sitemap_search,
         target_lang=args.target_language,
-        external=config.getboolean('DEFAULT', 'EXTERNAL_URLS'),
-        sleep_time=config.getfloat('DEFAULT', 'SLEEP_TIME')
+        external=options.config.getboolean('DEFAULT', 'EXTERNAL_URLS'),
+        sleep_time=options.config.getfloat('DEFAULT', 'SLEEP_TIME')
     )
 
     # link discovery and storage
@@ -264,7 +265,7 @@ def cli_discovery(args):
     if args.explore:
         # add to compressed dict and crawl the remaining websites
         control_dict = build_exploration_dict(url_store, input_urls, args)
-        cli_crawler(args, url_store=control_dict)
+        cli_crawler(args, url_store=control_dict, options=options)
 
 
 def build_exploration_dict(url_store, input_urls, args):
@@ -282,11 +283,12 @@ def build_exploration_dict(url_store, input_urls, args):
     return control_dict
 
 
-def cli_crawler(args, n=30, url_store=None):
+def cli_crawler(args, n=30, url_store=None, options=None):
     '''Start a focused crawler which downloads a fixed number of URLs within a website
        and prints the links found in the process'''
-    config = use_config(filename=args.config_file)
-    sleep_time = config.getfloat('DEFAULT', 'SLEEP_TIME')
+    if not options:
+        options = args_to_extractor(args)
+    sleep_time = options.config.getfloat('DEFAULT', 'SLEEP_TIME')
     # counter = None
     # load input URLs
     if url_store is None:
@@ -307,7 +309,7 @@ def cli_crawler(args, n=30, url_store=None):
     while spider.URL_STORE.done is False:
         bufferlist, spider.URL_STORE = load_download_buffer(spider.URL_STORE, sleep_time)
         # start several threads
-        for url, result in buffered_downloads(bufferlist, args.parallel, decode=False):
+        for url, result in buffered_downloads(bufferlist, args.parallel, decode=False, options=options):
             base_url = get_base_url(url)
             # handle result
             if result is not None:
@@ -325,31 +327,16 @@
 def probe_homepage(args):
     "Probe websites for extractable content and print the fitting ones."
     input_urls = load_input_urls(args)
-    config = use_config(filename=args.config_file)
-    min_length = config.getint('DEFAULT', 'MIN_EXTRACTED_SIZE')
+    options = args_to_extractor(args)
 
-    for url, result in buffered_downloads(input_urls, args.parallel):
+    for url, result in buffered_downloads(input_urls, args.parallel, options=options):
         if result is not None:
             result = html2txt(result)
-            if result and len(result) > min_length and any(c.isalpha() for c in result):
+            if result and len(result) > options.min_extracted_size and any(c.isalpha() for c in result):
                 if not LANGID_FLAG or not args.target_language or language_classifier(result, "") == args.target_language:
                     print(url, flush=True)
 
 
-def _args_to_extractor(args, url=None):
-    "Derive extractor configuration from CLI args."
-    options = Extractor(
-        config=use_config(filename=args.config_file), output_format=args.output_format,
-        comments=args.no_comments, tables=args.no_tables,
-        dedup=args.deduplicate, lang=args.target_language,
-        url=url, only_with_metadata=args.only_with_metadata,
-        tei_validation=args.validate_tei
-    )
-    for attr in ("fast", "precision", "recall", "formatting", "images", "links"):
-        setattr(options, attr, getattr(args, attr))
-    return options
-
-
 def url_processing_pipeline(args, url_store):
     '''Aggregated functions to show a list and download and process an input list'''
     # print list without further processing
@@ -357,7 +344,7 @@
         url_store.print_unvisited_urls()  # and not write_result()
         return False  # and not sys.exit(0)
 
-    options = _args_to_extractor(args)
+    options = args_to_extractor(args)
 
     # initialize file counter if necessary
     if url_store.total_url_number() > MAX_FILES_PER_DIRECTORY:
@@ -383,7 +370,7 @@
 def file_processing_pipeline(args):
     '''Define batches for parallel file processing and perform the extraction'''
     filecounter = None
-    options = _args_to_extractor(args)
+    options = args_to_extractor(args)
     timeout = options.config.getint('DEFAULT', 'EXTRACTION_TIMEOUT')
 
     # max_tasks_per_child available in Python >= 3.11
@@ -403,7 +390,7 @@ def examine(htmlstring, args, url=None, options=None):
     """Generic safeguards and triggers"""
     result = None
     if not options:
-        options = _args_to_extractor(args, url)
+        options = args_to_extractor(args, url)
     # safety check
     if htmlstring is None:
         sys.stderr.write('ERROR: empty document\n')
diff --git a/trafilatura/core.py b/trafilatura/core.py
index 18b02c22..b0d5874a 100644
--- a/trafilatura/core.py
+++ b/trafilatura/core.py
@@ -19,8 +19,8 @@
 from .hashing import content_fingerprint
 from .htmlprocessing import convert_tags, prune_unwanted_nodes, tree_cleaning
 from .main_extractor import extract_comments, extract_content
-from .metadata import Document, extract_metadata, set_date_params
-from .settings import DEFAULT_CONFIG, use_config
+from .metadata import Document, extract_metadata
+from .settings import DEFAULT_CONFIG, Extractor, use_config
 from .utils import load_html, normalize_unicode
 from .xml import build_json_output, control_xml_output, xmltotxt, xmltocsv
 from .xpaths import REMOVE_COMMENTS_XPATH
@@ -29,67 +29,6 @@
 LOGGER = logging.getLogger(__name__)
 
 
-class Extractor:
-    "Defines a class to store all extraction options."
-    __slots__ = [
-        'config',
-        # general
-        'format', 'fast', 'precision', 'recall', 'comments',
-        'formatting', 'links', 'images', 'tables', 'dedup', 'lang',
-        # extraction size
-        'min_extracted_size', 'min_output_size',
-        'min_output_comm_size', 'min_extracted_comm_size',
-        # deduplication
-        'min_duplcheck_size', 'max_repetitions',
-        # rest
-        'max_file_size', 'min_file_size', 'max_tree_size',
-        # meta
-        'source', 'url', 'only_with_metadata', 'tei_validation',
-        'date_params',
-        'author_blacklist', 'url_blacklist'
-    ]
-    # consider dataclasses for Python 3.7+
-    def __init__(self, *, config=DEFAULT_CONFIG, output_format="txt",
-                 fast=False, precision=False, recall=False,
-                 comments=True, formatting=False, links=False, images=False,
-                 tables=True, dedup=False, lang=None, max_tree_size=None,
-                 url=None, source=None, only_with_metadata=False, tei_validation=False,
-                 author_blacklist=None, url_blacklist=None, date_params=None):
-        self._add_config(config)
-        self.format = output_format
-        self.fast = fast
-        self.precision = precision
-        self.recall = recall
-        self.comments = comments
-        self.formatting = formatting or output_format == "markdown"
-        self.links = links
-        self.images = images
-        self.tables = tables
-        self.dedup = dedup
-        self.lang = lang
-        self.max_tree_size = max_tree_size
-        self.url = url
-        self.source = url or source
-        self.only_with_metadata = only_with_metadata
-        self.tei_validation = tei_validation
-        self.author_blacklist = author_blacklist or set()
-        self.url_blacklist = url_blacklist or set()
-        self.date_params = date_params or \
-            set_date_params(self.config.getboolean('DEFAULT', 'EXTENSIVE_DATE_SEARCH'))
-
-    def _add_config(self, config):
-        "Store options loaded from config file."
-        self.min_extracted_size = config.getint('DEFAULT', 'MIN_EXTRACTED_SIZE')
-        self.min_output_size = config.getint('DEFAULT', 'MIN_OUTPUT_SIZE')
-        self.min_output_comm_size = config.getint('DEFAULT', 'MIN_OUTPUT_COMM_SIZE')
-        self.min_extracted_comm_size = config.getint('DEFAULT', 'MIN_EXTRACTED_COMM_SIZE')
-        self.min_duplcheck_size = config.getint('DEFAULT', 'MIN_DUPLCHECK_SIZE')
-        self.max_repetitions = config.getint('DEFAULT', 'MAX_REPETITIONS')
-        self.max_file_size = config.getint('DEFAULT', 'MAX_FILE_SIZE')
-        self.min_file_size = config.getint('DEFAULT', 'MIN_FILE_SIZE')
-        self.config = config  # todo: remove?
-
-
 def determine_returnstring(document, options):
     '''Convert XML tree to chosen format, clean the result and output it as a string'''
     # XML (TEI) steps
diff --git a/trafilatura/downloads.py b/trafilatura/downloads.py
index 281ed7e0..83abbd44 100644
--- a/trafilatura/downloads.py
+++ b/trafilatura/downloads.py
@@ -8,6 +8,7 @@
 import warnings
 
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import partial
 from io import BytesIO
 from time import sleep
 
@@ -34,8 +35,7 @@
 except ImportError:
     from importlib_metadata import version
 
-
-from .settings import DEFAULT_CONFIG
+from .settings import DEFAULT_CONFIG, Extractor
 from .utils import URL_BLACKLIST_REGEX, decode_file, make_chunks
 
 
@@ -164,15 +164,15 @@
     return None
 
 
-def _handle_response(url, response, decode, config):
+def _handle_response(url, response, decode, options):
     'Internal function to run safety checks on response result.'
     lentest = len(response.html or response.data or "")
     if response.status != 200:
         LOGGER.error('not a 200 response: %s for URL %s', response.status, url)
-    elif lentest < config.getint('DEFAULT', 'MIN_FILE_SIZE'):
+    elif lentest < options.min_file_size:
         LOGGER.error('too small/incorrect for URL %s', url)
         # raise error instead?
-    elif lentest > config.getint('DEFAULT', 'MAX_FILE_SIZE'):
+    elif lentest > options.max_file_size:
         LOGGER.error('too large: length %s for URL %s', lentest, url)
         # raise error instead?
     else:
@@ -181,13 +181,14 @@
     return None
 
 
-def fetch_url(url, decode=True, no_ssl=False, config=DEFAULT_CONFIG):
+def fetch_url(url, decode=True, no_ssl=False, config=DEFAULT_CONFIG, options=None):
     """Downloads a web page and seamlessly decodes the response.
 
     Args:
         url: URL of the page to fetch.
         no_ssl: Don't try to establish a secure connection (to prevent SSLError).
         config: Pass configuration values for output control.
+        options: Extraction options (supersedes config).
 
     Returns:
         Unicode string or None in case of failed downloads and invalid results.
@@ -201,7 +202,9 @@
         )
     response = fetch_response(url, decode=decode, no_ssl=no_ssl, config=config)
     if response is not None and response != '':
-        return _handle_response(url, response, decode, config)
+        if not options:
+            options = Extractor(config=config)
+        return _handle_response(url, response, decode, options)
     # return '' (useful do discard further processing?)
     # return response
     return None
@@ -307,11 +310,12 @@
     return bufferlist, url_store
 
 
-def buffered_downloads(bufferlist, download_threads, decode=True):
+def buffered_downloads(bufferlist, download_threads, decode=True, options=None):
     '''Download queue consumer, single- or multi-threaded.'''
+    worker = partial(fetch_url, decode=decode, options=options)
     with ThreadPoolExecutor(max_workers=download_threads) as executor:
         for chunk in make_chunks(bufferlist, 10000):
-            future_to_url = {executor.submit(fetch_url, url, decode): url for url in chunk}
+            future_to_url = {executor.submit(worker, url): url for url in chunk}
             for future in as_completed(future_to_url):
                 # url and download result
                 yield future_to_url[future], future.result()
diff --git a/trafilatura/metadata.py b/trafilatura/metadata.py
index 11fb947e..eb34c0fd 100644
--- a/trafilatura/metadata.py
+++ b/trafilatura/metadata.py
@@ -7,7 +7,6 @@
 import re
 
 from copy import deepcopy
-from datetime import datetime
 
 from courlan import extract_domain, get_base_url, is_valid_url, normalize_url, validate_url
 from htmldate import find_date
@@ -16,10 +15,11 @@
 from .htmlprocessing import prune_unwanted_nodes
 from .json_metadata import (extract_json, extract_json_parse_error,
                             normalize_json)
-from .xpaths import (AUTHOR_DISCARD_XPATHS, AUTHOR_XPATHS,
-                     CATEGORIES_XPATHS, TAGS_XPATHS, TITLE_XPATHS)
+from .settings import set_date_params
 from .utils import (line_processing, load_html, normalize_authors,
                     normalize_tags, trim, unescape)
+from .xpaths import (AUTHOR_DISCARD_XPATHS, AUTHOR_XPATHS,
+                     CATEGORIES_XPATHS, TAGS_XPATHS, TITLE_XPATHS)
 
 LOGGER = logging.getLogger(__name__)
 logging.getLogger('htmldate').setLevel(logging.WARNING)
@@ -128,15 +128,6 @@ def as_dict(self):
 EXTRA_META = {'charset', 'http-equiv', 'property'}
 
 
-def set_date_params(extensive=True):
-    "Provide default parameters for date extraction."
-    return {
-        "original_date": True,
-        "extensive_search": extensive,
-        "max_date": datetime.now().strftime("%Y-%m-%d")
-    }
-
-
 def check_authors(authors, author_blacklist):
     "Check if the authors string correspond to expected values."
     author_blacklist = {a.lower() for a in author_blacklist}
diff --git a/trafilatura/settings.py b/trafilatura/settings.py
index 963896c9..a92640d2 100644
--- a/trafilatura/settings.py
+++ b/trafilatura/settings.py
@@ -4,6 +4,7 @@
 """
 
 from configparser import ConfigParser
+from datetime import datetime
 
 try:
     from os import sched_getaffinity
@@ -16,7 +17,6 @@
 from lxml.etree import XPath
 
 
-
 def use_config(filename=None, config=None):
     """
     Use configuration object or read and parse a settings file.
@@ -36,6 +36,91 @@
 DEFAULT_CONFIG = use_config()
 
+
+class Extractor:
+    "Defines a class to store all extraction options."
+    __slots__ = [
+        'config',
+        # general
+        'format', 'fast', 'precision', 'recall', 'comments',
+        'formatting', 'links', 'images', 'tables', 'dedup', 'lang',
+        # extraction size
+        'min_extracted_size', 'min_output_size',
+        'min_output_comm_size', 'min_extracted_comm_size',
+        # deduplication
+        'min_duplcheck_size', 'max_repetitions',
+        # rest
+        'max_file_size', 'min_file_size', 'max_tree_size',
+        # meta
+        'source', 'url', 'only_with_metadata', 'tei_validation',
+        'date_params',
+        'author_blacklist', 'url_blacklist'
+    ]
+    # consider dataclasses for Python 3.7+
+    def __init__(self, *, config=DEFAULT_CONFIG, output_format="txt",
+                 fast=False, precision=False, recall=False,
+                 comments=True, formatting=False, links=False, images=False,
+                 tables=True, dedup=False, lang=None, max_tree_size=None,
+                 url=None, source=None, only_with_metadata=False, tei_validation=False,
+                 author_blacklist=None, url_blacklist=None, date_params=None):
+        self._add_config(config)
+        self.format = output_format
+        self.fast = fast
+        self.precision = precision
+        self.recall = recall
+        self.comments = comments
+        self.formatting = formatting or output_format == "markdown"
+        self.links = links
+        self.images = images
+        self.tables = tables
+        self.dedup = dedup
+        self.lang = lang
+        self.max_tree_size = max_tree_size
+        self.url = url
+        self.source = url or source
+        self.only_with_metadata = only_with_metadata
+        self.tei_validation = tei_validation
+        self.author_blacklist = author_blacklist or set()
+        self.url_blacklist = url_blacklist or set()
+        self.date_params = date_params or \
+            set_date_params(self.config.getboolean('DEFAULT', 'EXTENSIVE_DATE_SEARCH'))
+
+    def _add_config(self, config):
+        "Store options loaded from config file."
+        self.min_extracted_size = config.getint('DEFAULT', 'MIN_EXTRACTED_SIZE')
+        self.min_output_size = config.getint('DEFAULT', 'MIN_OUTPUT_SIZE')
+        self.min_output_comm_size = config.getint('DEFAULT', 'MIN_OUTPUT_COMM_SIZE')
+        self.min_extracted_comm_size = config.getint('DEFAULT', 'MIN_EXTRACTED_COMM_SIZE')
+        self.min_duplcheck_size = config.getint('DEFAULT', 'MIN_DUPLCHECK_SIZE')
+        self.max_repetitions = config.getint('DEFAULT', 'MAX_REPETITIONS')
+        self.max_file_size = config.getint('DEFAULT', 'MAX_FILE_SIZE')
+        self.min_file_size = config.getint('DEFAULT', 'MIN_FILE_SIZE')
+        self.config = config  # todo: remove?
+
+
+def args_to_extractor(args, url=None):
+    "Derive extractor configuration from CLI args."
+    options = Extractor(
+        config=use_config(filename=args.config_file), output_format=args.output_format,
+        comments=args.no_comments, tables=args.no_tables,
+        dedup=args.deduplicate, lang=args.target_language,
+        url=url, only_with_metadata=args.only_with_metadata,
+        tei_validation=args.validate_tei
+    )
+    for attr in ("fast", "precision", "recall", "formatting", "images", "links"):
+        setattr(options, attr, getattr(args, attr))
+    return options
+
+
+def set_date_params(extensive=True):
+    "Provide default parameters for date extraction."
+    return {
+        "original_date": True,
+        "extensive_search": extensive,
+        "max_date": datetime.now().strftime("%Y-%m-%d")
+    }
+
+
 
 # Safety checks
 PARALLEL_CORES = min(len(sched_getaffinity(0)) if sched_getaffinity else cpu_count(), 16)  # 16 processes at most
 LRU_SIZE = 4096
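Usage note (not part of the patch): with the refactor above, an Extractor options object built in the settings module can be handed straight to the download helpers, which then read limits such as MIN_FILE_SIZE and MAX_FILE_SIZE from its attributes instead of querying a ConfigParser. A minimal sketch based on the new signatures, assuming the post-patch module layout; the URLs are placeholders:

    from trafilatura.downloads import buffered_downloads, fetch_url
    from trafilatura.settings import Extractor, use_config

    # build the options object once; _add_config() copies the size limits onto attributes
    options = Extractor(config=use_config())

    # single download: the options argument supersedes config inside fetch_url()
    html = fetch_url("https://example.org", options=options)

    # threaded downloads: buffered_downloads() now wraps fetch_url in functools.partial,
    # so every worker shares the same options object
    candidates = ["https://example.org", "https://httpbun.com/html"]
    for url, result in buffered_downloads(candidates, 2, options=options):
        if result is not None:
            print(url, len(result))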