diff --git a/README.rst b/README.rst index 7119686..a08492f 100755 --- a/README.rst +++ b/README.rst @@ -21,9 +21,8 @@ datefinder - extract dates from text A python module for locating dates inside text. Use this package to extract all sorts of date like strings from a document and turn them into datetime objects. -This module finds the likely datetime strings and then uses the -`dateparser `_ package to convert -to the datetime object. +This module finds the likely datetime strings and then uses +`dateutil`to convert to the datetime object. Installation diff --git a/datefinder/__init__.py b/datefinder/__init__.py index eb294ed..62fffa6 100644 --- a/datefinder/__init__.py +++ b/datefinder/__init__.py @@ -3,9 +3,15 @@ import regex as re from dateutil import tz, parser -from .constants import REPLACEMENTS, TIMEZONE_REPLACEMENTS, STRIP_CHARS, DATE_REGEX, RANGE_REGEX +from .constants import ( + REPLACEMENTS, + TIMEZONE_REPLACEMENTS, + STRIP_CHARS, + DATE_REGEX, + RANGE_REGEX, +) -logger = logging.getLogger('datefinder') +logger = logging.getLogger("datefinder") class DateFinder(object): @@ -18,7 +24,9 @@ def __init__(self, base_date=None): def find_dates(self, text, source=False, index=False, strict=False): - for date_string, indices, captures in self.extract_date_strings(text, strict=strict): + for date_string, indices, captures in self.extract_date_strings( + text, strict=strict + ): as_dt = self.parse_date_string(date_string, captures) if as_dt is None: @@ -44,8 +52,8 @@ def _find_and_replace(self, date_string, captures): """ # add timezones to replace cloned_replacements = copy.copy(REPLACEMENTS) # don't mutate - for tz_string in captures.get('timezones', []): - cloned_replacements.update({tz_string: ' '}) + for tz_string in captures.get("timezones", []): + cloned_replacements.update({tz_string: " "}) date_string = date_string.lower() for key, replacement in cloned_replacements.items(): @@ -55,9 +63,14 @@ def _find_and_replace(self, date_string, captures): # 2. match ' to' # 3. match ' to ' # but never match r'(\s|)to(\s|)' which would make 'october' > 'ocber' - date_string = re.sub(r'(^|\s)' + key + r'(\s|$)', replacement, date_string, flags=re.IGNORECASE) + date_string = re.sub( + r"(^|\s)" + key + r"(\s|$)", + replacement, + date_string, + flags=re.IGNORECASE, + ) - return date_string, self._pop_tz_string(sorted(captures.get('timezones', []))) + return date_string, self._pop_tz_string(sorted(captures.get("timezones", []))) def _pop_tz_string(self, list_of_timezones): try: @@ -66,7 +79,7 @@ def _pop_tz_string(self, list_of_timezones): # want replaced with better abbreviation return TIMEZONE_REPLACEMENTS.get(tz_string, tz_string) except IndexError: - return '' + return "" def _add_tzinfo(self, datetime_obj, tz_string): """ @@ -98,7 +111,7 @@ def parse_date_string(self, date_string, captures): return None try: - logger.debug('Parsing {0} with dateutil'.format(date_string)) + logger.debug("Parsing {0} with dateutil".format(date_string)) as_dt = parser.parse(date_string, default=self.base_date) except Exception as e: logger.debug(e) @@ -127,12 +140,12 @@ def extract_date_strings(self, text, strict=False): for dt2_str in dt2: range_strings.extend(self.extract_date_strings(dt2_str, strict=strict)) - + found_range = True for range_string in range_strings: yield range_string - + # Try to match regular datetimes if no ranges have been found if not found_range: for match in DATE_REGEX.finditer(text): @@ -142,10 +155,10 @@ def extract_date_strings(self, text, strict=False): ## Get individual group matches captures = match.capturesdict() # time = captures.get('time') - digits = captures.get('digits') + digits = captures.get("digits") # digits_modifiers = captures.get('digits_modifiers') # days = captures.get('days') - months = captures.get('months') + months = captures.get("months") # timezones = captures.get('timezones') # delimiters = captures.get('delimiters') # time_periods = captures.get('time_periods') @@ -155,7 +168,9 @@ def extract_date_strings(self, text, strict=False): complete = False if len(digits) == 3: # 12-05-2015 complete = True - elif (len(months) == 1) and (len(digits) == 2): # 19 February 2013 year 09:10 + elif (len(months) == 1) and ( + len(digits) == 2 + ): # 19 February 2013 year 09:10 complete = True if not complete: @@ -163,20 +178,14 @@ def extract_date_strings(self, text, strict=False): ## sanitize date string ## replace unhelpful whitespace characters with single whitespace - match_str = re.sub(r'[\n\t\s\xa0]+', ' ', match_str) + match_str = re.sub(r"[\n\t\s\xa0]+", " ", match_str) match_str = match_str.strip(STRIP_CHARS) ## Save sanitized source string - yield match_str, indices, captures + yield match_str, indices, captures -def find_dates( - text, - source=False, - index=False, - strict=False, - base_date=None - ): +def find_dates(text, source=False, index=False, strict=False, base_date=None): """ Extract datetime strings from text diff --git a/datefinder/constants.py b/datefinder/constants.py index b643a00..de51718 100644 --- a/datefinder/constants.py +++ b/datefinder/constants.py @@ -1,37 +1,48 @@ import regex as re -NUMBERS_PATTERN = r'first|second|third|fourth|fifth|sixth|seventh|eighth|nineth|tenth' -POSITIONNAL_TOKENS= r'next|last' -DIGITS_PATTERN = r'\d+' -DIGITS_SUFFIXES= r'st|th|rd|nd' -DAYS_PATTERN = 'monday|tuesday|wednesday|thursday|friday|saturday|sunday|mon|tue|tues|wed|thur|thurs|fri|sat|sun' -MONTHS_PATTERN = r'january|february|march|april|may|june|july|august|september|october|november|december|jan\.?|feb\.?|mar\.?|apr\.?|may\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|nov\.?|dec\.?' -TIMEZONES_PATTERN = 'ACDT|ACST|ACT|ACWDT|ACWST|ADDT|ADMT|ADT|AEDT|AEST|AFT|AHDT|AHST|AKDT|AKST|AKTST|AKTT|ALMST|ALMT|AMST|AMT|ANAST|ANAT|ANT|APT|AQTST|AQTT|ARST|ART|ASHST|ASHT|AST|AWDT|AWST|AWT|AZOMT|AZOST|AZOT|AZST|AZT|BAKST|BAKT|BDST|BDT|BEAT|BEAUT|BIOT|BMT|BNT|BORT|BOST|BOT|BRST|BRT|BST|BTT|BURT|CANT|CAPT|CAST|CAT|CAWT|CCT|CDDT|CDT|CEDT|CEMT|CEST|CET|CGST|CGT|CHADT|CHAST|CHDT|CHOST|CHOT|CIST|CKHST|CKT|CLST|CLT|CMT|COST|COT|CPT|CST|CUT|CVST|CVT|CWT|CXT|ChST|DACT|DAVT|DDUT|DFT|DMT|DUSST|DUST|EASST|EAST|EAT|ECT|EDDT|EDT|EEDT|EEST|EET|EGST|EGT|EHDT|EMT|EPT|EST|ET|EWT|FET|FFMT|FJST|FJT|FKST|FKT|FMT|FNST|FNT|FORT|FRUST|FRUT|GALT|GAMT|GBGT|GEST|GET|GFT|GHST|GILT|GIT|GMT|GST|GYT|HAA|HAC|HADT|HAE|HAP|HAR|HAST|HAT|HAY|HDT|HKST|HKT|HLV|HMT|HNA|HNC|HNE|HNP|HNR|HNT|HNY|HOVST|HOVT|HST|ICT|IDDT|IDT|IHST|IMT|IOT|IRDT|IRKST|IRKT|IRST|ISST|IST|JAVT|JCST|JDT|JMT|JST|JWST|KART|KDT|KGST|KGT|KIZST|KIZT|KMT|KOST|KRAST|KRAT|KST|KUYST|KUYT|KWAT|LHDT|LHST|LINT|LKT|LMT|LMT|LMT|LMT|LRT|LST|MADMT|MADST|MADT|MAGST|MAGT|MALST|MALT|MART|MAWT|MDDT|MDST|MDT|MEST|MET|MHT|MIST|MIT|MMT|MOST|MOT|MPT|MSD|MSK|MSM|MST|MUST|MUT|MVT|MWT|MYT|NCST|NCT|NDDT|NDT|NEGT|NEST|NET|NFT|NMT|NOVST|NOVT|NPT|NRT|NST|NT|NUT|NWT|NZDT|NZMT|NZST|OMSST|OMST|ORAST|ORAT|PDDT|PDT|PEST|PET|PETST|PETT|PGT|PHOT|PHST|PHT|PKST|PKT|PLMT|PMDT|PMMT|PMST|PMT|PNT|PONT|PPMT|PPT|PST|PT|PWT|PYST|PYT|QMT|QYZST|QYZT|RET|RMT|ROTT|SAKST|SAKT|SAMT|SAST|SBT|SCT|SDMT|SDT|SET|SGT|SHEST|SHET|SJMT|SLT|SMT|SRET|SRT|SST|STAT|SVEST|SVET|SWAT|SYOT|TAHT|TASST|TAST|TBIST|TBIT|TBMT|TFT|THA|TJT|TKT|TLT|TMT|TOST|TOT|TRST|TRT|TSAT|TVT|ULAST|ULAT|URAST|URAT|UTC|UYHST|UYST|UYT|UZST|UZT|VET|VLAST|VLAT|VOLST|VOLT|VOST|VUST|VUT|WARST|WART|WAST|WAT|WDT|WEDT|WEMT|WEST|WET|WFT|WGST|WGT|WIB|WIT|WITA|WMT|WSDT|WSST|WST|WT|XJT|YAKST|YAKT|YAPT|YDDT|YDT|YEKST|YEKST|YEKT|YEKT|YERST|YERT|YPT|YST|YWT|zzz' +NUMBERS_PATTERN = r"first|second|third|fourth|fifth|sixth|seventh|eighth|nineth|tenth" +POSITIONNAL_TOKENS = r"next|last" +DIGITS_PATTERN = r"\d+" +DIGITS_SUFFIXES = r"st|th|rd|nd" +DAYS_PATTERN = "monday|tuesday|wednesday|thursday|friday|saturday|sunday|mon|tue|tues|wed|thur|thurs|fri|sat|sun" +MONTHS_PATTERN = r"january|february|march|april|may|june|july|august|september|october|november|december|jan\.?|feb\.?|mar\.?|apr\.?|may\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|nov\.?|dec\.?" +TIMEZONES_PATTERN = "ACDT|ACST|ACT|ACWDT|ACWST|ADDT|ADMT|ADT|AEDT|AEST|AFT|AHDT|AHST|AKDT|AKST|AKTST|AKTT|ALMST|ALMT|AMST|AMT|ANAST|ANAT|ANT|APT|AQTST|AQTT|ARST|ART|ASHST|ASHT|AST|AWDT|AWST|AWT|AZOMT|AZOST|AZOT|AZST|AZT|BAKST|BAKT|BDST|BDT|BEAT|BEAUT|BIOT|BMT|BNT|BORT|BOST|BOT|BRST|BRT|BST|BTT|BURT|CANT|CAPT|CAST|CAT|CAWT|CCT|CDDT|CDT|CEDT|CEMT|CEST|CET|CGST|CGT|CHADT|CHAST|CHDT|CHOST|CHOT|CIST|CKHST|CKT|CLST|CLT|CMT|COST|COT|CPT|CST|CUT|CVST|CVT|CWT|CXT|ChST|DACT|DAVT|DDUT|DFT|DMT|DUSST|DUST|EASST|EAST|EAT|ECT|EDDT|EDT|EEDT|EEST|EET|EGST|EGT|EHDT|EMT|EPT|EST|ET|EWT|FET|FFMT|FJST|FJT|FKST|FKT|FMT|FNST|FNT|FORT|FRUST|FRUT|GALT|GAMT|GBGT|GEST|GET|GFT|GHST|GILT|GIT|GMT|GST|GYT|HAA|HAC|HADT|HAE|HAP|HAR|HAST|HAT|HAY|HDT|HKST|HKT|HLV|HMT|HNA|HNC|HNE|HNP|HNR|HNT|HNY|HOVST|HOVT|HST|ICT|IDDT|IDT|IHST|IMT|IOT|IRDT|IRKST|IRKT|IRST|ISST|IST|JAVT|JCST|JDT|JMT|JST|JWST|KART|KDT|KGST|KGT|KIZST|KIZT|KMT|KOST|KRAST|KRAT|KST|KUYST|KUYT|KWAT|LHDT|LHST|LINT|LKT|LMT|LMT|LMT|LMT|LRT|LST|MADMT|MADST|MADT|MAGST|MAGT|MALST|MALT|MART|MAWT|MDDT|MDST|MDT|MEST|MET|MHT|MIST|MIT|MMT|MOST|MOT|MPT|MSD|MSK|MSM|MST|MUST|MUT|MVT|MWT|MYT|NCST|NCT|NDDT|NDT|NEGT|NEST|NET|NFT|NMT|NOVST|NOVT|NPT|NRT|NST|NT|NUT|NWT|NZDT|NZMT|NZST|OMSST|OMST|ORAST|ORAT|PDDT|PDT|PEST|PET|PETST|PETT|PGT|PHOT|PHST|PHT|PKST|PKT|PLMT|PMDT|PMMT|PMST|PMT|PNT|PONT|PPMT|PPT|PST|PT|PWT|PYST|PYT|QMT|QYZST|QYZT|RET|RMT|ROTT|SAKST|SAKT|SAMT|SAST|SBT|SCT|SDMT|SDT|SET|SGT|SHEST|SHET|SJMT|SLT|SMT|SRET|SRT|SST|STAT|SVEST|SVET|SWAT|SYOT|TAHT|TASST|TAST|TBIST|TBIT|TBMT|TFT|THA|TJT|TKT|TLT|TMT|TOST|TOT|TRST|TRT|TSAT|TVT|ULAST|ULAT|URAST|URAT|UTC|UYHST|UYST|UYT|UZST|UZT|VET|VLAST|VLAT|VOLST|VOLT|VOST|VUST|VUT|WARST|WART|WAST|WAT|WDT|WEDT|WEMT|WEST|WET|WFT|WGST|WGT|WIB|WIT|WITA|WMT|WSDT|WSST|WST|WT|XJT|YAKST|YAKT|YAPT|YDDT|YDT|YEKST|YEKST|YEKT|YEKT|YERST|YERT|YPT|YST|YWT|zzz" ## explicit north american timezones that get replaced -NA_TIMEZONES_PATTERN = 'pacific|eastern|mountain|central' -ALL_TIMEZONES_PATTERN = TIMEZONES_PATTERN + '|' + NA_TIMEZONES_PATTERN -DELIMITERS_PATTERN = r'[/\:\-\,\s\_\+\@]+' +NA_TIMEZONES_PATTERN = "pacific|eastern|mountain|central" +ALL_TIMEZONES_PATTERN = TIMEZONES_PATTERN + "|" + NA_TIMEZONES_PATTERN +DELIMITERS_PATTERN = r"[/\:\-\,\s\_\+\@]+" # Allows for straightforward datestamps e.g 2017, 201712, 20171223. Created with: # YYYYMM_PATTERN = '|'.join(['19\d\d'+'{:0>2}'.format(mon)+'|20\d\d'+'{:0>2}'.format(mon) for mon in range(1, 13)]) # YYYYMMDD_PATTERN = '|'.join(['19\d\d'+'{:0>2}'.format(mon)+'[0123]\d|20\d\d'+'{:0>2}'.format(mon)+'[0123]\d' for mon in range(1, 13)]) -YYYY_PATTERN = r'19\d\d|20\d\d' -YYYYMM_PATTERN = r'19\d\d01|20\d\d01|19\d\d02|20\d\d02|19\d\d03|20\d\d03|19\d\d04|20\d\d04|19\d\d05|20\d\d05|19\d\d06|20\d\d06|19\d\d07|20\d\d07|19\d\d08|20\d\d08|19\d\d09|20\d\d09|19\d\d10|20\d\d10|19\d\d11|20\d\d11|19\d\d12|20\d\d12' -YYYYMMDD_PATTERN = r'19\d\d01[0123]\d|20\d\d01[0123]\d|19\d\d02[0123]\d|20\d\d02[0123]\d|19\d\d03[0123]\d|20\d\d03[0123]\d|19\d\d04[0123]\d|20\d\d04[0123]\d|19\d\d05[0123]\d|20\d\d05[0123]\d|19\d\d06[0123]\d|20\d\d06[0123]\d|19\d\d07[0123]\d|20\d\d07[0123]\d|19\d\d08[0123]\d|20\d\d08[0123]\d|19\d\d09[0123]\d|20\d\d09[0123]\d|19\d\d10[0123]\d|20\d\d10[0123]\d|19\d\d11[0123]\d|20\d\d11[0123]\d|19\d\d12[0123]\d|20\d\d12[0123]\d' -YYYYMMDDHHMMSS_PATTERN = '|'.join([r'19\d\d' + '{:0>2}'.format(mon) + r'[0-3]\d[0-5]\d[0-5]\d[0-5]\d|20\d\d' + '{:0>2}'.format(mon) + r'[0-3]\d[0-5]\d[0-5]\d[0-5]\d' for mon in range(1, 13)]) -ISO8601_PATTERN = r'(?P-?(\:[1-9][0-9]*)?[0-9]{4})\-(?P1[0-2]|0[1-9])\-(?P3[01]|0[1-9]|[12][0-9])T(?P2[0-3]|[01][0-9])\:(?P[0-5][0-9]):(?P[0-5][0-9])(?:[\.,]+(?P[0-9]+))?(?P(?:Z|[+-](?:2[0-3]|[01][0-9])\:[0-5][0-9]))?' -UNDELIMITED_STAMPS_PATTERN = '|'.join([YYYYMMDDHHMMSS_PATTERN, YYYYMMDD_PATTERN, YYYYMM_PATTERN, ISO8601_PATTERN]) -DELIMITERS_PATTERN = r'[/\:\-\,\.\s\_\+\@]+' -TIME_PERIOD_PATTERN = r'a\.m\.|am|p\.m\.|pm' +YYYY_PATTERN = r"19\d\d|20\d\d" +YYYYMM_PATTERN = r"19\d\d01|20\d\d01|19\d\d02|20\d\d02|19\d\d03|20\d\d03|19\d\d04|20\d\d04|19\d\d05|20\d\d05|19\d\d06|20\d\d06|19\d\d07|20\d\d07|19\d\d08|20\d\d08|19\d\d09|20\d\d09|19\d\d10|20\d\d10|19\d\d11|20\d\d11|19\d\d12|20\d\d12" +YYYYMMDD_PATTERN = r"19\d\d01[0123]\d|20\d\d01[0123]\d|19\d\d02[0123]\d|20\d\d02[0123]\d|19\d\d03[0123]\d|20\d\d03[0123]\d|19\d\d04[0123]\d|20\d\d04[0123]\d|19\d\d05[0123]\d|20\d\d05[0123]\d|19\d\d06[0123]\d|20\d\d06[0123]\d|19\d\d07[0123]\d|20\d\d07[0123]\d|19\d\d08[0123]\d|20\d\d08[0123]\d|19\d\d09[0123]\d|20\d\d09[0123]\d|19\d\d10[0123]\d|20\d\d10[0123]\d|19\d\d11[0123]\d|20\d\d11[0123]\d|19\d\d12[0123]\d|20\d\d12[0123]\d" +YYYYMMDDHHMMSS_PATTERN = "|".join( + [ + r"19\d\d" + + "{:0>2}".format(mon) + + r"[0-3]\d[0-5]\d[0-5]\d[0-5]\d|20\d\d" + + "{:0>2}".format(mon) + + r"[0-3]\d[0-5]\d[0-5]\d[0-5]\d" + for mon in range(1, 13) + ] +) +ISO8601_PATTERN = r"(?P-?(\:[1-9][0-9]*)?[0-9]{4})\-(?P1[0-2]|0[1-9])\-(?P3[01]|0[1-9]|[12][0-9])T(?P2[0-3]|[01][0-9])\:(?P[0-5][0-9]):(?P[0-5][0-9])(?:[\.,]+(?P[0-9]+))?(?P(?:Z|[+-](?:2[0-3]|[01][0-9])\:[0-5][0-9]))?" +UNDELIMITED_STAMPS_PATTERN = "|".join( + [YYYYMMDDHHMMSS_PATTERN, YYYYMMDD_PATTERN, YYYYMM_PATTERN, ISO8601_PATTERN] +) +DELIMITERS_PATTERN = r"[/\:\-\,\.\s\_\+\@]+" +TIME_PERIOD_PATTERN = r"a\.m\.|am|p\.m\.|pm" ## can be in date strings but not recognized by dateutils -EXTRA_TOKENS_PATTERN = r'due|by|on|during|standard|daylight|savings|time|date|dated|of|to|through|between|until|at|day' +EXTRA_TOKENS_PATTERN = r"due|by|on|during|standard|daylight|savings|time|date|dated|of|to|through|between|until|at|day" ## TODO: Get english numbers? ## http://www.rexegg.com/regex-trick-numbers-in-english.html -RELATIVE_PATTERN = 'before|after|next|last|ago' -TIME_SHORTHAND_PATTERN = 'noon|midnight|today|yesterday' -UNIT_PATTERN = 'second|minute|hour|day|week|month|year' +RELATIVE_PATTERN = "before|after|next|last|ago" +TIME_SHORTHAND_PATTERN = "noon|midnight|today|yesterday" +UNIT_PATTERN = "second|minute|hour|day|week|month|year" ## Time pattern is used independently, so specified here. TIME_PATTERN = r""" @@ -60,8 +71,7 @@ ) ) """.format( - time_periods=TIME_PERIOD_PATTERN, - timezones=ALL_TIMEZONES_PATTERN + time_periods=TIME_PERIOD_PATTERN, timezones=ALL_TIMEZONES_PATTERN ) DATES_PATTERN = """ @@ -111,7 +121,7 @@ months=MONTHS_PATTERN, delimiters=DELIMITERS_PATTERN, positionnal_tokens=POSITIONNAL_TOKENS, - extra_tokens=EXTRA_TOKENS_PATTERN + extra_tokens=EXTRA_TOKENS_PATTERN, ) RANGE_PATTERN = r""" @@ -120,13 +130,21 @@ [\s]?(to|through)[\s]? (?P{date_pattern}) ) -""".format(date_pattern=DATES_PATTERN) +""".format( + date_pattern=DATES_PATTERN +) -DATE_REGEX = re.compile(DATES_PATTERN, re.IGNORECASE | re.MULTILINE | re.UNICODE | re.DOTALL | re.VERBOSE) +DATE_REGEX = re.compile( + DATES_PATTERN, re.IGNORECASE | re.MULTILINE | re.UNICODE | re.DOTALL | re.VERBOSE +) -TIME_REGEX = re.compile(TIME_PATTERN, re.IGNORECASE | re.MULTILINE | re.UNICODE | re.DOTALL | re.VERBOSE) +TIME_REGEX = re.compile( + TIME_PATTERN, re.IGNORECASE | re.MULTILINE | re.UNICODE | re.DOTALL | re.VERBOSE +) -RANGE_REGEX = re.compile(RANGE_PATTERN, re.IGNORECASE | re.MULTILINE | re.UNICODE | re.DOTALL | re.VERBOSE) +RANGE_REGEX = re.compile( + RANGE_PATTERN, re.IGNORECASE | re.MULTILINE | re.UNICODE | re.DOTALL | re.VERBOSE +) ## These tokens can be in original text but dateutil ## won't handle them without modification @@ -151,4 +169,5 @@ } ## Characters that can be removed from ends of matched strings -STRIP_CHARS = ' \n\t:-.,_' \ No newline at end of file +STRIP_CHARS = " \n\t:-.,_" + diff --git a/setup.py b/setup.py index 2aa926e..e362338 100755 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ # Always prefer setuptools over distutils from setuptools import setup, find_packages + # To use a consistent encoding from codecs import open from os import path @@ -14,97 +15,80 @@ here = path.abspath(path.dirname(__file__)) # Get the long description from the README file -with open(path.join(here, 'README.rst'), encoding='utf-8') as f: +with open(path.join(here, "README.rst"), encoding="utf-8") as f: long_description = f.read() setup( - name='datefinder', - + name="datefinder", # Versions should comply with PEP440. For a discussion on single-sourcing # the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version='0.6.2', - - description='Extract datetime objects from strings', + version="0.6.2", + description="Extract datetime objects from strings", long_description=long_description, - # The project's main homepage. - url='https://github.com/akoumjian/datefinder', - + url="https://github.com/akoumjian/datefinder", # Author details - author='Alec Koumjian', - author_email='akoumjian@gmail.com', - + author="Alec Koumjian", + author_email="akoumjian@gmail.com", # Choose your license - license='MIT', - + license="MIT", # See https://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[ # How mature is this project? Common values are # 3 - Alpha # 4 - Beta # 5 - Production/Stable - 'Development Status :: 4 - Beta', - + "Development Status :: 4 - Beta", # Indicate who your project is intended for - 'Intended Audience :: Developers', - + "Intended Audience :: Developers", # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: MIT License', - + "License :: OSI Approved :: MIT License", # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 2', + "Programming Language :: Python :: 2", # 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", # Topics - 'Natural Language :: English', + "Natural Language :: English", ], - # What does your project relate to? - keywords='datetime parser nlp', - + keywords="datetime parser nlp", # You can just specify the packages manually here if your project is # simple. Or you can use find_packages(). - packages=find_packages(exclude=['tests']), - + packages=find_packages(exclude=["tests"]), # Alternatively, if you want to distribute just a my_module.py, uncomment # this: # py_modules=['datefinder'], - # List run-time dependencies here. These will be installed by pip when # your project is installed. For an analysis of "install_requires" vs pip's # requirements files see: # https://packaging.python.org/en/latest/requirements.html - install_requires=['regex>=2017.02.08', 'python-dateutil>=2.4.2', 'pytz'], - + install_requires=["regex>=2017.02.08", "python-dateutil>=2.4.2", "pytz"], # List additional groups of dependencies here (e.g. development # dependencies). You can install these using the following syntax, # for example: # $ pip install -e .[dev,test] extras_require={ - 'dev': ['pytest>=2.8.5', 'mock', 'pytz>=2015.7', 'pylint==2.1.1'], - 'test': ['pytest>=2.8.5', 'mock', 'pytz>=2015.7'], + "dev": ["pytest>=2.8.5", "mock", "pytz>=2015.7", "pylint==2.1.1"], + "test": ["pytest>=2.8.5", "mock", "pytz>=2015.7"], }, - # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these # have to be included in MANIFEST.in as well. # package_data={ # 'sample': ['package_data.dat'], # }, - # Although 'package_data' is the preferred approach, in some case you may # need to place data files outside of your packages. See: # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa # In this case, 'data_file' will be installed into '/my_data' # data_files=[('my_data', ['data/data_file'])], - # To provide executable scripts, use entry points in preference to the # "scripts" keyword. Entry points provide cross-platform support and allow # pip to create the appropriate form of executable for the target platform.