Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework move logic #189

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
122 changes: 79 additions & 43 deletions afew/MailMover.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,42 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: ISC
# Copyright (c) dtk <dtk@gmx.de>
"""This module contain the MailMover class"""

import notmuch
import os
import logging
import os, shutil
import shutil
import sys
from subprocess import check_call, CalledProcessError

from .Database import Database
from .utils import get_message_summary
from datetime import date, datetime, timedelta
import uuid
import notmuch
from .Database import Database
from .utils import get_message_summary


class MailMover(Database):
'''
Move mail files matching a given notmuch query into a target maildir folder.
Move mail files matching a given notmuch query to a target maildir folder.
'''


def __init__(self, max_age=0, rename = False, dry_run=False, notmuch_args=''):
super().__init__()
super(MailMover, self).__init__()
self.db = notmuch.Database(self.db_path)
self.query = 'folder:"{folder}" AND {subquery}'
if max_age:
days = timedelta(int(max_age))
start = date.today() - days
now = datetime.now()
self.query += ' AND {start}..{now}'.format(start=start.strftime('%s'),
now=now.strftime('%s'))
self.query += ' AND {start}..{now}'.format(
start=start.strftime('%s'),
now=now.strftime('%s'))
self.dry_run = dry_run
self.rename = rename
self.notmuch_args = notmuch_args

""" return the new name """
def get_new_name(self, fname, destination):
basename = os.path.basename(fname)
submaildir = os.path.split(os.path.split(fname)[0])[1]
Expand All @@ -47,13 +51,13 @@ def get_new_name(self, fname, destination):
basename = str(uuid.uuid1()) + flagpart
return os.path.join(destination, submaildir, basename)

""" Move mails in folder maildir according to the given rules. """
def move(self, maildir, rules):
'''
Move mails in folder maildir according to the given rules.
'''
# identify and move messages
logging.info("checking mails in '{}'".format(maildir))
to_delete_fnames = []
moved = False
for query in rules.keys():

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you assure that the mailbox dirs do exist. I would like to only change the config, without having to additionally create a new directory manually:

    for ncs in 'tmp new cur'.split(): 
        destination = os.path.join(self.db_path,rules[query],ncs)
        os.makedirs(destination, mode=0o700, exist_ok=True)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On that point I am not sure that behavior should be default,
to my pov, the mailmover should move from existing mailbox to an other (existing).

I am not sure, this case may append often, but if there is a real use case behind this,
we could easily add this check, feel free to add more details @rpuntaie maybe I am missing something.

destination = '{}/{}/'.format(self.db_path, rules[query])
Expand All @@ -63,44 +67,78 @@ def move(self, maildir, rules):
messages = notmuch.Query(self.db, main_query).search_messages()
for message in messages:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add below per-message code into a self-contained staticmethod? This would greatly improve readability and testability.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really sure to fully understand, but if you talk about export the whole per-message process I agree, I think the whole class need some refactor, for example the move() method is also searching the messages which is problematic to add tests and keep simple.

So I have project to refactor the class to be more simple, and support some unit tests, but as the mail mover is a big feature I think it would be safer to do that after.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, code in the loop body should go inside it's own staticmethod, for better testability.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes I agree the thing is I feel it would be simpler to use the function move() to do the actual moving operation, and create an other function like getMessages() to do the notmuch query and pass the message list to the move(), that way it could be easy tested by using mock data.

# a single message (identified by Message-ID) can be in several
# places; only touch the one(s) that exists in this maildir
# places; only touch the one(s) that exists in this maildir
all_message_fnames = message.get_filenames()
to_move_fnames = [name for name in all_message_fnames
if maildir in name]
to_move_fnames = [
name for name in all_message_fnames
if maildir in name]
if not to_move_fnames:
continue
moved = True
self.__log_move_action(message, maildir, rules[query],
self.dry_run)
for fname in to_move_fnames:
if self.dry_run:
continue
try:
shutil.copy2(fname, self.get_new_name(fname, destination))
to_delete_fnames.append(fname)
except shutil.SameFileError:
logging.warn("trying to move '{}' onto itself".format(fname))
continue
except shutil.Error as e:
# this is ugly, but shutil does not provide more
# finely individuated errors
if str(e).endswith("already exists"):
continue
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's with this line? Do we already check somewhere else if source and destination are the same, which is why we skip that check?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed that because this will be logged by shutil log and I think if you try to copy over the same thing it should be an error from shutil and/or your FS but I will add a test with a similar message for that case.

else:
raise

# remove mail from source locations only after all copies are finished
for fname in set(to_delete_fnames):
os.remove(fname)

# update notmuch database
if not self.dry_run:
if moved:
logging.info("updating database")
self.__update_db(maildir)
else:
moved = self.moveMail(fname, destination, True)
# Return the moved flag
return moved

""" Action of moving a mail and upgrade the DB """
def moveMail(self, mailFrom, mailTo, upgradeDatabase):
output = False
""" We need to check that we are not moving a file to itself """
if mailFrom == mailTo:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mailFrom == mailTo should not be an error. But mailTo is a dir, while mailFrom is a file. So: can this happen? If not, it should not be considered, either.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes sure I will rebase the branch and change that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey,
after rebasing the branch, and re-reading I aggree we need to be sure that:

  • mailFrom is a file AND mailTo is a directory

If this is not I will raise an error.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you see those:

            try:
                " mailFrom must be a file "
                if not os.path.isfile(mailFrom):
                    logging.error("mailFrom not a file: {}".format(mailFrom))
                    raise SystemExit
                " mailTo must be a directory "
                if not os.path.isdir(mailTo):
                    logging.error("mailTo not a dir: {}".format(mailTo))
                    raise SystemExit

So those tests already exist (sorry took me a second reading to be sure),
so duplicating those is not a good idea, and moving them end in the same state.

So tell me @rpuntaie

Copy link

@rpuntaie rpuntaie Sep 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like the commits I referred to have gone. There is no mailFrom anywhere anymore. So this seems to be obsolete.

As commented further below: tests are absolutely needed, to avoid breaking established behavior. E.g. I've seen {}/{}/cur/ is now {}/{}, which is an interface change: The user would have to change its config.

logging.error("Can not move mail to itself: {} -> {}".format(
mailFrom,
mailTo))
raise SystemExit
else:
logging.info("Would update database")

try:
" mailFrom must be a file "
if not os.path.isfile(mailFrom):
logging.error("mailFrom not a file: {}".format(mailFrom))
raise SystemExit
" mailTo must be a directory "
if not os.path.isdir(mailTo):
logging.error("mailTo not a dir: {}".format(mailTo))
raise SystemExit
filename = os.path.basename(mailFrom)
logging.info("Copy original mail: {}".format(filename))
logging.info("{} -> {}".format(
os.path.dirname(mailFrom),
os.path.dirname(mailTo)))
# 1 COPY
shutil.copy2(mailFrom, mailTo)
mailToFile = os.path.join(mailTo, filename)
if upgradeDatabase:
self.db = notmuch.Database(self.db_path,
mode=notmuch.Database.MODE.READ_WRITE)
logging.info("Add new copy to the DB: {}".format(mailToFile))
notmuch.Database.add_message(self.db, mailToFile)
self.db = notmuch.Database(self.db_path)
logging.info("Delete original file: {}".format(mailFrom))
os.remove(mailFrom)
# 3 REMOVE ORIGINAL
if upgradeDatabase:
self.db = notmuch.Database(self.db_path,
mode=notmuch.Database.MODE.READ_WRITE)
logging.info("Cleanup the original mail in DB")
# 4 REMOVE ORIGINAL FROM DB
notmuch.Database.remove_message(self.db, mailFrom)
self.db = notmuch.Database(self.db_path)
output = True
except IOError as e:
logging.error("Error IO: {}".format(e))
raise SystemExit
except shutil.Error as e:
logging.error("shutil: {}".format(e))
raise SystemExit
except OSError as e:
logging.error("OS: {}".format(e))
raise SystemExit
except notmuch.NotmuchError as e:
logging.error("notmuch: {}".format(e.status))
raise SystemExit
return output

#
# private:
Expand All @@ -117,7 +155,6 @@ def __update_db(self, maildir):
"after syncing maildir '{}': {}".format(maildir, err))
raise SystemExit


def __log_move_action(self, message, source, destination, dry_run):
'''
Report which mails have been identified for moving.
Expand All @@ -132,4 +169,3 @@ def __log_move_action(self, message, source, destination, dry_run):
logging.log(level, " {}".format(get_message_summary(message).encode('utf8')))
logging.log(level, "from '{}' to '{}'".format(source, destination))
#logging.debug("rule: '{}' in [{}]".format(tag, message.get_tags()))