Qui il codice

Da WikiSitech.
Vai alla navigazione. Vai alla ricerca.
#!/usr/bin/python -tt
#
# Copyright 2004-2006 Nathaniel W. Turner <nate@houseofnate.net>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

"""
USAGE
  cleanup-maildir [OPTION].. COMMAND FOLDERNAME..

DESCRIPTION
  Cleans up old messages in FOLDERNAME; the exact action taken
  depends on COMMAND.  (See next section.)
      Note that FOLDERNAME is a name such as 'Drafts', and the
  corresponding maildir path is determined using the values of
  maildir-root, folder-prefix, and folder-seperator.

COMMANDS
  archive - move old messages to subfolders based on message date
  trash   - move old messages to trash folder
  delete  - permanently delete old messages

OPTIONS
  -h, --help
      Show this help.
  -q, --quiet
      Suppress normal output.
  -v, --verbose
      Output extra information for testing.
  -n, --trial-run
      Do not actually touch any files; just say what would be done.
  -a, --age=N
      Only touch messages older than N days.  Default is 14 days.
  -k, --keep-flagged-threads
      If any messages in a thread are flagged, do not touch them or
      any other messages in that thread.
      Note: the thread-detection mechanism is currently based purely on
      a message's subject.  The In-Reply-To header is not currently used.
  -r, --keep-read
      If any messages are flagged as READ, do not touch them.
  -t, --trash-folder=F
      Use F as trash folder when COMMAND is 'trash'.
      Default is 'Trash'.
  --archive-folder=F
      Use F as the base for constructing archive folders.  For example, if F is
      'Archive', messages from 2004 might be put in the folder 'Archive.2004'.
  -d, --archive-hierarchy-depth=N
      Specify number of subfolders in archive hierarchy; 1 is just
      the year, 2 is year/month (default), 3 is year/month/day.
  --maildir-root=F
      Specifies folder that contains mail folders.
      Default is "$HOME/Maildir".
  --folder-seperator=str
      Folder hierarchy separator.  Default is '.'
  --folder-prefix=str
      Folder prefix.  Default is '.'

NOTES
  The following form is accepted for backwards compatibility, but is deprecated:
  cleanup-maildir --mode=COMMAND [OPTION].. FOLDERNAME..

EXAMPLES
  # Archive messages in 'Sent Items' folder over 30 days old
  cleanup-maildir --age=30 archive 'Sent Items'

  # Trash messages over 2 weeks old in 'Lists.debian-devel' folder,
  # except messages that are part of a thread containing a flagged message.
  cleanup-maildir --keep-flagged-threads trash 'Lists.debian-devel'
"""

__version__ = "0.3.0"
# $Id$
# $URL$

import mailbox
import os.path
import os
import rfc822
import string
import socket
import time
import logging
import sys
import getopt


def mkMaildir(path):
    """Make a Maildir structure rooted at 'path'

    Creates 'path' itself and then its 'tmp', 'new' and 'cur'
    subdirectories, each with owner-only permissions (further
    restricted by the process umask, as usual for os.mkdir).

    Arguments:
    path -- directory to create; os.mkdir is used, so 'path' must not
            exist yet and its parent directory must already exist

    Raises OSError if any of the directories cannot be created.
    """
    # stat.S_IRWXU is rwx for the owner only (octal 700).  The named
    # constant is used instead of an octal literal because the literal
    # is spelled differently in Python 2 ("0700") and Python 3 ("0o700").
    import stat
    mode = stat.S_IRWXU
    os.mkdir(path, mode)
    for sub in ('tmp', 'new', 'cur'):
        os.mkdir(os.path.join(path, sub), mode)


class MaildirWriter(object):

    """Deliver messages into a Maildir

    Delivery is done with os.link (hard links) rather than by copying
    the message data, so it can only succeed where the source file and
    the target Maildir allow hard links between them.
    """

    # Default Maildir path; stays None until __init__ or deliver() sets it.
    path = None
    # Bumped after every delivery attempt (successful or not); it is
    # part of the generated file name so that retries and repeated
    # deliveries within one second get distinct names.
    counter = 0

    def __init__(self, path=None):
        """Create a MaildirWriter that manages the Maildir at 'path'

        Arguments:
        path -- if specified, used as the default Maildir for this object

        Raises ValueError if 'path' is given but is not a directory.
        """
        if path is not None:
            if not os.path.isdir(path):
                raise ValueError('Path does not exist: %s' % path)
            self.path = path
        self.logger = logging.getLogger('MaildirWriter')

    def deliver(self, msg, path=None):
        """Deliver a message to a Maildir

        The message file is linked into tmp/, then new/, and finally
        into cur/ with its original flags appended to the file name.
        The source file itself is left in place; removing it is up to
        the caller.

        Arguments:
        msg -- a message object; must provide getFilePath() and getFlags()
        path -- the path of the Maildir; if None, uses the current
                default.  Note that a non-None 'path' also becomes the
                default for later calls on this object.

        Raises ValueError if no usable Maildir path is known, and
        OSError if delivery still fails after 10 attempts or if the
        final move into cur/ fails.
        """
        if path is not None:
            self.path = path
        if self.path is None or not os.path.isdir(self.path):
            raise ValueError('Path does not exist')

        srcFile = msg.getFilePath()
        tryCount = 1
        while True:
            dstName = "%d.%d_%d.%s" % (int(time.time()), os.getpid(),
                                       self.counter, socket.gethostname())
            tmpFile = os.path.join(self.path, "tmp", dstName)
            newFile = os.path.join(self.path, "new", dstName)
            tmpLinked = False
            try:
                self.logger.debug("deliver: attempt copy %s to %s",
                                  srcFile, tmpFile)
                os.link(srcFile, tmpFile)  # "Copy" into tmp
                tmpLinked = True
                self.logger.debug("deliver: attempt link to %s", newFile)
                os.link(tmpFile, newFile)  # Link into new
            except OSError:
                # sys.exc_info() is used instead of "except ..., err" /
                # "except ... as err" so this parses on every Python version.
                err = sys.exc_info()[1]
                if tmpLinked:
                    # The next attempt uses a different name, so the
                    # half-delivered tmp file would otherwise be left
                    # behind.  Best effort only: a failure here must
                    # not hide the original error.
                    try:
                        os.unlink(tmpFile)
                    except OSError:
                        pass
                self.logger.critical(
                        "deliver failed: %s (src=%s tmp=%s new=%s i=%d)" %
                        (err.strerror or err, srcFile, tmpFile, newFile,
                         tryCount))
                tryCount += 1
                self.counter += 1
                if tryCount > 10:
                    raise OSError("too many failed delivery attempts")
                self.logger.info("sleeping")
                time.sleep(2)
            else:
                break

        # Successful delivery; increment deliver counter
        self.counter += 1

        # For the rest of this method we are acting as an MUA, not an MDA.

        # Move message to cur and restore any flags
        dstFile = os.path.join(self.path, "cur", dstName)
        flags = msg.getFlags()
        if flags is not None:
            dstFile += ':' + flags
        self.logger.debug("deliver: attempt link to %s", dstFile)
        try:
            os.link(newFile, dstFile)
            os.unlink(newFile)
        finally:
            # Cleanup tmp file, even if the move into cur failed
            os.unlink(tmpFile)


class MessageDateError(TypeError):
    """Raised when a message's Date header cannot be converted to a time.

    Derives from TypeError, so handlers written for TypeError catch it
    as well.
    """


class MaildirMessage(rfc822.Message):

    """An email message stored as a file in a Maildir

    Adds accessors for the Maildir-specific information carried by the
    message's file (its path, the flags in its file name, its
    modification time) on top of the rfc822.Message header interface.
    """

    def getFilePath(self):
        """Return the path of the file this message was read from"""
        # NOTE(review): from Python 2.5 on, self.fp is presumably the
        # proxy object that mailbox hands to the factory, with the real
        # file in its _file attribute -- confirm before touching this.
        if sys.hexversion >= 0x020500F0:
            return self.fp._file.name
        else:
            return self.fp.name

    def _getInfo(self):
        """Return the text after the last ':' in the file name, or None

        Only the file name is examined, never the directory part, so a
        ':' or a flag letter in a directory name cannot be mistaken
        for message flags.
        """
        name = os.path.basename(self.getFilePath())
        if ':' not in name:
            return None
        return name.rsplit(':', 1)[1]

    def isFlagged(self):
        """return true if the message is flagged as important"""
        info = self._getInfo()
        return info is not None and 'F' in info

    def getFlags(self):
        """return the flag part of the message's filename

        This is everything after the ':' (for example '2,FS'), or None
        if the file name has no ':'.
        """
        return self._getInfo()

    def isNew(self):
        """return true if the message is marked as unread"""
        # XXX should really be called isUnread
        info = self._getInfo()
        return info is None or 'S' not in info

    def getSubject(self):
        """get the message's subject as a unicode string

        Returns None if the message has no Subject header.  If the
        header names a character set Python does not know, the raw
        header value is returned undecoded.
        """
        import email.Header
        raw = self.getheader("Subject")
        if raw is None:
            # Do not hand None to decode_header: callers test the
            # result against None to detect a missing subject, and
            # would otherwise see the text "None".
            return None
        try:
            return u"".join([text.decode(charset or 'ASCII', 'replace')
                             for text, charset
                             in email.Header.decode_header(raw)])
        except LookupError:
            return raw

    def getSubjectHash(self):
        """get the message's subject in a "normalized" form

        This currently means lowercasing, stripping surrounding
        whitespace and removing all leading reply or forward indicators
        ("Re: Fwd: Re: foo" becomes "foo").  Messages without a subject
        all map to '(no subject)'.
        """
        import re
        subject = self.getSubject()
        if subject is None:
            return '(no subject)'
        return re.sub(r'^((re|fwd?):\s*)+', '', subject.lower().strip())

    def getDateSent(self):
        """Get the time of sending from the Date header

        Returns a time value produced by time.mktime, or None if the
        message has no Date header.  Not very reliable, because the
        Date header can be missing or spoofed (and often is, by
        spammers).
        Throws a MessageDateError if the Date header is present but
        cannot be converted.

        Note: rfc822.parsedate is used, so the time zone in the header
        is ignored and the date is interpreted as local time.
        """
        dateHeader = self.getheader('Date')
        if dateHeader is None:
            return None
        try:
            return time.mktime(rfc822.parsedate(dateHeader))
        except (ValueError, TypeError, OverflowError):
            # TypeError gets thrown by mktime if parsedate returns None
            raise MessageDateError("message has missing or bad Date")

    def getDateRecd(self):
        """Get the time the message was received

        This is the modification time of the message file, in seconds
        since the epoch.
        """
        # Index 8 of the stat tuple is st_mtime.
        return os.stat(self.getFilePath())[8]

    def getDateSentOrRecd(self):
        """Get the time the message was sent, fall back on time received"""
        try:
            sent = self.getDateSent()
        except MessageDateError:
            sent = None
        if sent is not None:
            return sent
        return self.getDateRecd()

    def getAge(self):
        """Get the number of days since the message was received

        The result is a float; fractions of a day are kept.
        """
        # Both time.time() and the file's mtime count seconds since the
        # epoch, so they can be subtracted directly.  (Running gmtime()
        # output through mktime() would skew "now" by the local UTC
        # offset, because mktime() expects local time.)
        secondsOld = time.time() - self.getDateRecd()
        return secondsOld / (60*60*24)


class MaildirCleaner(object):

    """Clean a maildir by deleting or moving old messages

    Behaviour is configured by assigning the public attributes below
    before calling clean().  Running totals are kept in 'stats'.
    """

    __trashWriter = None
    __mdWriter = None
    # Class-level defaults only.  __init__ gives every instance its own
    # dictionaries, so counters are never shared between cleaners.
    stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0}
    keepSubjects = {}
    archiveFolder = None        # base name for archive folders
    archiveHierDepth = 2        # 1: year, 2: year/month, 3: year/month/day
    folderBase = None           # directory that contains the folders
    folderPrefix = "."
    folderSeperator = "."
    keepFlaggedThreads = False
    trashFolder = "Trash"
    isTrialRun = False          # if true, only log what would be done
    keepRead = False            # if true, never touch read messages

    def __init__(self, folderBase=None):
        """Initialize the MaildirCleaner

        Arguments:
        folderBase -- the directory in which the folders are found
        """
        self.folderBase = folderBase
        self.stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0}
        self.keepSubjects = {}
        self.__mdWriter = MaildirWriter()
        self.logger = logging.getLogger('MaildirCleaner')
        self.logger.setLevel(logging.DEBUG)

    def __getTrashWriter(self):
        # Created on first use, so the trash folder only has to exist
        # when something is actually trashed.
        if not self.__trashWriter:
            path = os.path.join(self.folderBase,
                                self.folderPrefix + self.trashFolder)
            self.__trashWriter = MaildirWriter(path)
        return self.__trashWriter

    trashWriter = property(__getTrashWriter)

    def _folderPath(self, folderName):
        """Return the Maildir directory for the folder 'folderName'"""
        # 'INBOX' is the maildir root itself; every other folder is a
        # subdirectory named folderPrefix + folderName.
        if folderName == 'INBOX':
            return self.folderBase
        return os.path.join(self.folderBase, self.folderPrefix + folderName)

    def _archiveSubFolder(self, archiveFolder, when):
        """Return the archive folder name for a message dated 'when'

        Arguments:
        archiveFolder -- base folder name; may be empty, in which case
            the result starts directly with the year
        when -- message time in seconds since the epoch; it is split
            into year/month/day using UTC
        """
        mdate = time.gmtime(when)
        parts = [str(mdate[0])]
        if self.archiveHierDepth > 1:
            parts.append(time.strftime("%m-%b", mdate))
        if self.archiveHierDepth > 2:
            parts.append(time.strftime("%d-%a", mdate))
        if archiveFolder:
            # Only prepend a non-empty base.  An empty base (INBOX
            # without --archive-folder) used to produce a name with a
            # leading separator and thus a '..YYYY' directory.
            parts.insert(0, archiveFolder)
        return self.folderSeperator.join(parts)

    def scanSubjects(self, folderName):
        """Scans for flagged subjects

        Rebuilds keepSubjects so that it holds the normalized subject
        of every flagged message in the folder.
        """
        self.logger.info("Scanning for flagged subjects...")
        maildir = mailbox.Maildir(self._folderPath(folderName),
                                  MaildirMessage)
        self.keepSubjects = {}
        for i, msg in enumerate(maildir):
            if msg.isFlagged():
                subjectHash = msg.getSubjectHash()
                self.keepSubjects[subjectHash] = 1
                self.logger.debug("Flagged (%d): %s", i, subjectHash)
        self.logger.info("Done scanning.")

    def clean(self, mode, folderName, minAge):

        """Trashes, archives or deletes messages older than minAge days

        Arguments:
        mode -- the cleaning mode.  Valid modes are:
            trash -- moves the messages to a trash folder
            archive -- moves the messages to folders based on their date
            delete -- deletes the messages
        folderName -- the name of the folder on which to operate
            This is a name like "Stuff", not a filename
        minAge -- messages younger than minAge days are left alone

        Messages are also left alone if keepRead is set and they are
        read, or if keepFlaggedThreads is set and their subject matches
        a flagged message.  In a trial run nothing is touched, but the
        counters in 'stats' are still updated.

        Raises ValueError if 'mode' is not one of the valid modes.
        """

        if mode not in ('trash', 'archive', 'delete'):
            raise ValueError("invalid cleaning mode: %r" % (mode,))

        if self.keepFlaggedThreads:
            self.scanSubjects(folderName)

        # Without an explicit archive folder, archive below the folder
        # being cleaned (or at the top level for INBOX).
        archiveFolder = self.archiveFolder
        if archiveFolder is None:
            if folderName == 'INBOX':
                archiveFolder = ""
            else:
                archiveFolder = folderName

        maildir = mailbox.Maildir(self._folderPath(folderName),
                                  MaildirMessage)

        fakeMsg = ""
        if self.isTrialRun:
            fakeMsg = "(Not really) "

        # Move old messages
        for i, msg in enumerate(maildir):
            if self.keepFlaggedThreads \
                    and msg.getSubjectHash() in self.keepSubjects:
                self.log(logging.DEBUG, "Keeping #%d (topic flagged)" % i, msg)
            elif msg.getAge() < minAge \
                    or (self.keepRead and not msg.isNew()):
                self.log(logging.DEBUG, "Keeping #%d (fresh)" % i, msg)
            else:
                if mode == 'trash':
                    self.log(logging.INFO, "%sTrashing #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        self.trashWriter.deliver(msg)
                        os.unlink(msg.getFilePath())
                elif mode == 'delete':
                    self.log(logging.INFO, "%sDeleting #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        os.unlink(msg.getFilePath())
                else:  # mode == 'archive'
                    subFolder = self._archiveSubFolder(
                            archiveFolder, msg.getDateSentOrRecd())
                    sfPath = os.path.join(self.folderBase,
                                          self.folderPrefix + subFolder)
                    self.log(logging.INFO, "%sArchiving #%d to %s" %
                             (fakeMsg, i, subFolder), msg)
                    if not self.isTrialRun:
                        # Create the subfolder if needed
                        if not os.path.exists(sfPath):
                            mkMaildir(sfPath)
                        # Deliver
                        self.__mdWriter.deliver(msg, sfPath)
                        os.unlink(msg.getFilePath())
                self.stats[mode] += 1
            self.stats['total'] += 1

    def log(self, lvl, text, msgObj):
        """Log some text with the subject of a message"""
        subj = msgObj.getSubject()
        if subj is None:
            subj = "(no subject)"
        self.logger.log(lvl, "%s: %s", text, subj)


# Command-line driver: parse the options, configure one MaildirCleaner
# and run it over every folder named on the command line.

# Defaults
minAge = 14
mode = None

logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
# Normal verbosity: suppress everything below INFO.  --quiet and
# --verbose move this threshold.
logging.disable(logging.INFO - 1)
logger = logging.getLogger('cleanup-maildir')
cleaner = MaildirCleaner()


def _intOption(name, value):
    """Return option argument 'value' as an int, or exit with status 2"""
    try:
        return int(value)
    except ValueError:
        logger.error("%s requires an integer argument, not '%s'"
                     % (name, value))
        sys.exit(2)


# Read command-line arguments
try:
    opts, args = getopt.getopt(sys.argv[1:],
            "hqvnrm:t:a:kd:",
            ["help", "quiet", "verbose", "version", "mode=", "trash-folder=",
             "age=", "keep-flagged-threads", "keep-read", "folder-seperator=",
             "folder-prefix=", "maildir-root=", "archive-folder=",
             "archive-hierarchy-depth=", "trial-run"])
except getopt.GetoptError:
    # sys.exc_info() is used instead of "except ..., err" /
    # "except ... as err" so this parses on every Python version.
    err = sys.exc_info()[1]
    logger.error("%s\n\n%s" % (err.msg, __doc__))
    sys.exit(2)

for o, a in opts:
    if o in ("-h", "--help"):
        print(__doc__)
        sys.exit()
    elif o in ("-q", "--quiet"):
        logging.disable(logging.WARNING - 1)
    elif o in ("-v", "--verbose"):
        logging.disable(logging.DEBUG - 1)
    elif o == "--version":
        print(__version__)
        sys.exit()
    elif o in ("-n", "--trial-run"):
        cleaner.isTrialRun = True
    elif o in ("-m", "--mode"):
        logger.warning("the --mode flag is deprecated (see --help)")
        if a in ('trash', 'archive', 'delete'):
            mode = a
        else:
            logger.error("%s is not a valid command" % a)
            sys.exit(2)
    elif o in ("-t", "--trash-folder"):
        cleaner.trashFolder = a
    elif o == "--archive-folder":
        cleaner.archiveFolder = a
    elif o in ("-a", "--age"):
        minAge = _intOption(o, a)
    elif o in ("-k", "--keep-flagged-threads"):
        cleaner.keepFlaggedThreads = True
    elif o in ("-r", "--keep-read"):
        cleaner.keepRead = True
    elif o == "--folder-seperator":
        cleaner.folderSeperator = a
    elif o == "--folder-prefix":
        cleaner.folderPrefix = a
    elif o == "--maildir-root":
        cleaner.folderBase = a
    elif o in ("-d", "--archive-hierarchy-depth"):
        archiveHierDepth = _intOption(o, a)
        if archiveHierDepth < 1 or archiveHierDepth > 3:
            sys.stderr.write("Error: archive hierarchy depth must be 1, " +
                             "2, or 3.\n")
            sys.exit(2)
        cleaner.archiveHierDepth = archiveHierDepth

if not cleaner.folderBase:
    # expanduser() honours $HOME but does not blow up with a KeyError
    # when the variable is unset.
    cleaner.folderBase = os.path.join(os.path.expanduser("~"), "Maildir")

# Unless the deprecated --mode option was used, the command is the
# first positional argument.
if mode is None:
    if len(args) < 1:
        logger.error("No command specified")
        sys.stderr.write(__doc__)
        sys.exit(2)
    mode = args.pop(0)
    if mode not in ('trash', 'archive', 'delete'):
        logger.error("%s is not a valid command" % mode)
        sys.exit(2)

if len(args) == 0:
    logger.error("No folder(s) specified")
    sys.stderr.write(__doc__)
    sys.exit(2)

logger.debug("Mode is " + mode)

# Clean each folder
for folderName in args:
    logger.debug("Cleaning up %s..." % folderName)
    cleaner.clean(mode, folderName, minAge)

logger.info('Total messages:     %5d' % cleaner.stats['total'])
logger.info('Affected messages:  %5d' % cleaner.stats[mode])
logger.info('Untouched messages: %5d' %
            (cleaner.stats['total'] - cleaner.stats[mode]))