#! /bin/bash
# Usage: sa-education-false-positives
#
# Purpose: feed to 'sa-learn --ham' the mails that:
#     * arrived in the last $THRESHOLD_DAYS days (selected by mtime)
#     * are located in the 'cur' directory of the $NONSPAM_DIR Maildir folders
#
# Background: this is only useful if your mail users move the false
# positives (ham recognized as spam by SpamAssassin) to the dedicated
# Maildir folder. Also, this script will re-inject emails already
# detected by SA into sa-learn, which will ignore them.
#
# Warning: the Maildir tree is supposed to be organized like this :
#     $MAIL_ROOT_DIR
#        / domain1.tld
#            / mailbox1 / $MAILDIR_ROOT
#            / mailbox2 / $MAILDIR_ROOT
#        / domain2.tld
#            / mailbox3 / $MAILDIR_ROOT
# If it's not the case, you have to adjust the 'find' command below.


#
##
### Configuration variables

# Maildir folder (inside each mailbox's Maildir) holding the false positives.
NONSPAM_DIR=".NonSpam"
# Name of the Maildir directory inside each mailbox directory.
MAILDIR_ROOT="Maildir"
# Top-level directory containing one sub-directory per mail domain.
MAIL_ROOT_DIR="/var/mail"
# Only mails newer than this many days are handed to sa-learn.
THRESHOLD_DAYS="1"

#
##
### Main
### We copy the interesting emails to a temporary directory, since it's more
### efficient to run sa-learn once on a directory than once per email.

# Create a private temporary directory, exit if it fails.
TMP_DIR=$(mktemp -d -t sa-education-false-positives.XXXXXX) || exit 1

# Remove the temporary directory whenever the script exits, so copies of
# user mail are not left behind if a later step fails. Because cleanup no
# longer runs as the last command, the script's exit status is now the one
# returned by sa-learn.
trap 'rm -rf -- "$TMP_DIR"' EXIT

# Copy the interesting emails to the temporary directory.
# The two '*' path components (domain and mailbox) are deliberately left
# unquoted so the shell expands them; every variable is quoted so that a
# value containing spaces or glob characters is passed through unchanged.
find "$MAIL_ROOT_DIR"/*/*/"$MAILDIR_ROOT"/"$NONSPAM_DIR"/cur/ \
    -type f \
    -mtime "-$THRESHOLD_DAYS" \
    -exec cp -- {} "$TMP_DIR" \;

# Inject these emails into sa-learn (one run for the whole directory).
sa-learn --ham "$TMP_DIR"
