#!/usr/bin/perl -w
#
# The following is an example application showing how to use the Poppy.pm
# module in Perl scripts.  It also has some suggested tests for detecting
# spam.  Most of the tests are just checks for spam headers that
# I've recently received.
#
# Also, this script makes a big assumption that any message with lots
# of non-ISO 8859 text in it is spam.  This is probably only true
# for people who only speak English...  The test is really targeting
# non-Western languages, though, since that's where a high percentage of
# spam comes from.
# 

use Poppy;

# Open the mail server session.  The credentials and server name below are
# placeholders and must be edited before the script is run.
#
# $poppy is intentionally left as a package global: the helper subroutines
# at the bottom of this file read it directly.
$poppy = Poppy->connect(user_name    => "put_name_here",
                        user_pass    => "put_passwd",
                        server_name  => "mail.yourdomain.com",
                        server_proto => "pop3",
                        debug        => 0);

die "Couldn't open server" unless ($poppy);

# Only the first two values returned by stats() are used by this script
# (presumably the message count and the number of the last message already
# seen -- confirm against Poppy.pm); anything further is ignored.
my ($totalmsgs, $lastmsg) = $poppy->stats();

# Nothing to scan: close the session the same way the normal exit path
# does before giving up, so the connection is not left dangling.
if (!defined($totalmsgs) || $totalmsgs == 0) {
    $poppy->disconnect();
    die "No messages to process";
}

my $num_new_msgs = $totalmsgs - $lastmsg;

print "Server has $totalmsgs messages and $num_new_msgs new messages\n";

# ----------------------------------------------------------------------
# Spam rules.
#
# The rules are kept as data so that adding a sender only means adding
# one word to a list.  Domain names are run through quotemeta() so that
# "." matches a literal dot only, and they are matched case-insensitively
# because DNS names are not case sensitive.  All patterns are compiled
# once, before the message loop.
# ----------------------------------------------------------------------

# Turn a list of literal strings into a regex alternation body.
my $any_of = sub {
    return join('|', map { quotemeta($_) } @_);
};

# X-Mailer values that are only reported, never deleted.
my $mass_mailer_re = qr/(Aristotle Mail|WorldMerge|Extractor Pro|Floodgate Pro|Emailer Platinum.*Internet Marketing)/;

# X-Mailer values that mark the message as spam outright.
my $spam_mailer_re = qr/StormPost|UnityMail|CurrentMailer/;

# From: is spam when the bracketed address ends in one of these domains.
my @spam_from_domains = qw(
    bb02.net                N0O1.com                 mailboxed.com
    planetinternet.be       play4keeps.com           verticalresponse.com
    currentmail.com         offersondemand.com       virtual0.net
    easywinning.com         emza.net                 PriorityHandling.com
    FirstClass-Delivery.com exitrequest.com          lists.zoanmail.com
    your-daily-horoscope.com permission-mail.com     ivaluenetwork.com
    winxpnews.com           tremendousbuys.com       e54.org
    bonanzaoffers.com       yarpit.com               actionemail.com
    flashsavings.com        postalmanager.com        cool-values.com
    MailerDns.com           emailcourrier.com        smarteryou.com
    servitall.com           thedealdoctor.com        speedydeals.com
    blarpit.com             mg0.net                  rhinorewards.net
    RoyalSavings.com        valuegazette.com         sbase30.com
    clickboost.com          yourmailsource.com       wickedfastmail.com
    more-money-for-you.com  beyondspecials.com       milespree.com
    ientrymail.com          hotdeals247.com          dealsalert.com
    Thoruoughmail.com       TopDispatch.com          great-dealz-for-you.com
    investorsinsight.com    admanmail.com            resourcepage.com
    StyleDelivers.com
);

# From: is spam when the bracketed address is exactly one of these.
my @spam_from_addresses = qw(leech120@yahoo.co.kr);

# Reply-To: is spam when the bracketed address ends in one of these domains.
my @spam_reply_to_domains = qw(bluerockdove.com);

# Message-ID is spam when one of these is directly followed by the closing ">".
my @spam_msg_id_tails = qw(
    dailydeals4you.com firstratedeals.com thenetdeals.com
    outerspacedeals.com milio.com financepages.com
    yarpit.com dirpit.com lirpit.com
);

# Message-ID is spam when one of these appears anywhere in it.
my @spam_msg_id_anywhere = qw(insuranceiq.com pxlg.com);

# Subject patterns; case sensitivity is chosen per pattern.
my @spam_subject_res = (
    qr/^Adv:/i,
    qr/CELLPADDING/i,
    qr/cellspacing/i,
    qr/\[Adv\]/i,
    qr/W32\.Elkern/,
    qr/\sHGH\s/,
    # Subject ends with lots of space plus a unique #
    qr/\s\s\s+\d+\s*$/,
    qr/a\s+very\s+nice\s+game/i,
    qr/a\s+funny\s+website/i,
    qr/A\s+special\s+humour\s+game/i,
    qr/Japanese girl VS playboy/i,
);

my $spam_to_re = qr/Friend\@public\.com/;

my $from_domain_alt  = $any_of->(@spam_from_domains);
my $from_address_alt = $any_of->(@spam_from_addresses);
my $reply_to_alt     = $any_of->(@spam_reply_to_domains);
my $msg_id_tail_alt  = $any_of->(@spam_msg_id_tails);
my $msg_id_any_alt   = $any_of->(@spam_msg_id_anywhere);

my $spam_from_domain_re  = qr/<.*(?:$from_domain_alt)>/i;
my $spam_from_address_re = qr/<(?:$from_address_alt)>/;
my $spam_reply_to_re     = qr/<.*(?:$reply_to_alt)>/i;
my $spam_msg_id_re       = qr/(?:$msg_id_tail_alt)>|(?:$msg_id_any_alt)/i;

# ----------------------------------------------------------------------
# Scan every message on the server, delete the ones classified as spam
# and print the identifying headers of each deleted message.
# ----------------------------------------------------------------------
my $total_deleted = 0;

for my $msg_num (1 .. $totalmsgs) {
    my $is_spam = 0;

    # Hook for "report but do not delete" rules; no rule in this script
    # sets it, so at present only spam messages get their headers printed.
    my $possible_spam = 0;

    print "Scanning header #$msg_num...\n";

    # Load this message's header into $poppy; the accessors below and the
    # helper subs at the end of the file all read from it.
    $poppy->header($msg_num);
    my ($to)       = $poppy->to();
    my ($from)     = $poppy->from();
    my ($replyto)  = $poppy->reply_to();
    my ($date)     = $poppy->date();
    my ($subject)  = $poppy->subject();
    my ($msg_id)   = $poppy->message_id();
    my ($x_mailer) = $poppy->get_header("X-Mailer");

    if ($x_mailer) {
        print "Mass Mailer\n" if ($x_mailer =~ $mass_mailer_re);
        $is_spam = 1 if ($x_mailer =~ $spam_mailer_re);
    }

    # Structural checks implemented by the helper subs.
    $is_spam = 1 if (subject_lots_of_8bits());
    $is_spam = 1 if (subject_is_mime());
    $is_spam = 1 if (missing_manditory_fields());
    # Questionable check
#   $is_spam = 1 if (msg_id_invalid());
    # Questionable check
#   $is_spam = 1 if (header_field_with_no_space());
    $is_spam = 1 if (forged_received());

    # Table-driven checks on individual header values.
    if ($from) {
        $is_spam = 1 if ($from =~ $spam_from_domain_re);
        $is_spam = 1 if ($from =~ $spam_from_address_re);
    }

    if ($replyto) {
        $is_spam = 1 if ($replyto =~ $spam_reply_to_re);
    }

    if ($subject) {
        $is_spam = 1 if (grep { $subject =~ $_ } @spam_subject_res);
    }

    if ($to) {
        $is_spam = 1 if ($to =~ $spam_to_re);
    }

    if ($msg_id) {
        $is_spam = 1 if ($msg_id =~ $spam_msg_id_re);
    }

    # Show what is about to be deleted (or merely suspected).
    if ($is_spam || $possible_spam) {
        print "To: $to\n" if ($to);
        print "From: $from\n" if ($from);
        print "Subject: $subject\n" if ($subject);
        print "Date: $date\n" if ($date);
        print "Message-ID: $msg_id\n" if ($msg_id);
    }

    if ($is_spam) {
        $total_deleted++;
        $poppy->delete($msg_num);
    }
}

$poppy->disconnect();

print "Deleted $total_deleted messages\n";

# Decide whether the current message's Subject consists mostly of
# characters with the high bit set (\200-\377).
#
# Returns 0 when there is no subject; otherwise a true value when the
# high-bit characters are at least as numerous as the remaining ones AND
# there are more than three of them, and a false value if not.
sub subject_lots_of_8bits {
  my ($text) = $poppy->subject();

  return 0 unless ($text);

  # tr/// with an empty replacement list only counts; $text is untouched.
  my $high_count = ($text =~ tr/\200-\377//);
  my $low_count  = length($text) - $high_count;

  # A majority of 8-bit characters most likely means the sender is not
  # using an ISO 8859 encoding.  The "more than three" floor keeps very
  # short subjects (say, a single word that happens to be rich in
  # accented letters) from tripping the test.
  return ($low_count <= $high_count && $high_count > 3);
}

# Tell whether the current message's Subject contains something that looks
# like a MIME encoded-word, i.e. "=?" followed later by "?=".  The charset
# named inside the encoded-word is not examined.
#
# Returns 0 when there is no subject, otherwise the result of the match.
sub subject_is_mime {
  my ($text) = $poppy->subject();

  return 0 unless ($text);

  return scalar($text =~ /=\?.*\?=/);
}

# Check that the current message carries the three kinds of header
# information this script insists on: a date, some indication of where the
# message came from, and some indication of whom it was sent to.
#
# Returns 1 if any of the three is missing, 0 if all are present.
sub missing_manditory_fields {
  # A Date header must be present.
  my $dated = $poppy->get_header("Date") ? 1 : 0;

  # Any one of the originator-type fields is enough.
  my $sourced = ($poppy->get_header("Return-path") ||
                 $poppy->get_header("Received") ||
                 $poppy->get_header("Reply-To") ||
                 $poppy->from() ||
                 $poppy->get_header("Sender")) ? 1 : 0;

  # Any one of the recipient-type fields is enough.
  my $addressed = ($poppy->to() ||
                   $poppy->get_header("Resent-To") ||
                   $poppy->cc() ||
                   $poppy->get_header("cc") ||
                   $poppy->get_header("Resent-cc") ||
                   $poppy->bcc() ||
                   $poppy->get_header("bcc") ||
                   $poppy->get_header("Resent-bcc")) ? 1 : 0;

  return ($dated && $sourced && $addressed) ? 0 : 1;
}

# Report whether the current message's Message-ID header is malformed.
#
# Returns 1 when a Message-ID is present but does not have the shape
# "<something@something>", and 0 when it has that shape or when there is
# no Message-ID at all (a missing header is not treated as invalid here).
sub msg_id_invalid {
    # Hold the header in a lexical.  Assigning to the global $_ instead
    # would overwrite the caller's $_ -- and, when the caller is inside a
    # foreach/map/grep, the list element that $_ is aliased to.
    my ($msg_id) = $poppy->get_header("Message-ID");

    return 0 unless ($msg_id);

    return ($msg_id =~ /^<.*\@.*>$/) ? 0 : 1;
}

# Look for a "Received:" line that shows up after one of the user-level
# header lines (From:, To:, Subject: or Date:) in the current message's
# header.  The script treats a Received: line in that position as a sign
# that it was forged.
#
# Returns 1 if such a line is found, 0 otherwise.
sub forged_received {
   my $seen_user_header = 0;

   for my $hdr_line ($poppy->header()) {
       # Test for the late Received: line first, so that the line which
       # switches the flag on is never itself the one being judged.
       return 1 if ($seen_user_header && $hdr_line =~ /^Received:\s/);

       next if ($seen_user_header);

       if ($hdr_line =~ /^From:\s/) {
           $seen_user_header = 1;
       }
       elsif ($hdr_line =~ /^To:\s/) {
           $seen_user_header = 1;
       }
       elsif ($hdr_line =~ /^Subject:\s/) {
           $seen_user_header = 1;
       }
       elsif ($hdr_line =~ /^Date:\s/) {
           $seen_user_header = 1;
       }
   }

   return 0;
}

# Report whether any header field of the current message has its value
# glued directly to the colon, e.g. "Subject:hello" instead of
# "Subject: hello".
#
# Only lines that begin a header field are examined: the field name must
# start in column one and contain neither whitespace nor a colon.  Folded
# continuation lines (which begin with whitespace) and lines without a
# colon are skipped, so a time such as "12:00:00" inside a wrapped
# Received: header is not mistaken for a malformed field.  A field with an
# empty value is not reported.
#
# Returns 1 if such a field is found, 0 otherwise.
sub header_field_with_no_space {
   my @msg_header = $poppy->header();

   foreach my $line (@msg_header) {
       my ($after) = ($line =~ /^[^\s:]+:(.*)/);

       # Not the first line of a header field.
       next unless (defined $after);

       # "." never matches a newline, so $after holds only the text that
       # follows the colon on this line; a non-blank first character
       # means the separating space is missing.
       return 1 if ($after =~ /^\S/);
   }

   return 0;
}
