#!/usr/bin/php
<?
/***
	DatuX recipient address verifier for the SYN-3 transparent mail filter.

	This connects to the SMTP servers found in qmail's smtproutes and verifies whether the recipient address really exists. (Results are cached.)

	Mail to nonexistent addresses is used to train dspam.

*/

require 'Net/SMTP.php';

// Directory that holds the cached verification results.
// NOTE(review): a fixed, predictable directory below /tmp can be pre-created by
// any local user - confirm this location is acceptable for the deployment.
define("CACHEDIR", "/tmp/smtpcheck");

// Maximum age of a cache entry, in seconds.
define("CACHETIME", 600);

// Value passed to dspam's --source option when training.
define("DSPAM_SOURCE", "corpus");
//define("DSPAM_SOURCE", "inoculation");

// Mailbox that receives one copy of each filtered mail.
define("BACKUP", "spamfilter@vrieling.nl");

/**
 * Log an error message and terminate the script.
 *
 * The exit status is always 111, which this script uses to signal a
 * temporary error to its caller.
 *
 * @param string $txt message written to the error log before exiting
 */
function abort($txt)
{
	$temporaryFailure = 111;

	logError($txt);
	exit($temporaryFailure);
}

/**
 * Write an informational message to syslog (mail facility).
 *
 * The message is prefixed with the process id of this script.
 *
 * @param string $txt text to log
 */
function logVerbose($txt)
{
	$line = getmypid().": $txt";
	syslog(LOG_MAIL | LOG_INFO, $line);
}

/**
 * Write an error message to syslog (mail facility).
 *
 * The message is prefixed with the process id of this script.
 *
 * @param string $txt text to log
 */
function logError($txt)
{
	$line = getmypid().": $txt";
	syslog(LOG_MAIL | LOG_ERR, $line);
}

// Result codes returned by checkRcpt() and cachedCheckRcpt().

// The address exists according to the server.
define("CHECK_OK", 0);

// The address was rejected by the server.
define("CHECK_REJECT", 1);

// Some other kind of error occurred (such a result is not reused from the cache).
define("CHECK_ERROR", 2);

/**
 * Connects to an SMTP server to check whether it accepts a recipient.
 *
 * Issues MAIL FROM and RCPT TO and then disconnects again; no message data
 * is ever sent. The connection is closed on every path once it has been
 * established, so a failed or rejected check does not leave it open.
 *
 * NOTE(review): every error returned by rcptTo() is reported as CHECK_REJECT,
 * whatever its cause - confirm that temporary failures should really be
 * handled like a rejection.
 *
 * @param string $host SMTP server to connect to
 * @param mixed  $port port handed to Net_SMTP
 * @param string $from envelope sender used for MAIL FROM
 * @param string $rcpt recipient address to verify
 * @return int CHECK_OK, CHECK_REJECT or CHECK_ERROR
 */
function checkRcpt($host, $port, $from, $rcpt)
{
	logVerbose("Connecting to $host to check $rcpt from $from");

	/* Create a new Net_SMTP object ("new" never yields a false value, so there is nothing to test). */
	$smtp = new Net_SMTP($host, $port);

	/* Connect to the SMTP server. */
	if (PEAR::isError($e = $smtp->connect(10)))
	{
		logError($e->getMessage());
		return (CHECK_ERROR);
	}

	/* Send the 'MAIL FROM:' SMTP command. */
	if (PEAR::isError($smtp->mailFrom($from)))
	{
		logError("Unable to set sender to <$from>");
		$result = CHECK_ERROR;
	}
	/* Offer the recipient to the server. */
	else if (PEAR::isError($res = $smtp->rcptTo($rcpt)))
	{
		logVerbose("Address REJECTED (" . $res->getMessage().")");
		$result = CHECK_REJECT;
	}
	/* Don't send anything of course; assume the address exists at this point. */
	else
	{
		logVerbose("Address OK");
		$result = CHECK_OK;
	}

	/* Disconnect from the SMTP server, whatever the outcome was. */
	$smtp->disconnect();

	return ($result);
}



/**
 * Builds the path of the cache file that belongs to a cache id.
 *
 * The id is a recipient address taken from the command line, so it must not
 * be used as a path component as-is: rawurlencode() turns "/" and NUL bytes
 * into %XX escapes, and the fixed prefix keeps the name from ever being "."
 * or "..".
 *
 * @param string $id cache id
 * @return string path below CACHEDIR
 */
function cacheFileFor($id)
{
	return (CACHEDIR."/rcpt-".rawurlencode($id));
}

/**
 * Loads the cache entry for an id.
 *
 * @param string $id cache id
 * @return array the stored data, or an empty array when there is no entry,
 *               the entry is unreadable or it is older than CACHETIME seconds
 */
function cacheGet($id)
{
	$cachefile=cacheFileFor($id);

	if (!is_file($cachefile))
	{
		logVerbose("Cache: $id not cached yet");
		return (array());
	}

	$raw=file_get_contents($cachefile);

	// NOTE(review): unserialize() can instantiate objects from the file
	// contents. The cache lives below /tmp, so confirm that only this script
	// can write to CACHEDIR, or switch to a data-only format.
	$cachedata=($raw===false) ? false : unserialize($raw);

	//a truncated or foreign file must not be mistaken for a valid entry
	if (!is_array($cachedata) || !isset($cachedata["time"]))
	{
		logError("Cache: $id unreadable, ignoring");
		return (array());
	}

	//cache entry expired?
	if (time()-$cachedata["time"]> CACHETIME)
	{
		logVerbose("Cache: $id expired");
		return (array());
	}

	logVerbose("Cache: $id loaded");
	return ($cachedata);
}

/**
 * Stores a cache entry for an id, stamped with the current time.
 *
 * Failures are logged and otherwise ignored: the cache is only an
 * optimisation, so the caller carries on without it.
 *
 * @param string $id        cache id
 * @param array  $cachedata data to store; its "time" key is overwritten
 */
function cachePut($id, $cachedata)
{
	//the second is_dir() covers another process creating the directory in between
	if (!is_dir(CACHEDIR) && !mkdir(CACHEDIR) && !is_dir(CACHEDIR))
	{
		logError("Cache: unable to create ".CACHEDIR);
		return;
	}

	$cachedata["time"]=time();
	if (file_put_contents(cacheFileFor($id), serialize($cachedata))===false)
	{
		logError("Cache: unable to store $id");
		return;
	}
	logVerbose("Cache: stored $id");
}


/**
 * Cached front end for checkRcpt().
 *
 * A stored result for the recipient is reused unless it is CHECK_ERROR;
 * otherwise the server is asked again and the new result is stored.
 *
 * @param string $host SMTP server to ask
 * @param mixed  $port port of that server
 * @param string $from envelope sender
 * @param string $rcpt recipient address, also used as the cache id
 * @return int CHECK_OK, CHECK_REJECT or CHECK_ERROR
 */
function cachedCheckRcpt($host, $port, $from, $rcpt)
{
	logVerbose("Checking host $host to $rcpt from $from");

	$entry=cacheGet($rcpt);

	//a usable answer is one that exists and was not an error last time
	$usable=isset($entry["checkRcpt"]) && $entry["checkRcpt"]!=CHECK_ERROR;
	if ($usable)
	{
		return ($entry["checkRcpt"]);
	}

	//ask the server and remember what it said
	$entry["checkRcpt"]=checkRcpt($host, $port, $from, $rcpt);
	cachePut($rcpt, $entry);

	return ($entry["checkRcpt"]);
}

/**
 * Runs /usr/bin/dspam with the given arguments, feeding it the mail on stdin.
 *
 * $args is appended to the command line unescaped, so callers have to quote
 * every value they put into it.
 *
 * @param string   $args  command line arguments for dspam
 * @param resource $tmpfh open file handle that becomes dspam's standard input
 * @return int the value returned by proc_close(), or 111 (this script's
 *             temporary-error code) when dspam could not be started
 */
function callDspam($args, $tmpfh)
{
	$cmd="/usr/bin/dspam $args";
	logVerbose("Executing: $cmd");
	$pipes=array();
	$proc=proc_open ( $cmd,
		array (0=>$tmpfh),
		$pipes);
	if (!is_resource($proc))
	{
		logError("Error while executing");
		//Never hand a failed handle to proc_close(): its falsy result would
		//compare equal to 0 and make callers believe dspam succeeded.
		return (111);
	}
	return(proc_close($proc));
}




//Filter rules used by filterCheck(); loaded from /etc/amavis2dspam.filter on first use.
$filters=array();

/**
 * Reads the filter list from a file.
 *
 * NOTE(review): the file content is executed as PHP code through eval().
 * Confirm the file can only be written by trusted administrators, or move
 * the rules to a data-only format.
 *
 * @param string $file path of the filter definition file
 * @return array list of filters; empty when the file cannot be read or does
 *               not evaluate to an array
 */
function filterLoad($file)
{
	$code=is_readable($file) ? file_get_contents($file) : false;
	if ($code===false)
	{
		logError("FILTER: unable to read $file, no filters active");
		return (array());
	}

	$loaded=eval('return '.$code.';');
	if (!is_array($loaded))
	{
		logError("FILTER: $file did not yield a filter list, no filters active");
		return (array());
	}
	return ($loaded);
}

/**
 * Finds the first filter of a given mode that applies to the user and matches the data.
 *
 * A filter applies when its username is "global" or equals $user (compared
 * case-insensitively). Its regex is matched with the "i" and "m" modifiers.
 *
 * @return array|null array(filter, matched text) or null when nothing matches
 */
function filterFind($filters, $mode, $user, $data)
{
	foreach ($filters as $filter)
	{
		if ($filter["mode"]!=$mode)
			continue;

		$owner=strtolower($filter["username"]);
		if ($owner!="global" && $owner!=strtolower($user))
			continue;

		if (preg_match("/".$filter["regex"]."/im",$data,$matches))
			return (array($filter, $matches[0]));
	}
	return (null);
}

/**
 * Checks data against all filters and logs a descriptive line if a match is found.
 *
 * Whitelist ("allow") filters are checked before blacklist ("deny") filters.
 * The filter file is read at most once per run, even when it turns out to be
 * missing or empty.
 *
 * @param string $user user the filters are checked for
 * @param string $data text the filter regexes are matched against
 * @return int 1=blacklist 0=no match -1=whitelist
 */
function filterCheck($user, $data)
{
	global $filters;
	static $loaded=false;

	if (!$loaded && !$filters)
	{
		$loaded=true;
		$filters=filterLoad("/etc/amavis2dspam.filter");
	}

	//check whitelists:
	$hit=filterFind($filters, "allow", $user, $data);
	if ($hit)
	{
		logVerbose("FILTER: Whitelisted by user ".$hit[0]["username"].", regex '".$hit[0]["regex"]."' on data: ".$hit[1]);
		return -1;
	}

	//check blacklists
	$hit=filterFind($filters, "deny", $user, $data);
	if ($hit)
	{
		logVerbose("FILTER: Blacklisted by user ".$hit[0]["username"].", regex '".$hit[0]["regex"]."' on data: ".$hit[1]);
		return 1;
	}

	return (0);
}

//determine sender and receivers: the first argument is the sender, all
//following arguments are recipients.
//Single quotes are replaced because the addresses are embedded in
//single-quoted shell arguments further down.
$sender=isset($argv[1]) ? $argv[1] : "";
$sender=strtolower($sender);
$sender=str_replace("'","_",$sender);
array_shift($argv);
array_shift($argv);
$rcpts=$argv;

//read qmail smtp routes (lines of the form domain:host[:port])
$smtproutes=array();
$smtproutelines=file("/var/qmail/control/smtproutes");
if ($smtproutelines===false)
	abort("error reading smtproutes");

foreach ($smtproutelines as $smtprouteline)
{
	//file() keeps the line terminator; without trimming it ends up inside
	//the last field (the host when no port is given, otherwise the port).
	$smtprouteline=trim($smtprouteline);

	//nothing to parse on blank lines and comment lines
	if ($smtprouteline==="" || $smtprouteline[0]=="#")
		continue;

	$fields=explode(":",$smtprouteline);
	$domain=strtolower($fields[0]);

	//a missing field stays null, as before, just without the undefined index notice
	$smtproutes[$domain]["host"]=isset($fields[1]) ? $fields[1] : null;
	$smtproutes[$domain]["port"]=(isset($fields[2]) && $fields[2]!=="") ? $fields[2] : null;
}
if (!$smtproutes)
	abort("error reading smtproutes");


$filtered=array();

//read mail from stdin, into temporary file
$tmpfh=tmpfile();
if (!is_resource($tmpfh))
	abort("error while creating temporary file");

//the first $blocksize bytes of the mail are kept for the filter check
$blocksize=65535;
$firstblock="";
while (!feof(STDIN))
{
	$data=fread(STDIN, $blocksize);
	if (!is_string($data))
	{
		abort("error while reading mail from input");
	}

	//fread() on a pipe may return far less than requested, so keep
	//collecting until the intended amount of data is available.
	$missing=$blocksize-strlen($firstblock);
	if ($missing>0)
		$firstblock.=substr($data, 0, $missing);

	if (fwrite($tmpfh, $data)!=strlen($data))
	{
		abort("error while writing mail to temporary file");
	}
}


$inoculatedglobal=false;	//global filter already trained with this mail?
$inoculateduser=array();	//users whose filter was already trained with this mail
$users_str="";			//quoted dspam users that still get the mail delivered
$rcpts_str="";			//quoted recipients that still get the mail delivered
$backedup=false;		//copy already sent to the BACKUP mailbox?

//traverse all the rcpts
foreach ($rcpts as $rcpt)
{
	//filter rcpt: single quotes are replaced because the address is embedded
	//in single-quoted shell arguments below
	$rcpt=strtolower($rcpt);
	$rcpt=str_replace("'","_",$rcpt);

	//the dspam user / routing key is the domain: everything after the LAST "@"
	//(an address without "@" yields an empty key)
	$at=strrpos($rcpt,"@");
	$user=($at===false) ? "" : (string)substr($rcpt,$at+1);

	logVerbose("Processing mail from $sender to $rcpt");

	//reset tmp file
	fseek($tmpfh,0);
	unset($pipes);

	if (!isset($smtproutes[$user]))
	{
		logError("No smtp route found for $user!");
		if (!$inoculatedglobal)
		{
			logError("INOCULATING global filter.");
			$inoculatedglobal=(callDspam("--user global --deliver=summary --source=".DSPAM_SOURCE." --class=spam" , $tmpfh)==0);
		}
		else
		{
			logError("IGNORING mail.");
		}
		continue;
	}

	//route exists
	$smtproute=$smtproutes[$user];

	//check filter for this user
	$filtered=filtercheck($user, $firstblock);

	//whitelisted?
	if ($filtered==-1)
	{
		logVerbose("INOCULATING as ham $rcpt");
		callDspam("--user '$user' --deliver=summary  --mail-from='$sender' --rcpt-to '$rcpt' --source=".DSPAM_SOURCE." --class=innocent" , $tmpfh);
		logVerbose("DELIVERING to $rcpt");
		$users_str.="'$user' ";
		$rcpts_str.="'$rcpt' ";
	}

	//is it blacklisted? does the final smtp server reject the mail?
	else if ($filtered==1 || cachedCheckRcpt($smtproute["host"], $smtproute["port"], $sender, $rcpt)==CHECK_REJECT)
	{
		//train each user's filter only once per mail
		if (empty($inoculateduser[$user]))
		{
			logVerbose("INOCULATING as spam $rcpt");
			if (callDspam("--user '$user' --deliver=summary  --mail-from='$sender' --rcpt-to '$rcpt' --source=".DSPAM_SOURCE." --class=spam" , $tmpfh)==0)
			{
				$inoculateduser[$user]=true;
			}
		}
		else
		{
			logVerbose("IGNORING $rcpt ($user already inoculated)");
		}

		//backup 1 mail to special mailbox to not lose filtered mail:
		if (!$backedup)
		{
			logVerbose("Backing up filtered mail to ".BACKUP);
			fseek($tmpfh,0);
			$pipes=array();
			$proc=proc_open ( "/var/qmail/bin/qmail-inject -f '$sender' ".BACKUP,
				array (0=>$tmpfh),
				$pipes);
			if (!is_resource($proc))
			{
				//no process to wait for; leave $backedup unset so the
				//backup is tried again for the next filtered recipient
				logError("Error while executing");
			}
			else if (proc_close($proc)==0)
			{
				$backedup=true;
			}
			else
			{
				logError("Backup to ".BACKUP." failed");
			}
		}
	}

	//normal mail
	else
	{
		logVerbose("DELIVERING to $rcpt");
		$users_str.="'$user' ";
		$rcpts_str.="'$rcpt' ";
	}
}

//reset tmp file
fseek($tmpfh,0);
unset($pipes);

//nothing left to actually try to deliver?
if (!$users_str)
{
	logVerbose("(No mails left to deliver to dspam)");
	exit(0);
}

logVerbose("Calling dspam to deliver mails");
$status=callDspam("--user $users_str --deliver=innocent --mail-from='$sender' --rcpt-to $rcpts_str", $tmpfh);

//callDspam() hands back the result of proc_close(). Anything that is not a
//proper exit status must not leave this script as 0 (which would report the
//mail as handled), so it is turned into the temporary error code.
if (!is_int($status) || $status<0)
{
	logError("dspam did not report an exit status, returning temporary error");
	$status=111;
}
exit($status);


