sieve-spamasssassin   [plain text]


From bob@nas.com Thu May  9 18:00:16 2002
Date: Thu, 25 Apr 2002 11:01:11 -0700
From: Bob Finch <bob@nas.com>
To: info-cyrus@lists.andrew.cmu.edu
Subject: "spam" sieve extension

I've gotten a handful of requests for the "spam" sieve extension I
mentioned yesterday.  The diffs aren't too big, so I'll post them to
the list.

The diffs add a test to sieve that passes the message to SpamAssassin.
If SpamAssassin scores the message as spam, the test succeeds; otherwise
the test fails.  Here's a simple sieve script using the spam
extension:

  require [ "spam", "fileinto" ];
  if spam {
      fileinto "spamfolder";
  }

The diffs are based on cyrus-imapd-2.1.3 -- I haven't tried them with
other versions.  You'll also need to install SpamAssassin (see
http://www.spamassassin.org/) and have spamd running.  I'm currently
running SpamAssassin 2.11.  I've been running it for about a month on
several mailboxes that typically get about 500 messages a day.

The extension adds three new imapd.conf parameters:

  spam_max_size: 256000
      The spam test will always return false for messages larger than
      spam_max_size.  Messages larger than spam_max_size will not be
      passed to spamd.

  spam_spamd_host: 127.0.0.1
      The IP address or hostname of the spamd server.

  spam_spamd_port: 783
      The port number of the spamd server.

When sieve evaluates the spam test, it calls a callback in
imap/lmtpd.c that opens a connection to spamd.  It then sends a CHECK
command and writes the message to spamd.  It parses the response from
spamd and returns the result of the test to sieve.

Things on my to-do list:

  * Add a configure option and ifdefs to conditionally include the
    spam extension

  * Provide a way for users to get more information about how
    SpamAssassin scores messages.  This will require help from spamd,
    since it currently just returns the score and threshold.

  * Documentation

-- Bob


diff -cr cyrus-imapd-2.1.3-orig/imap/lmtpd.c cyrus-imapd-2.1.3/imap/lmtpd.c
*** cyrus-imapd-2.1.3-orig/imap/lmtpd.c	Thu Mar  7 09:55:28 2002
--- cyrus-imapd-2.1.3/imap/lmtpd.c	Thu Apr 18 21:14:10 2002
***************
*** 111,116 ****
--- 111,124 ----
  
      char *authuser;		/* user who submitted message */
      struct auth_state *authstate;
+ 
+     /* spam stuff */
+ 
+     /* This is in script_data, but the spam callback can't get to it */
+     /* so we put a copy here */
+     char *username;		/* Username of mailbox */
+     int spam_result_valid;	/* != 0 iff spam result is valid */
+     int spam_result;		/* != 0 iff message is spam */
  } mydata_t;
  
  /* data per script */
***************
*** 910,915 ****
--- 918,1087 ----
  static char *markflags[] = { "\\flagged" };
  static sieve_imapflags_t mark = { markflags, 1 };
  
+ /* spam support */
+ 
+ static int
+ getline (int s, char *buf, int len)
+ {
+     char *bp = buf;
+     int ret = 1;
+     char ch;
+ 
+     while ((ret = read (s, &ch, 1)) == 1 && ch != '\n') {
+ 	if (len > 0) {
+ 	    *bp++ = ch;
+ 	    len--;
+ 	}
+     }
+     if (len > 0)
+ 	*bp = '\0';
+     return (buf != bp);
+ }
+ 
+ 
+ static int
+ full_write (int s, char *buf, int len)
+ {
+     int total;
+     int ret;
+ 
+     for (total = 0; total < len; total += ret) {
+ 	ret = write (s, buf + total, len - total);
+ 	if (ret < 0)
+ 	    return 0;
+     }
+     return total == len;
+ }
+ 
+ 
+ static int
+ read_response (int s, int *result)
+ {
+     char is_spam[6];
+     char buf[1024];
+     int major;
+     int minor;
+     int response;
+     int score;
+     int threshold;
+ 
+     if (! getline (s, buf, sizeof (buf))) {
+ 	syslog (LOG_ERR, "read_response: response getline failed");
+ 	return SIEVE_FAIL;
+     }
+     if (sscanf (buf, "SPAMD/%d.%d %d %*s", &major, &minor, &response) != 3) {
+ 	syslog (LOG_ERR, "read_response: response sscanf failed, buf: %s",
+ 		buf);
+ 	return SIEVE_FAIL;
+     }
+     if (major < 1 || (major == 1 && minor < 1)) {
+ 	syslog (LOG_ERR, "read_response: bad spamd version: %d.%d",
+ 		major, minor);
+ 	return SIEVE_FAIL;
+     }
+     if (! getline (s, buf, sizeof (buf))) {
+ 	syslog (LOG_ERR, "read_response: header getline failed");
+ 	return SIEVE_FAIL;
+     }
+     if (sscanf (buf, "Spam: %5s ; %d / %d", is_spam, &score, &threshold) != 3) {
+ 	syslog (LOG_ERR, "read_response: header sscanf failed, buf: %s",
+ 		buf);
+ 	return SIEVE_FAIL;
+     }
+ 
+     *result = ! strcmp(is_spam, "True");
+     return SIEVE_OK;
+ }
+     
+ 
+ int spam (void *mc, int *is_spam)
+ {
+     mydata_t *d = (mydata_t *) mc;
+     message_data_t *m = d->m;
+     int s;
+     struct sockaddr_in addr;
+     struct hostent *host;
+     char header[128];
+     int max_size = config_getint ("spam_max_size", 250 * 1024);
+     const char *hostname = config_getstring ("spam_spamd_host", "127.0.0.1");
+     int port = config_getint ("spam_spamd_port", 783);
+     char *msg_buf;
+     int ret;
+ 
+     /* Assume message isn't spam if it is larger than max_size */
+     if (m->size > max_size) {
+ 	syslog (LOG_INFO, "spam: skipping message bigger than %d", max_size);
+ 	return SIEVE_FAIL;
+     }
+ 
+     memset (&addr, 0, sizeof(addr));
+     addr.sin_family = AF_INET;
+     addr.sin_port = htons(port);
+ 
+     if ((host = gethostbyname (hostname)) == NULL) {
+ 	syslog (LOG_ERR, "spam: gethostbyname failed");
+ 	return SIEVE_FAIL;
+     }
+     memcpy (&addr.sin_addr, host->h_addr, sizeof (addr.sin_addr));
+ 
+     if((s = socket (PF_INET, SOCK_STREAM, 0)) < 0) {
+ 	syslog (LOG_ERR, "spam: socket failed");
+ 	return SIEVE_FAIL;
+     }
+ 
+     if (connect (s, (const struct sockaddr *) &addr, sizeof (addr)) < 0) {
+ 	syslog (LOG_ERR, "spam: connect failed");
+ 	close (s);
+ 	return SIEVE_FAIL;
+     }
+ 
+     if ((msg_buf = malloc (m->size)) == NULL) {
+ 	syslog (LOG_ERR, "spam: malloc(%d) failed", m->size);
+ 	close (s);
+ 	return SIEVE_FAIL;
+     }
+     rewind (m->f);
+     if (fread (msg_buf, 1, m->size, m->f) != m->size || ferror (m->f)) {
+ 	syslog (LOG_ERR, "spam: read message failed");
+ 	free (msg_buf);
+ 	close (s);
+ 	return SIEVE_FAIL;
+     }
+ 
+     if (d->username) {
+ 	snprintf (header, sizeof (header),
+ 		  "CHECK SPAMC/1.2\r\nUser: %s\r\nContent-length: %d\r\n\r\n",
+ 		  d->username, m->size);
+     }
+     else {
+ 	snprintf (header, sizeof (header),
+ 		  "CHECK SPAMC/1.2\r\nContent-length: %d\r\n\r\n", m->size);
+     }
+     if (! full_write (s, header, strlen (header))) {
+ 	syslog (LOG_ERR, "spam: write header failed");
+ 	free (msg_buf);
+ 	close (s);
+ 	return SIEVE_FAIL;
+     }
+     if (! full_write (s, msg_buf, m->size)) {
+ 	syslog (LOG_ERR, "spam: write message failed");
+ 	free (msg_buf);
+ 	close (s);
+ 	return SIEVE_FAIL;
+     }
+ 
+     shutdown (s, SHUT_WR);
+     ret = read_response (s, is_spam);
+     shutdown (s, SHUT_RD);
+ 
+     free (msg_buf);
+     close (s);
+ 
+     syslog(LOG_DEBUG, "spam result: %d\n", ret);
+     return ret;
+ }
+  
+ 
  int sieve_parse_error_handler(int lineno, const char *msg, void *ic, void *sc)
  {
      script_data_t *sd = (script_data_t *) sc;
***************
*** 999,1004 ****
--- 1171,1182 ----
  	fatal("sieve_register_vacation()", EC_SOFTWARE);
      }
  
+     res = sieve_register_spam(sieve_interp, &spam);
+     if (res != SIEVE_OK) {
+ 	syslog(LOG_ERR, "sieve_register_spam() returns %d\n", res);
+ 	fatal("sieve_register_spam()", EC_SOFTWARE);
+     }
+  
      res = sieve_register_parse_error(sieve_interp, &sieve_parse_error_handler);
      if (res != SIEVE_OK) {
  	syslog(LOG_ERR, "sieve_register_parse_error() returns %d\n", res);
***************
*** 1148,1154 ****
      mydata.notifyheader = generate_notify(msgdata);
      mydata.authuser = authuser;
      mydata.authstate = authstate;
!     
      /* loop through each recipient, attempting delivery for each */
      for (n = 0; n < nrcpts; n++) {
  	char *rcpt = xstrdup(msg_getrcpt(msgdata, n));
--- 1326,1335 ----
      mydata.notifyheader = generate_notify(msgdata);
      mydata.authuser = authuser;
      mydata.authstate = authstate;
!     mydata.username = NULL;
!     mydata.spam_result = 0;
!     mydata.spam_result_valid = 0;
! 
      /* loop through each recipient, attempting delivery for each */
      for (n = 0; n < nrcpts; n++) {
  	char *rcpt = xstrdup(msg_getrcpt(msgdata, n));
***************
*** 1187,1192 ****
--- 1368,1376 ----
  		sdata->username = rcpt;
  		sdata->mailboxname = plus;
  		sdata->authstate = auth_newstate(rcpt, (char *)0);
+ 
+ 		/* Make a copy of mailbox username for spam stuff */
+ 		mydata.username = sdata->username;
  
  		/* slap the mailboxname back on so we hash the envelope & id
  		   when we figure out whether or not to keep the message */
diff -cr cyrus-imapd-2.1.3-orig/sieve/interp.c cyrus-imapd-2.1.3/sieve/interp.c
*** cyrus-imapd-2.1.3-orig/sieve/interp.c	Tue Oct  2 14:08:13 2001
--- cyrus-imapd-2.1.3/sieve/interp.c	Sun Mar 24 11:39:14 2002
***************
*** 154,159 ****
--- 154,166 ----
      return SIEVE_OK;
  }
  
+ int sieve_register_spam(sieve_interp_t *interp, sieve_spam *f)
+ {
+     interp->spam = f;
+  
+     return SIEVE_OK;
+ }
+ 
  /* add the callbacks for messages. again, undefined if used after
     sieve_script_parse */
  int sieve_register_size(sieve_interp_t *interp, sieve_get_size *f)
diff -cr cyrus-imapd-2.1.3-orig/sieve/interp.h cyrus-imapd-2.1.3/sieve/interp.h
*** cyrus-imapd-2.1.3-orig/sieve/interp.h	Mon Feb 21 23:56:40 2000
--- cyrus-imapd-2.1.3/sieve/interp.h	Sun Mar 24 11:40:53 2002
***************
*** 35,40 ****
--- 35,41 ----
      sieve_callback *redirect, *discard, *reject, *fileinto, *keep;
      sieve_callback *notify;
      sieve_vacation_t *vacation;
+     sieve_spam *spam;
  
      sieve_get_size *getsize;
      sieve_get_header *getheader;
diff -cr cyrus-imapd-2.1.3-orig/sieve/script.c cyrus-imapd-2.1.3/sieve/script.c
*** cyrus-imapd-2.1.3-orig/sieve/script.c	Wed Feb 27 13:05:13 2002
--- cyrus-imapd-2.1.3/sieve/script.c	Thu Apr 18 21:02:51 2002
***************
*** 102,107 ****
--- 102,114 ----
  	return 1;
      } else if (!strcmp("comparator-i;ascii-casemap", req)) {
  	return 1;
+     } else if (!strcmp("spam",req)) {
+ 	if (s->interp.spam) {
+ 	    s->support.spam = 1;
+ 	    return 1;
+ 	} else {
+ 	    return 0;
+ 	}
      }
      return 0;
  }
***************
*** 361,366 ****
--- 368,381 ----
  	    res = (sz < t->u.sz.n);
  	}
  	break;
+     }
+     case SPAM:
+     {
+ 	int is_spam;
+ 
+ 	if (i->spam == NULL || i->spam (m, &is_spam) != SIEVE_OK)
+ 	    break;
+ 	res = is_spam;
      }
      }
  
diff -cr cyrus-imapd-2.1.3-orig/sieve/script.h cyrus-imapd-2.1.3/sieve/script.h
*** cyrus-imapd-2.1.3-orig/sieve/script.h	Wed Feb  9 16:39:14 2000
--- cyrus-imapd-2.1.3/sieve/script.h	Sun Mar 24 11:39:13 2002
***************
*** 45,50 ****
--- 45,51 ----
  	int notify    : 1;
  	int regex     : 1;
  	int subaddress: 1;
+ 	int spam      : 1;
      } support;
  
      void *script_context;
diff -cr cyrus-imapd-2.1.3-orig/sieve/sieve-lex.l cyrus-imapd-2.1.3/sieve/sieve-lex.l
*** cyrus-imapd-2.1.3-orig/sieve/sieve-lex.l	Tue Feb 19 10:09:46 2002
--- cyrus-imapd-2.1.3/sieve/sieve-lex.l	Sat Mar 23 18:43:22 2002
***************
*** 90,95 ****
--- 90,96 ----
  <INITIAL>header		return HEADER;
  <INITIAL>not		return NOT;
  <INITIAL>size		return SIZE;
+ <INITIAL>spam		return SPAM;
  <INITIAL>reject		return REJCT;
  <INITIAL>fileinto	return FILEINTO;
  <INITIAL>redirect	return REDIRECT;
diff -cr cyrus-imapd-2.1.3-orig/sieve/sieve.y cyrus-imapd-2.1.3/sieve/sieve.y
*** cyrus-imapd-2.1.3-orig/sieve/sieve.y	Tue Mar  5 08:15:01 2002
--- cyrus-imapd-2.1.3/sieve/sieve.y	Sun Mar 24 11:36:18 2002
***************
*** 141,146 ****
--- 141,147 ----
  %token SETFLAG ADDFLAG REMOVEFLAG MARK UNMARK
  %token NOTIFY DENOTIFY
  %token ANYOF ALLOF EXISTS SFALSE STRUE HEADER NOT SIZE ADDRESS ENVELOPE
+ %token SPAM
  %token COMPARATOR IS CONTAINS MATCHES REGEX OVER UNDER
  %token ALL LOCALPART DOMAIN USER DETAIL
  %token DAYS ADDRESSES SUBJECT MIME
***************
*** 398,403 ****
--- 399,409 ----
  	| NOT test		 { $$ = new_test(NOT); $$->u.t = $2; }
  	| SIZE sizetag NUMBER    { $$ = new_test(SIZE); $$->u.sz.t = $2;
  		                   $$->u.sz.n = $3; }
+ 	| SPAM			 { if (!parse_script->support.spam) {
+ 				     yyerror("spam not required");
+ 				     YYERROR;
+ 				   }
+ 				   $$ = new_test(SPAM); }
  	| error			 { $$ = NULL; }
  	;
  
diff -cr cyrus-imapd-2.1.3-orig/sieve/sieve_interface.h cyrus-imapd-2.1.3/sieve/sieve_interface.h
*** cyrus-imapd-2.1.3-orig/sieve/sieve_interface.h	Tue Feb 19 10:09:46 2002
--- cyrus-imapd-2.1.3/sieve/sieve_interface.h	Sun Mar 24 19:46:44 2002
***************
*** 50,55 ****
--- 50,57 ----
  typedef int sieve_get_envelope(void *message_context, 
  			       const char *field,
  			       const char ***contents);
+ typedef int sieve_spam(void *message_context, int *is_spam);
+ 
  
  typedef struct sieve_vacation {
      int min_response;		/* 0 -> defaults to 3 */
***************
*** 121,126 ****
--- 123,129 ----
  int sieve_register_vacation(sieve_interp_t *interp, sieve_vacation_t *v);
  int sieve_register_imapflags(sieve_interp_t *interp, sieve_imapflags_t *mark);
  int sieve_register_notify(sieve_interp_t *interp, sieve_callback *f);
+ int sieve_register_spam(sieve_interp_t *interp, sieve_spam *f);
  
  /* add the callbacks for messages. again, undefined if used after
     sieve_script_parse */
diff -cr cyrus-imapd-2.1.3-orig/timsieved/scripttest.c cyrus-imapd-2.1.3/timsieved/scripttest.c
*** cyrus-imapd-2.1.3-orig/timsieved/scripttest.c	Sun Dec 17 20:53:43 2000
--- cyrus-imapd-2.1.3/timsieved/scripttest.c	Sun Mar 24 13:12:06 2002
***************
*** 171,176 ****
--- 171,182 ----
  	return TIMSIEVE_FAIL;
      }
  
+     res = sieve_register_spam(i, (sieve_spam *) &foo);
+     if (res != SIEVE_OK) {
+ 	syslog (LOG_ERR, "sieve_register_spam() returns %d\n", res);
+ 	return TIMSIEVE_FAIL;
+     }
+  
      res = sieve_register_parse_error(i, &mysieve_error);
      if (res != SIEVE_OK) {
  	syslog(LOG_ERR, "sieve_register_parse_error() returns %d\n", res);