Thanks to John Cummings.

This commit is contained in:
rsc 2005-10-29 16:26:44 +00:00
parent cd37451963
commit 5cdb17983a
94 changed files with 26853 additions and 0 deletions

View file

@ -0,0 +1,667 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "spam.h"
/* Sizes that control how much of a message is read and examined. */
enum {
Quanta = 8192, /* bytes requested per Bread while readmsg looks for the end of the header */
Minbody = 6000, /* readmsg tries to buffer at least this many body bytes */
HdrMax = 15, /* size of the scratch buffer endofhdr uses to capture a header keyword */
};
typedef struct keyword Keyword;
typedef struct word Word;
/* A literal string and its length; tables of these are searched by isword(). */
struct word{
char *string; /* text to compare; a nil string ends a table */
int n; /* number of characters of string that isword() compares */
};
/* Maps an action name from the pattern file to its action code; see findkey(). */
struct keyword{
char *string; /* action name; a nil string ends the table */
int value; /* action code returned by findkey() */
};
Word htmlcmds[] =
{
"html", 4,
"!doctype html", 13,
0,
};
Word hrefs[] =
{
"a href=", 7,
"a title=", 8,
"a target=", 9,
"base href=", 10,
"img src=", 8,
"img border=", 11,
"form action=", 12,
"!--", 3,
0,
};
/*
* RFC822 header keywords to look for for fractured header.
* all lengths must be less than HdrMax defined above.
*/
Word hdrwords[] =
{
"cc:", 3,
"bcc:", 4,
"to:", 3,
0, 0,
};
Keyword keywords[] =
{
"header", HoldHeader,
"line", SaveLine,
"hold", Hold,
"dump", Dump,
"loff", Lineoff,
0, Nactions,
};
/*
 * One slot per action code, indexed by the action enum.  Each slot holds
 * the label printed for a match plus the string and regexp pattern lists
 * that parsepats() fills in.  The Nactions slot has a nil label, which
 * loops over this table use as their stop condition.
 */
Patterns patterns[] = {
[Dump] { "DUMP:", 0, 0 },
[HoldHeader] { "HEADER:", 0, 0 },
[Hold] { "HOLD:", 0, 0 },
[SaveLine] { "LINE:", 0, 0 },
[Lineoff] { "LINEOFF:", 0, 0 },
[Nactions] { 0, 0, 0 },
};
/* Forward declarations of the file-local helpers defined below. */
static char* endofhdr(char*, char*);
static int escape(char**);
static int extract(char*);
static int findkey(char*);
static int hash(int);
static int isword(Word*, char*, int);
static void parsealt(Biobuf*, char*, Spat**);
/*
* The canonicalizer: convert input to canonical representation
*/
/*
 * Read the beginning of a message from bp into a heap buffer.
 *
 * Input is read Quanta bytes at a time until endofhdr() finds the blank
 * line ending the header, the input runs out, or Maxread bytes have been
 * read without finding it.  Once the header end is known, one more read
 * is issued if fewer than Minbody body bytes are buffered.
 *
 * If hsize is non-nil, *hsize receives the offset of the first body byte
 * (or the number of bytes read when no header end was found).  If hsize
 * is nil the header search is skipped: one chunk is read and everything
 * in it is treated as body.
 * If bufsize is non-nil, *bufsize receives the number of bytes buffered.
 *
 * Returns the NUL-terminated buffer, which the caller owns, or 0 on a
 * read error or when the input is empty.
 */
char*
readmsg(Biobuf *bp, int *hsize, int *bufsize)
{
	char *p, *buf;
	int n, offset, eoh, bsize, delta;

	buf = 0;
	offset = 0;
	if(bufsize)
		*bufsize = 0;
	if(hsize)
		*hsize = 0;
	for(;;) {
		buf = Realloc(buf, offset+Quanta+1);
		n = Bread(bp, buf+offset, Quanta);
		if(n < 0){
			free(buf);
			return 0;
		}
		p = buf+offset;			/* start of this chunk */
		offset += n;			/* end of this chunk */
		buf[offset] = 0;
		if(n == 0){
			if(offset == 0){
				/* empty input: release the buffer instead of leaking it */
				free(buf);
				return 0;
			}
			break;
		}
		if(hsize == 0)			/* don't process header */
			break;
		if(p != buf && p[-1] == '\n')	/* check for EOH across buffer split */
			p--;
		p = endofhdr(p, buf+offset);
		if(p)
			break;
		if(offset >= Maxread)		/* gargantuan header - just punt */
		{
			if(hsize)
				*hsize = offset;
			if(bufsize)
				*bufsize = offset;
			return buf;
		}
	}
	eoh = p-buf;			/* End of header */
	bsize = offset - eoh;		/* amount of body already read */

	/* Read at least Minbody bytes of the body */
	if (bsize < Minbody){
		delta = Minbody-bsize;
		buf = Realloc(buf, offset+delta+1);
		n = Bread(bp, buf+offset, delta);
		if(n > 0) {
			offset += n;
			buf[offset] = 0;
		}
	}
	if(hsize)
		*hsize = eoh;
	if(bufsize)
		*bufsize = offset;
	return buf;
}
/*
 * Report whether text begins with any entry of the table wp.
 * len is the number of valid characters in text; an entry is only
 * compared when text is at least as long as the entry.
 * Returns 1 on the first match, 0 when the nil terminator is reached.
 */
static int
isword(Word *wp, char *text, int len)
{
	Word *w;

	for(w = wp; w->string != 0; w++){
		if(len < w->n)
			continue;
		if(strncmp(text, w->string, w->n) == 0)
			return 1;
	}
	return 0;
}
/*
 * Find the end of the header in [raw, end).
 *
 * The header ends at a blank line (two consecutive newlines) unless the
 * text right after it starts with one of the hdrwords keywords, in which
 * case the header is considered fractured and the scan continues.
 * Returns a pointer to the first character after the blank line, or 0
 * if no header end was found.
 */
static char*
endofhdr(char *raw, char *end)
{
	char word[HdrMax];
	char *s, *t;
	int len;

	/*
	 * strchr cannot be used to look for the newlines because the
	 * message may contain embedded NULs.
	 */
	for(s = raw; s < end; s++){
		if(s[0] != '\n')
			continue;
		if(s[1] != '\n')
			continue;
		s++;			/* now at the second newline */

		/* lower-case the start of the next line, up to and including ':' or newline */
		len = 0;
		t = s+1;
		while(len < sizeof(word) && *t != 0){
			word[len++] = tolower(*t);
			if(*t == ':' || *t == '\n')
				break;
			t++;
		}
		if(!isword(hdrwords, word, len))
			return s+1;
	}
	return 0;
}
/*
 * Canonicalize the text of an HTML tag and test it against a word table.
 *
 * text points just past a '<'; characters are copied into a local buffer
 * until '>' is seen, end is reached, or the buffer (MaxHtml) is full.
 * While copying, "=" sequences are decoded with escape(), NULs and
 * carriage returns are dropped, runs of white space collapse to a single
 * space, and everything else is lower-cased.
 *
 * If n is non-nil, *n receives the number of input characters consumed
 * (including the '>' when one was found).
 * Returns the result of isword() on the canonical tag text.
 */
static int
htmlmatch(Word *wp, char *text, char *end, int *n)
{
char *cp;
int i, c, lastc;
char buf[MaxHtml];
/*
* extract a string up to '>'
*/
i = lastc = 0;
cp = text;
while (cp < end && i < sizeof(buf)-1){
c = *cp++;
if(c == '=')
c = escape(&cp);
switch(c){
case 0:
case '\r':
continue; /* continues the while loop: the character is dropped */
case '>':
goto out;
case '\n':
case ' ':
case '\t':
if(lastc == ' ')
continue; /* collapse a run of white space */
c = ' ';
break;
default:
c = tolower(c);
break;
}
buf[i++] = lastc = c;
}
out:
buf[i] = 0;
if(n)
*n = cp-text;
return isword(wp, buf, i);
}
/*
 * Decode the sequence that follows an '=' in the message.
 *
 * *msg points at the character after the '='.  Recognized forms:
 *	"=\n"	soft line break: the newline is skipped and the
 *		following character is consumed and returned
 *	"=2e"	'.'	"=2f"	'/'	"=20"	' '	"=3d"	'='
 * (the hex letter may be in either case).  Anything else leaves *msg
 * untouched and returns '='.
 * On return *msg points past whatever was consumed.
 */
static int
escape(char **msg)
{
	char *s;
	int ch;

	s = *msg;
	ch = *s;
	switch(ch){
	case '\n':
		s++;
		ch = *s++;
		break;
	case '2':
		switch(tolower(s[1])){
		case 'e':
			s += 2;
			ch = '.';
			break;
		case 'f':
			s += 2;
			ch = '/';
			break;
		case '0':
			s += 2;
			ch = ' ';
			break;
		default:
			ch = '=';
			break;
		}
		break;
	case '3':
		if(tolower(s[1]) == 'd')
			s += 2;
		ch = '=';
		break;
	default:
		ch = '=';
		break;
	}
	*msg = s;
	return ch;
}
/*
 * Decide what to do with an HTML tag.  *msg points just past a '<'.
 *
 * Until an HTML keyword (htmlcmds) has been seen, tags are copied
 * through unless they look like a comment ("<!...").  Afterwards only
 * the tags listed in hrefs are copied.  The "seen HTML" state is kept
 * in a static variable and is never cleared.
 *
 * Returns '<' when the tag should be copied (*msg unchanged).
 * Otherwise the tag is skipped: *msg is advanced past it and past the
 * next character, and that next character is returned.
 */
static int
htmlchk(char **msg, char *end)
{
	static int ishtml;
	char *tag;
	int skip;

	tag = *msg;
	if(ishtml){
		/* already HTML: keep only the interesting tags */
		if(htmlmatch(hrefs, tag, end, &skip))
			return '<';
	}else{
		ishtml = htmlmatch(htmlcmds, tag, end, &skip);
		/* not an HTML keyword and not a comment: copy it */
		if(ishtml == 0 && *tag != '!')
			return '<';
	}

	/* an uninteresting HTML command; skip over it */
	tag += skip;
	*msg = tag+1;
	return *tag;
}
/*
* decode a base 64 encode body
*/
/*
 * Decode the base64 text in [msg, end) and canonicalize the result into
 * buf (at most bufsize characters plus a terminating NUL) as body text.
 *
 * The decoded bytes are NUL-terminated before being handed to convert(),
 * because its helpers look at the character at the end of the input.
 */
void
conv64(char *msg, char *end, char *buf, int bufsize)
{
	int len, i, n;
	char *cp;

	len = end - msg;
	if(len < 0)
		len = 0;
	i = (len*3)/4+1;	/* room for max chars + null */
	cp = Malloc(i);
	n = dec64((uchar*)cp, i, msg, len);
	if(n < 0)
		n = 0;
	if(n >= i)		/* keep the terminator inside the allocation */
		n = i-1;
	cp[n] = 0;
	convert(cp, cp+n, buf, bufsize, 1);
	free(cp);
}
/*
 * Copy [msg, end) into buf in canonical form: NULs and carriage returns
 * are dropped, runs of white space become one space, and letters are
 * lower-cased.  At most bufsize characters are stored, followed by a
 * NUL (so buf must have room for bufsize+1 characters).
 *
 * When isbody is non-zero, HTML tags are filtered through htmlchk() and
 * "=" sequences are decoded with escape().
 * When isbody is zero, the text is also checked for a
 * "content-transfer-encoding: base64" header.
 *
 * Returns 1 if that header was seen, otherwise 0.
 */
int
convert(char *msg, char *end, char *buf, int bufsize, int isbody)
{
char *p;
int c, lastc, base64;
lastc = 0;
base64 = 0;
while(msg < end && bufsize > 0){
c = *msg++;
/*
* In the body only, try to strip most HTML and
* replace certain MIME escape sequences with the character
*/
if(isbody) {
/* repeat while a helper consumed input, since the character it returned may itself be '<' or '=' */
do{
p = msg;
if(c == '<')
c = htmlchk(&msg, end);
if(c == '=')
c = escape(&msg);
} while(p != msg && p < end);
}
switch(c){
case 0:
case '\r':
continue;
case '\t':
case ' ':
case '\n':
if(lastc == ' ')
continue;
c = ' ';
break;
case 'C': /* check for MIME base 64 encoding in header */
case 'c':
if(isbody == 0)
if(msg < end-32 && *msg == 'o' && msg[1] == 'n')
if(cistrncmp(msg+2, "tent-transfer-encoding: base64", 30) == 0)
base64 = 1;
c = 'c';
break;
default:
c = tolower(c);
break;
}
*buf++ = c;
lastc = c;
bufsize--;
}
*buf = 0;
return base64;
}
/*
* The pattern parser: build data structures from the pattern file
*/
/* Bucket index for a character: its low seven bits (0..127). */
static int
hash(int c)
{
	int bucket;

	bucket = c & 0x7f;
	return bucket;
}
/*
 * Look up an action name in the keywords table.
 * Returns its action code, or the terminator's value (Nactions)
 * when the name is not in the table.
 */
static int
findkey(char *val)
{
	Keyword *k;

	k = keywords;
	while(k->string != 0 && strcmp(val, k->string) != 0)
		k++;
	return k->value;
}
/* True when c is a space or a tab.  c is evaluated twice, so pass an expression without side effects. */
#define whitespace(c) ((c) == ' ' || (c) == '\t')
/*
 * Read the pattern file from bp and add each pattern to patterns[].
 *
 * Each line is handled as follows:
 *	- leading blanks are skipped; empty lines and lines starting
 *	  with '#' are ignored
 *	- a leading '*' marks a literal string pattern; otherwise the
 *	  pattern is a regular expression
 *	- the text before the first ':' is the action name (see keywords);
 *	  lines without ':' or with an unknown action are ignored
 *	- the rest of the line is cleaned up in place by extract()
 *	- "~~" separates the pattern from its alternates, which are
 *	  collected by parsealt()
 *
 * Regular expressions are pushed on the front of patterns[action].regexps.
 * Literal strings are pushed on the hash chain, chosen by their first
 * character, of the single Pattern at patterns[action].strings.
 */
void
parsepats(Biobuf *bp)
{
Pattern *p, *new;
char *cp, *qp;
int type, action, n, h;
Spat *spat;
for(;;){
cp = Brdline(bp, '\n');
if(cp == 0)
break;
cp[Blinelen(bp)-1] = 0; /* replace the newline with a terminator */
while(*cp == ' ' || *cp == '\t')
cp++;
if(*cp == '#' || *cp == 0)
continue;
type = regexp;
if(*cp == '*'){
type = string;
cp++;
}
qp = strchr(cp, ':');
if(qp == 0)
continue;
*qp = 0;
if(debug)
fprint(2, "action = %s\n", cp);
action = findkey(cp);
if(action >= Nactions)
continue;
cp = qp+1;
n = extract(cp);
if(n <= 0 || *cp == 0)
continue;
qp = strstr(cp, "~~");
if(qp){
/* cut the pattern off at the alternates separator */
*qp = 0;
n = strlen(cp);
}
if(debug)
fprint(2, " Pattern: `%s'\n", cp);
/* Hook regexps into a chain */
if(type == regexp) {
new = Malloc(sizeof(Pattern));
new->action = action;
new->pat = regcomp(cp);
if(new->pat == 0){
free(new);
continue;
}
new->type = regexp;
new->alt = 0;
new->next = 0;
if(qp)
parsealt(bp, qp+2, &new->alt);
new->next = patterns[action].regexps;
patterns[action].regexps = new;
continue;
}
/* not a Regexp - hook strings into Pattern hash chain */
spat = Malloc(sizeof(*spat));
spat->next = 0;
spat->alt = 0;
spat->len = n;
spat->string = Malloc(n+1);
spat->c1 = cp[1]; /* second character, compared by matchpat before the full compare */
strcpy(spat->string, cp);
if(qp)
parsealt(bp, qp+2, &spat->alt);
p = patterns[action].strings;
if(p == 0) {
/* first string for this action: create the Pattern that holds the hash table */
p = Malloc(sizeof(Pattern));
memset(p, 0, sizeof(*p));
p->action = action;
p->type = string;
patterns[action].strings = p;
}
h = hash(*spat->string);
spat->next = p->spat[h];
p->spat[h] = spat;
}
}
/*
 * Collect the alternates of a pattern.
 *
 * cp points at the text following the first "~~" of a pattern line.
 * Alternates are separated by "~~"; when the text ends right after a
 * "~~" the list continues on the next non-empty line read from bp
 * (each such line is cleaned up with extract()).  Every non-empty
 * alternate is pushed on the front of the list at *head.
 *
 * Strings are copied with Malloc, as parsepats() does, so running out
 * of memory exits instead of leaving a nil string in the list.
 */
static void
parsealt(Biobuf *bp, char *cp, Spat** head)
{
	char *p;
	Spat *alt;
	int n;

	while(cp){
		if(*cp == 0){		/* escaped newline */
			do{
				cp = Brdline(bp, '\n');
				if(cp == 0)
					return;
				cp[Blinelen(bp)-1] = 0;
			} while(extract(cp) <= 0 || *cp == 0);
		}
		p = cp;
		cp = strstr(p, "~~");
		if(cp){
			*cp = 0;
			cp += 2;
		}
		n = strlen(p);
		if(n > 0){
			alt = Malloc(sizeof(*alt));
			memset(alt, 0, sizeof(*alt));
			alt->len = n;
			alt->string = Malloc(n+1);
			strcpy(alt->string, p);
			alt->next = *head;
			*head = alt;
		}
	}
}
/*
 * Clean up the pattern part of a line in place.
 *
 * Leading blanks are dropped, upper-case ASCII letters are lower-cased,
 * an unquoted '#' ends the text, and double quotes are removed while
 * their contents are kept verbatim (\" inside quotes yields a quote).
 * Trailing blanks are trimmed, but never back into a quoted string.
 * Returns the length of the resulting string.
 */
static int
extract(char *cp)
{
	int c;
	char *src, *dst, *keep;

	src = dst = keep = cp;
	while(whitespace(*src))
		src++;
	for(;;){
		c = *src++;
		if(c == 0 || c == '#')
			break;
		if(c != '"'){
			if(c >= 'A' && c <= 'Z')
				c += 'a'-'A';
			*dst++ = c;
			continue;
		}

		/* quoted section: copy up to the closing quote */
		while(*src != 0 && *src != '"'){
			if(src[0] == '\\' && src[1] == '"')
				src++;
			c = *src++;
			if(c >= 'A' && c <= 'Z')
				c += 'a'-'A';
			*dst++ = c;
		}
		if(*src != 0)
			src++;
		keep = dst;	/* never back up over a quoted string */
	}
	while(dst > keep && whitespace(dst[-1]))
		dst--;
	*dst = 0;
	return dst-cp;
}
/*
* The matching engine: compare canonical input to pattern structures
*/
/*
 * Look for any of the alternate strings in the command line (cmd),
 * the canonical header (header+1) or message itself.
 * cmd and the header are skipped when they are the message being
 * examined, so nothing is searched twice.
 * Returns the first alternate found, or 0 if none occurs.
 */
static Spat*
isalt(char *message, Spat *alt)
{
	Spat *a;
	char *hdr;

	hdr = header+1;
	for(a = alt; a != 0; a = a->next){
		if(*cmd != 0 && message != cmd && strstr(cmd, a->string) != 0)
			return a;
		if(message != hdr && strstr(hdr, a->string) != 0)
			return a;
		if(strstr(message, a->string) != 0)
			return a;
	}
	return 0;
}
/*
 * Match one Pattern against message.
 *
 * For a string Pattern every position of message is tried: the first
 * character selects a hash chain, the second character is compared
 * with the stored c1 as a quick filter, and then the whole string is
 * compared.  A hit is rejected when one of its alternates occurs
 * (see isalt()).  On success m->sp and m->ep bracket the match.
 *
 * For a regexp Pattern regexec() does the matching and fills in m;
 * the match is rejected when one of the Pattern's alternates occurs.
 *
 * Returns 1 on a match, 0 otherwise.
 *
 * The full compare uses strncmp rather than memcmp so that it never
 * looks beyond the NUL that ends message.
 *
 * NOTE(review): a one-character string pattern has c1 == 0, so it looks
 * like it can only match the last character of message - confirm intent.
 */
int
matchpat(Pattern *p, char *message, Resub *m)
{
	Spat *spat;
	char *s;
	int c, c1;

	if(p->type == string){
		c1 = *message;
		for(s = message; (c = c1) != 0; s++){
			c1 = s[1];
			for(spat = p->spat[hash(c)]; spat; spat = spat->next){
				if(c1 != spat->c1)
					continue;
				if(strncmp(s, spat->string, spat->len) != 0)
					continue;
				if(isalt(message, spat->alt))
					continue;
				m->sp = s;
				m->ep = s + spat->len;
				return 1;
			}
		}
		return 0;
	}
	m->sp = m->ep = 0;
	if(regexec(p->pat, message, m, 1) == 0)
		return 0;
	if(isalt(message, p->alt))
		return 0;
	return 1;
}
/*
 * Print a match with some context on fd, in the form
 *	type before~match~after
 * The context extends about 30 characters to either side and then on
 * to the next space.
 *
 * The backward scan stops only at a NUL or after its character budget,
 * so it assumes a NUL sits just before the start of the text
 * (NOTE(review): confirm every caller provides one).
 */
void
xprint(int fd, char *type, Resub *m)
{
	char *before, *after;
	int steps;

	if(m->sp == 0 || m->ep == 0)
		return;

	/* back up approx 30 characters to whitespace */
	before = m->sp;
	for(steps = 0; *before != 0 && steps < 30; steps++)
		before--;
	while(*before != 0 && *before != ' ')
		before--;
	before++;

	/* grab about 30 more chars beyond the end of the match */
	after = m->ep;
	for(steps = 0; *after != 0 && steps < 30; steps++)
		after++;
	while(*after != 0 && *after != ' ')
		after++;

	fprint(fd, "%s %.*s~%.*s~%.*s\n", type, (int)(m->sp-before), before, (int)(m->ep-m->sp), m->sp, (int)(after-m->ep), m->ep);
}
/* Marker stored in t64d for bytes that are not base64 digits. */
enum {
INVAL= 255
};
/*
 * Base64 decoding table, indexed by byte value: 'A'-'Z' map to 0-25,
 * 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and '/' to 63.
 * Every other byte maps to INVAL.
 */
static uchar t64d[256] = {
/*00 */ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*10*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*20*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, 62, INVAL, INVAL, INVAL, 63,
/*30*/ 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*40*/ INVAL, 0, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14,
/*50*/ 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, INVAL, INVAL, INVAL, INVAL, INVAL,
/*60*/ INVAL, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40,
/*70*/ 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, INVAL, INVAL, INVAL, INVAL, INVAL,
/*80*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*90*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*A0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*B0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*C0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*D0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*E0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*F0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
};

View file

@ -0,0 +1,24 @@
</$objtype/mkfile

# programs built in this directory
TARG=scanmail\
	testscan

# object shared by both programs
OFILES=	common.$O

HFILES=	spam.h\
	../common/sys.h

LIB=	../common/libcommon.a$O

BIN=/$objtype/bin/upas

UPDATE=\
	mkfile\
	$HFILES\
	${OFILES:%.$O=%.c}\
	${TARG:%=%.c}

</sys/src/cmd/mkmany

CFLAGS=$CFLAGS -I../common

# scanmail needs the spool directory compiled in
scanmail.$O:	scanmail.c
	$CC $CFLAGS -D'SPOOL="/mail"' scanmail.c

View file

@ -0,0 +1,476 @@
#include "common.h"
#include "spam.h"
/* command-line flags, set in main() */
int cflag; /* -c: save copy of message */
int debug; /* -d: debug; may be given more than once */
int hflag; /* -h: queue held messages by sender domain (needs -q) */
int nflag; /* -n: NOHOLD mode */
int sflag; /* -s: save copy of dumped message */
int tflag; /* -t: test mode; also set once a message has been dumped */
int vflag; /* -v: verbose - print matches */
/* bin: message input; bout: pipe to qer; cout: optional copy of the message */
Biobuf bin, bout, *cout;
/* file names */
char patfile[128];
char linefile[128];
char holdqueue[128];
char copydir[128];
/* canonical header text starts at header+1; header[0] is kept as a NUL */
char header[Hdrsize+2];
/* lower-cased "sender recipients" string that patterns are also matched against */
char cmd[1024];
char **qname; /* slot in the qer argument list holding the -q queue name */
char **qdir; /* slot in the qer argument list holding the queue directory */
char *sender;
String *recips; /* recipients, separated by ", " */
char* canon(Biobuf*, char*, char*, int*);
int matcher(char*, Pattern*, char*, Resub*);
int matchaction(int, char*, Resub*);
Biobuf *opencopy(char*);
Biobuf *opendump(char*);
char *qmail(char**, char*, int, Biobuf*);
void saveline(char*, char*, Resub*);
int optoutofspamfilter(char*);
/* Complain about the command line on standard error and exit with status "usage". */
void
usage(void)
{
	fprint(2, "missing or bad arguments to qer\n");

	exits("usage");
}
/*
 * Print a regular-expression error message s on standard error,
 * prefixed with the program name.  Unlike usage(), this returns.
 */
void
regerror(char *s)
{
	fprint(2, "scanmail: %s\n", s);
}
/* malloc that exits with status "malloc" instead of returning nil. */
void *
Malloc(long n)
{
	void *mem;

	mem = malloc(n);
	if(mem == 0)
		exits("malloc");
	return mem;
}
/* realloc that exits with status "realloc" instead of returning nil. */
void*
Realloc(void *p, ulong n)
{
	void *mem;

	mem = realloc(p, n);
	if(mem == 0)
		exits("realloc");
	return mem;
}
void
main(int argc, char *argv[])
{
int i, n, nolines, optout;
char **args, **a, *cp, *buf;
char body[Bodysize+2];
Resub match[1];
Biobuf *bp;
optout = 1;
a = args = Malloc((argc+1)*sizeof(char*));
sprint(patfile, "%s/patterns", UPASLIB);
sprint(linefile, "%s/lines", UPASLOG);
sprint(holdqueue, "%s/queue.hold", SPOOL);
sprint(copydir, "%s/copy", SPOOL);
*a++ = argv[0];
for(argc--, argv++; argv[0] && argv[0][0] == '-'; argc--, argv++){
switch(argv[0][1]){
case 'c': /* save copy of message */
cflag = 1;
break;
case 'd': /* debug */
debug++;
*a++ = argv[0];
break;
case 'h': /* queue held messages by sender domain */
hflag = 1; /* -q flag must be set also */
break;
case 'n': /* NOHOLD mode */
nflag = 1;
break;
case 'p': /* pattern file */
if(argv[0][2] || argv[1] == 0)
usage();
argc--;
argv++;
strecpy(patfile, patfile+sizeof patfile, *argv);
break;
case 'q': /* queue name */
if(argv[0][2] || argv[1] == 0)
usage();
*a++ = argv[0];
argc--;
argv++;
qname = a;
*a++ = argv[0];
break;
case 's': /* save copy of dumped message */
sflag = 1;
break;
case 't': /* test mode - don't log match
* and write message to /dev/null
*/
tflag = 1;
break;
case 'v': /* vebose - print matches */
vflag = 1;
break;
default:
*a++ = argv[0];
break;
}
}
if(argc < 3)
usage();
Binit(&bin, 0, OREAD);
bp = Bopen(patfile, OREAD);
if(bp){
parsepats(bp);
Bterm(bp);
}
qdir = a;
sender = argv[2];
/* copy the rest of argv, acummulating the recipients as we go */
for(i = 0; argv[i]; i++){
*a++ = argv[i];
if(i < 4) /* skip queue, 'mail', sender, dest sys */
continue;
/* recipients and smtp flags - skip the latter*/
if(strcmp(argv[i], "-g") == 0){
*a++ = argv[++i];
continue;
}
if(recips)
s_append(recips, ", ");
else
recips = s_new();
s_append(recips, argv[i]);
if(optout && !optoutofspamfilter(argv[i]))
optout = 0;
}
*a = 0;
/* construct a command string for matching */
snprint(cmd, sizeof(cmd)-1, "%s %s", sender, s_to_c(recips));
cmd[sizeof(cmd)-1] = 0;
for(cp = cmd; *cp; cp++)
*cp = tolower(*cp);
/* canonicalize a copy of the header and body.
* buf points to orginal message and n contains
* number of bytes of original message read during
* canonicalization.
*/
*body = 0;
*header = 0;
buf = canon(&bin, header+1, body+1, &n);
if (buf == 0)
exits("read");
/* if all users opt out, don't try matches */
if(optout){
if(cflag)
cout = opencopy(sender);
exits(qmail(args, buf, n, cout));
}
/* Turn off line logging, if command line matches */
nolines = matchaction(Lineoff, cmd, match);
for(i = 0; patterns[i].action; i++){
/* Lineoff patterns were already done above */
if(i == Lineoff)
continue;
/* don't apply "Line" patterns if excluded above */
if(nolines && i == SaveLine)
continue;
/* apply patterns to the sender/recips, header and body */
if(matchaction(i, cmd, match))
break;
if(matchaction(i, header+1, match))
break;
if(i == HoldHeader)
continue;
if(matchaction(i, body+1, match))
break;
}
if(cflag && patterns[i].action == 0) /* no match found - save msg */
cout = opencopy(sender);
exits(qmail(args, buf, n, cout));
}
/*
 * Deliver the message: write the n bytes already in buf, followed by
 * the rest of standard input (bin), to qer and, if cout is non-nil,
 * to the copy file as well.
 *
 * Unless tflag is set, qer is started from UPASBIN with argv as its
 * argument list (argv[0] is replaced by its path) and reads the message
 * from a pipe.  With tflag set the message goes to /dev/null instead.
 *
 * Returns 0 in test mode.  Otherwise returns buf, overwritten with the
 * exit message of qer (or "wait failed"), for use as the exit status.
 *
 * buf is reused as the read buffer for the remainder of the input; the
 * chunk size is the initial n, which the caller guarantees buf holds.
 * NOTE(review): buf is also assumed to be large enough for the exit
 * message, and Bopen("/dev/null") is assumed to succeed - confirm.
 */
char*
qmail(char **argv, char *buf, int n, Biobuf *cout)
{
	Waitmsg *status;
	int i, pid, pipefd[2], chunk;
	char path[512];
	Biobuf *bp;

	pid = 0;
	if(tflag == 0){
		if(pipe(pipefd) < 0)
			exits("pipe");
		pid = fork();
		if(pid == 0){
			dup(pipefd[0], 0);
			for(i = sysfiles(); i >= 3; i--)
				close(i);
			snprint(path, sizeof(path), "%s/qer", UPASBIN);
			*argv=path;
			exec(path, argv);
			exits("exec");
		}
		Binit(&bout, pipefd[1], OWRITE);
		bp = &bout;
	} else
		bp = Bopen("/dev/null", OWRITE);

	/*
	 * buf is a pointer, so sizeof(buf) is not the buffer size;
	 * read in chunks of the amount the buffer is known to hold.
	 */
	chunk = n;
	while(n > 0){
		Bwrite(bp, buf, n);
		if(cout)
			Bwrite(cout, buf, n);
		n = Bread(&bin, buf, chunk);
	}
	Bterm(bp);
	if(cout)
		Bterm(cout);
	if(tflag)
		return 0;

	close(pipefd[1]);
	close(pipefd[0]);
	for(;;){
		status = wait();
		if(status == nil || status->pid == pid)
			break;
		free(status);
	}
	if(status == nil)
		strcpy(buf, "wait failed");
	else{
		strcpy(buf, status->msg);
		free(status);
	}
	return buf;
}
/*
 * Read a message from bp and produce canonical copies of its header
 * (at most Hdrsize characters) and body (at most Bodysize characters).
 * The body is base64-decoded first when the header says it is encoded.
 * *n receives the number of raw bytes read.
 * Returns the raw message buffer from readmsg(), or 0 if nothing was read.
 */
char*
canon(Biobuf *bp, char *header, char *body, int *n)
{
	char *raw, *bodystart, *msgend;
	int hsize, isbase64;

	hsize = 0;
	*header = 0;
	*body = 0;
	raw = readmsg(bp, &hsize, n);
	if(raw == 0)
		return 0;

	bodystart = raw+hsize;
	msgend = raw+*n;
	isbase64 = convert(raw, bodystart, header, Hdrsize, 0);
	if(isbase64)
		conv64(bodystart, msgend, body, Bodysize);		/* base64 */
	else
		convert(bodystart, msgend, body, Bodysize, 1);	/* text */
	return raw;
}
/*
 * Apply every pattern of one action to message: first the string
 * patterns, then each regular expression in turn.
 * Returns 1 as soon as matcher() reports that the action was taken,
 * 0 otherwise (also for a nil or empty message).
 */
int
matchaction(int action, char *message, Resub *m)
{
	Patterns *pp;
	Pattern *p;

	if(message == 0 || *message == 0)
		return 0;

	pp = &patterns[action];
	if(pp->strings != 0 && matcher(pp->action, pp->strings, message, m))
		return 1;
	for(p = pp->regexps; p != 0; p = p->next)
		if(matcher(pp->action, p, message, m))
			return 1;
	return 0;
}
/*
 * Find every match of pattern p in message and carry out its action.
 * action is the label printed with a match when vflag is set.
 *
 *	SaveLine	log the match with saveline() and keep looking
 *	HoldHeader,
 *	Hold		unless nflag is set, redirect the message to the
 *			hold queue (and, with -h and -q, use the sender's
 *			domain as queue name); returns 1
 *	Dump		log the dump via syslog unless in test mode, switch
 *			to test mode so the message is discarded, optionally
 *			open a dump file; returns 1
 *
 * Returns 0 when no match led to a hold or dump.
 */
int
matcher(char *action, Pattern *p, char *message, Resub *m)
{
char *cp;
String *s;
/* each iteration resumes the search at the end of the previous match */
for(cp = message; matchpat(p, cp, m); cp = m->ep){
switch(p->action){
case SaveLine:
if(vflag)
xprint(2, action, m);
saveline(linefile, sender, m);
break;
case HoldHeader:
case Hold:
if(nflag)
continue; /* NOHOLD: ignore this match and look for the next one */
if(vflag)
xprint(2, action, m);
*qdir = holdqueue;
if(hflag && qname){
/* queue name is the sender up to the first '!' */
cp = strchr(sender, '!');
if(cp){
*cp = 0;
*qname = strdup(sender);
*cp = '!';
} else
*qname = strdup(sender);
}
return 1;
case Dump:
if(vflag)
xprint(2, action, m);
*(m->ep) = 0; /* cut the message off after the match so m->sp prints just the match */
if(!tflag){
s = s_new();
s_append(s, sender);
s = unescapespecial(s);
syslog(0, "smtpd", "Dumped %s [%s] to %s", s_to_c(s), m->sp,
s_to_c(s_restart(recips)));
s_free(s);
}
tflag = 1;
if(sflag)
cout = opendump(sender);
return 1;
default:
break;
}
}
return 0;
}
/*
 * Append a line "sender-> context" to file, where context is the match
 * rp with about 20 characters on either side, extended to the next
 * space.  The text is terminated in place for printing and restored
 * before returning.
 *
 * The backward scan stops only at a NUL or after its character budget,
 * so it assumes a NUL sits just before the start of the text
 * (NOTE(review): confirm every caller provides one).
 */
void
saveline(char *file, char *sender, Resub *rp)
{
	char *from, *to;
	int steps, saved;
	Biobuf *bp;

	if(rp->sp == 0 || rp->ep == 0)
		return;

	/* back up approx 20 characters to whitespace */
	from = rp->sp;
	for(steps = 0; *from != 0 && steps < 20; steps++)
		from--;
	while(*from != 0 && *from != ' ')
		from--;
	from++;

	/* grab about 20 more chars beyond the end of the match */
	to = rp->ep;
	for(steps = 0; *to != 0 && steps < 20; steps++)
		to++;
	while(*to != 0 && *to != ' ')
		to++;

	saved = *to;
	*to = 0;
	bp = sysopen(file, "al", 0644);
	if(bp == 0){
		if(debug)
			fprint(2, "can't save line: (%s) %s\n", sender, from);
	}else{
		Bprint(bp, "%s-> %s\n", sender, from);
		Bterm(bp);
	}
	*to = saved;
}
/*
 * Open a new file for a dumped message in SPOOL/queue.dump/<month><day>,
 * creating the directory if needed.  The file name is a number derived
 * from the sender and lrand(); up to 50 names are tried.
 * Returns the open file, or 0 if the directory or file could not be
 * created.
 */
Biobuf*
opendump(char *sender)
{
	int attempt;
	ulong h;
	char path[512];
	char *stamp, *tail, *s;
	Biobuf *b;

	/* pick the month name and day of month out of the ctime string */
	stamp = ctime(time(0));
	stamp[7] = 0;
	stamp[10] = 0;
	if(stamp[8] != ' ')
		sprint(path, "%s/queue.dump/%s%c%c", SPOOL, stamp+4, stamp[8], stamp[9]);
	else
		sprint(path, "%s/queue.dump/%s%c", SPOOL, stamp+4, stamp[9]);
	tail = path+strlen(path);

	if(access(path, 0) < 0 && sysmkdir(path, 0777) < 0){
		syslog(0, "smtpd", "couldn't dump mail from %s: %r", sender);
		return 0;
	}

	h = 0;
	for(s = sender; *s; s++)
		h = h*257 + *s;
	for(attempt = 0; attempt < 50; attempt++){
		h += lrand();
		sprint(tail, "/%lud", h);
		b = sysopen(path, "wlc", 0644);
		if(b == 0)
			continue;
		if(vflag)
			fprint(2, "saving in %s\n", path);
		return b;
	}
	return 0;
}
/*
 * Open a new file in copydir for a copy of the message.  The file name
 * is a number derived from the sender and lrand(); up to 50 names are
 * tried.  Returns the open file, or 0 if none could be created.
 */
Biobuf*
opencopy(char *sender)
{
	int attempt;
	ulong h;
	char path[512];
	char *s;
	Biobuf *b;

	h = 0;
	for(s = sender; *s; s++)
		h = h*257 + *s;
	attempt = 0;
	while(attempt < 50){
		h += lrand();
		sprint(path, "%s/%lud", copydir, h);
		b = sysopen(path, "wlc", 0600);
		if(b != 0)
			return b;
		attempt++;
	}
	return 0;
}
/*
 * Report whether a recipient has opted out of spam filtering, i.e.
 * whether /mail/box/<user>/nospamfiltering exists.  <user> is the part
 * of addr after the first '!', or all of addr if it has none.
 * Returns 1 if the file exists, 0 otherwise (also when the path cannot
 * be built).
 */
int
optoutofspamfilter(char *addr)
{
	char *user, *path;
	int found;

	user = strchr(addr, '!');
	if(user != 0)
		user++;
	else
		user = addr;

	path = smprint("/mail/box/%s/nospamfiltering", user);
	if(path == nil)
		return 0;
	found = access(path, 0)==0;
	free(path);
	return found;
}

View file

@ -0,0 +1,62 @@
enum{
Dump = 0, /* Actions must be in order of descending importance */
HoldHeader,
Hold,
SaveLine,
Lineoff, /* Lineoff must be the last action code */
Nactions,
Nhash = 128,
regexp = 1, /* types: literal string or regular expression */
string = 2,
MaxHtml = 256,
Hdrsize = 4096,
Bodysize = 8192,
Maxread = 64*1024,
};
typedef struct spat Spat;
typedef struct pattern Pattern;
typedef struct patterns Patterns;
/* A literal string pattern, or one alternate of a pattern. */
struct spat
{
char* string;
int len; /* length of string */
int c1; /* second character of string, checked before the full compare */
Spat* next; /* next entry on the same hash chain or alternate list */
Spat* alt; /* alternates: the match is rejected if one of these occurs */
};
/* Either one compiled regular expression or the hash table of all string patterns of an action. */
struct pattern{
struct pattern *next;
int action; /* action code this pattern triggers */
int type; /* regexp or string; selects the union member below */
Spat* alt; /* alternates of a regexp pattern */
union{
Reprog* pat; /* type == regexp */
Spat* spat[Nhash]; /* type == string: chains indexed by hash of the first character */
};
};
/* All patterns of one action. */
struct patterns {
char *action; /* label printed for a match; nil ends the patterns[] table */
Pattern *strings;
Pattern *regexps;
};
/* Globals defined by the programs that use the common code. */
extern int debug;
extern Patterns patterns[];
extern char header[];
extern char cmd[];
/* Canonicalizer, pattern parser and matching engine. */
extern void conv64(char*, char*, char*, int);
extern int convert(char*, char*, char*, int, int);
extern void* Malloc(long n);
extern int matchpat(Pattern*, char*, Resub*);
extern char* readmsg(Biobuf*, int*, int*);
extern void parsepats(Biobuf*);
extern void* Realloc(void*, ulong);
extern void xprint(int, char*, Resub*);

View file

@ -0,0 +1,212 @@
#include "sys.h"
#include "spam.h"
int debug; /* -d: debug; may be given more than once */
Biobuf bin; /* the message being tested */
/* canonical header text starts at header+1; header[0] is kept as a NUL */
char patfile[128], header[Hdrsize+2];
/* declared because spam.h refers to it; nothing in this file's visible code fills it in */
char cmd[1024];
char* canon(Biobuf*, char*, char*, int*);
int matcher(char *, Pattern*, char*, Resub*);
int matchaction(Patterns*, char*);
/* Complain about the command line on standard error and exit with status "usage". */
void
usage(void)
{
	fprint(2, "missing or bad arguments to qer\n");

	exits("usage");
}
/* malloc that reports the failure and exits with status "malloc" instead of returning nil. */
void *
Malloc(long n)
{
	void *mem;

	mem = malloc(n);
	if(mem != 0)
		return mem;
	fprint(2, "malloc error");
	exits("malloc");
	return mem;
}
/* realloc that reports the failure and exits with status "realloc" instead of returning nil. */
void*
Realloc(void *p, ulong n)
{
	void *mem;

	mem = realloc(p, n);
	if(mem == 0){
		fprint(2, "realloc error");
		exits("realloc");
	}
	return mem;
}
void
dumppats(void)
{
int i, j;
Pattern *p;
Spat *s, *q;
for(i = 0; patterns[i].action; i++){
for(p = patterns[i].regexps; p; p = p->next){
print("%s <REGEXP>\n", patterns[i].action);
if(p->alt)
print("Alt:");
for(s = p->alt; s; s = s->next)
print("\t%s\n", s->string);
}
p = patterns[i].strings;
if(p == 0)
continue;
for(j = 0; j < Nhash; j++){
for(s = p->spat[j]; s; s = s->next){
print("%s %s\n", patterns[i].action, s->string);
if(s->alt)
print("Alt:");
for(q = s->alt; q; q = q->next)
print("\t%s\n", q->string);
}
}
}
}
/*
 * testscan: run the spam patterns against a message and print the matches.
 *
 *	-a	examine every chunk of the input, not just the first
 *	-v	also print the canonical header and body on standard error
 *	-d	debug
 *	-p file	use file instead of UPASLIB/patterns
 *
 * The message is read from the file named by the first argument, or
 * from standard input.  The exit status is the label of the last action
 * that matched, or nil if nothing matched.
 */
void
main(int argc, char *argv[])
{
	int i, fd, n, aflag, vflag;
	char body[Bodysize+2], *raw, *ret, *arg;
	Biobuf *bp;

	sprint(patfile, "%s/patterns", UPASLIB);
	aflag = -1;
	vflag = 0;
	ARGBEGIN {
	case 'a':
		aflag = 1;
		break;
	case 'v':
		vflag = 1;
		break;
	case 'd':
		debug++;
		break;
	case 'p':
		/* a missing argument is a usage error; never overrun patfile */
		arg = ARGF();
		if(arg == 0)
			usage();
		strecpy(patfile, patfile+sizeof patfile, arg);
		break;
	} ARGEND

	bp = Bopen(patfile, OREAD);
	if(bp){
		parsepats(bp);
		Bterm(bp);
	}

	if(argc >= 1){
		fd = open(*argv, OREAD);
		if(fd < 0){
			fprint(2, "can't open %s\n", *argv);
			exits("open");
		}
		Binit(&bin, fd, OREAD);
	} else
		Binit(&bin, 0, OREAD);

	*body = 0;
	*header = 0;
	ret = 0;
	for(;;){
		raw = canon(&bin, header+1, body+1, &n);
		if(raw == 0)
			break;
		/* aflag: -1 examine the first chunk only, 0 skip the rest, 1 examine all */
		if(aflag == 0)
			continue;
		if(aflag < 0)
			aflag = 0;
		if(vflag){
			if(header[1]) {
				fprint(2, "\t**** Header ****\n\n");
				write(2, header+1, strlen(header+1));
				fprint(2, "\n");
			}
			fprint(2, "\t**** Body ****\n\n");
			if(body[1])
				write(2, body+1, strlen(body+1));
			fprint(2, "\n");
		}

		for(i = 0; patterns[i].action; i++){
			if(matchaction(&patterns[i], header+1))
				ret = patterns[i].action;
			if(i == HoldHeader)
				continue;
			if(matchaction(&patterns[i], body+1))
				ret = patterns[i].action;
		}
	}
	exits(ret);
}
/*
 * Read the next chunk of the message from bp and canonicalize it.
 *
 * The first call reads through the end of the header and fills in both
 * header and body.  Later calls free the previous chunk, read another
 * one without looking for a header, and treat all of it as body text.
 * *n receives the number of raw bytes read.
 * Returns the raw chunk (kept in a static variable and freed on the
 * next call), or 0 when nothing more could be read.
 */
char*
canon(Biobuf *bp, char *header, char *body, int *n)
{
	static char *raw;
	int hsize, base64;

	hsize = 0;
	base64 = 0;
	*header = 0;
	*body = 0;

	if(raw != 0){
		free(raw);
		raw = readmsg(bp, 0, n);
	}else{
		raw = readmsg(bp, &hsize, n);
		if(raw != 0)
			base64 = convert(raw, raw+hsize, header, Hdrsize, 0);
	}
	if(raw == 0)
		return 0;

	if(base64)
		conv64(raw+hsize, raw+*n, body, Bodysize);
	else
		convert(raw+hsize, raw+*n, body, Bodysize, 1);
	return raw;
}
/*
 * Count the matches of all patterns of one action in message.
 * Every match of the string patterns and of each regular expression is
 * reported through matcher(); the search resumes after each match.
 * Returns the number of matches (0 for a nil or empty message).
 */
int
matchaction(Patterns *pp, char *message)
{
	char *name, *cp;
	int hits;
	Pattern *p;
	Resub m[1];

	if(message == 0 || *message == 0)
		return 0;

	name = pp->action;
	hits = 0;

	p = pp->strings;
	if(p != 0){
		cp = message;
		while(matcher(name, p, cp, m)){
			hits++;
			cp = m[0].ep;
		}
	}
	for(p = pp->regexps; p != 0; p = p->next){
		cp = message;
		while(matcher(name, p, cp, m)){
			hits++;
			cp = m[0].ep;
		}
	}
	return hits;
}
/*
 * Try pattern p on message.  A match is printed on standard output with
 * xprint(), labelled with action, unless the pattern is a Lineoff pattern.
 * Returns 1 on a match, 0 otherwise.
 */
int
matcher(char *action, Pattern *p, char *message, Resub *m)
{
	if(!matchpat(p, message, m))
		return 0;
	if(p->action != Lineoff)
		xprint(1, action, m);
	return 1;
}