Great Circle Associates List-Managers
(December 1992)
 

Indexed By Date: [Previous] [Next] Indexed By Thread: [Previous] [Next]

Subject: How to deal with bounces
From: dupuy @ tiemann . cs . columbia . edu (Alexander Dupuy)
Date: Thu, 17 Dec 92 15:22:19 EST
To: list-managers @ GreatCircle . COM
In-reply-to: "Michael H. Morse"'s message of Tue, 15 Dec 1992 08:36:27 EST <9212151336 . AA24138 @ z . nsf . gov>
Reply-to: dupuy @ cs . columbia . edu

Michael Morse writes:
> I have a little Perl script (I'm starting to sound like a broken record) that
> uses nslookup to at least check that the host name is in the DNS.  That has
> almost totally cut out bounces on a non-listserv system I run.  If anyone's
> interested, I've included it here.  It's not perfect, but it does most of
> the job.

I also have a C program, expn(1), which is useful for verifying Internet
addresses - it checks that there is at least one SMTP server which will accept
mail for the address, though of course it cannot guarantee that the mail will
ultimately be deliverable.  I'm not sure if I have already posted this program
to list-managers - forgive me if I have, but since there are probably some new
subscribers, at least it won't be completely redundant.

@alex

/*-
 * $Id: expn.c,v 1.9 1992/05/31 03:43:02 dupuy Exp $
 *
 *	To compile with DNS support, cc -O -o expn expn.c -lresolv
 *	To compile without DNS support, cc -O -o expn -DNODOMAINS expn.c
 * 	To use, expn user@host
 *		exit codes: 0 = valid address
 *			    1 = system error (address may be valid)
 *			    2 = SMTP server error (address may be valid)
 *			    3 = invalid user
 *			    4 = invalid host
 *
 * This started out as mverify, by Jeff Beadles <jeff@quark.WV.TEK.COM>
 * with a couple of lines of autobounce.c by Pete Shipley <shipley@berkley.edu>
 *
 * I decided to enhance it for use in verifying mailing lists, requiring better
 * support for all the varieties of RFC-821 mailers out there in the swamps.
 * So I added support for MX records and real handling of RFC-821 result codes.
 *
 * If you think there's anything left to add, please send it to me, and I'll
 * see about adding it.
 *
 * Alexander Dupuy <dupuy@cs.columbia.edu>
 *
 */

#ifdef BSD
#include <strings.h>
#else
#include <string.h>
#include <memory.h>
#define index strchr
#define bcopy(b1,b2,len) memcpy(b2,b1,len)
#endif

#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <netinet/in.h>
#ifndef NODOMAINS
#include <arpa/nameser.h>
#include <resolv.h>
#endif
#include <stdio.h>

/* Upper bound on the number of MX hosts collected from one DNS reply. */
#define MAXMXHOSTS 20

/* Non-zero exit codes returned by main (0 means the address is valid). */
#define SYSERR 1			/* system error (address may be valid) */
#define SMTPERR 2			/* SMTP server error (address may be valid) */
#define BADUSER 3			/* invalid user */
#define BADHOST 4			/* invalid host */

/*
 * Old-style (no prototype) declarations for library routines that return
 * pointers.  When BSD is not defined, "index" is macro-replaced by strchr
 * through the #define above.
 */
char *fgets ();
char *index ();
struct hostent *gethostbyname ();

main (argc, argv)
    int argc;
    char **argv;
{
    int smtpfd;
    FILE *fin;				/* separate stdio streams to allow */
    FILE *fout;				/* mixed reads and writes w/o seeks */
    char buffer[2048];			/* stdio line buffer */

    char *user;				/* pieces of argv */
    char *orighost;
    char *host;

    char *command;			/* for expn/vrfy smtp commands */
    int exitstatus;

    struct hostent *hp;			/* inet networking */
    struct servent *sp;
    struct sockaddr_in server;

    char *bp;				/* utility infielder */

#ifndef NODOMAINS
    int n;				/* utility index */

    HEADER *dhp;			/* DNS reply header and fields*/
    int ancount;
    int qdcount;

    union				/* DNS reply and pointers */
    {
	HEADER hdr;
	u_char bytes[PACKETSZ];
    }     answer;
    u_char *cp;
    u_char *eom;

    char hostbuf[PACKETSZ];		/* hostnames buffer */
    int buflen;

    u_short pref;
    u_short type;

    char *hosts[MAXMXHOSTS];		/* arrays for multiple MX hosts */
    u_short prefs[MAXMXHOSTS];
    u_long bestpref;
    int besthost;			/* indexes into arrays */
    int i;
#endif

    if (argc != 2)
    {
	(void) fprintf (stderr, "Usage: %s user @
 host\n", argv[0]);
	return (SYSERR);
    }

    user = argv[1];
    if ((orighost = index (user, '@')) == 0)
	host = "localhost";
    else
    {
	*orighost = '\0';
	orighost++;
	host = orighost;
    }

    server.sin_family = AF_INET;

    /*
     * Get the smtp port using tcp.
     */

    sp = getservbyname ("smtp", "tcp");
    server.sin_port = sp->s_port;
    if (!server.sin_port)
    {
	(void) fprintf (stderr, "unknown service: smtp/tcp\n");
	return (SYSERR);
    }

#ifdef NODOMAINS

    /*
     * Now get the information for the @host part of the address.
     */

    if ((hp = gethostbyname (host)) == 0)
    {
	(void) fprintf (stderr, "%s: unknown host\n", host);
	return (BADHOST);
    }

#else

    besthost = -1;			/* don't retry MX */

    /*
     * Check MX records for the @host part of the address.
     */

    n = res_search (host, C_IN, T_MX, answer.bytes, sizeof (answer));
    if (n < 0)
	goto punt;

    /* find first satisfactory answer */

    dhp = &answer.hdr;
    cp = answer.bytes + sizeof (HEADER);
    eom = answer.bytes + n;
    for (qdcount = ntohs (dhp->qdcount); qdcount--; cp += n + QFIXEDSZ)
	if ((n = dn_skipname (cp, eom)) < 0)
	    goto punt;

    /* copy MX hosts and preferences into arrays */

    buflen = sizeof (hostbuf);
    bp = hostbuf;
    ancount = ntohs (dhp->ancount);
    i = 0;
    while (--ancount >= 0 && cp < eom && i < MAXMXHOSTS)
    {
	if ((n = dn_expand (&answer.hdr, eom, cp, bp, buflen)) < 0)
	    break;
	cp += n;
	GETSHORT (type, cp);
	cp += sizeof (u_short) + sizeof (u_long);
	GETSHORT (n, cp);
	if (type != T_MX)
	{
	    cp += n;
	    continue;
	}
	GETSHORT (pref, cp);
	if ((n = dn_expand (&answer.hdr, eom, cp, bp, buflen)) < 0)
	    break;
	cp += n;

	prefs[i] = pref;
	hosts[i] = bp;
	i++;

	n = strlen (bp) + 1;
	bp += n;
	buflen -= n;
    }
    hosts[i] = 0;			/* terminate host array */

  nextmxhost:

    i = 0;
    n = 0;
    bestpref = 65536;

    while (hosts[i] && i < MAXMXHOSTS)
    {
	if (prefs[i] < 65535)		/* count untried MX hosts */
	    n++;
	if (prefs[i] < bestpref)
	{
	    bestpref = prefs[i];
	    host = hosts[i];
	    besthost = i;
	}
	i++;
    }
    if (n <= 1)				/* don't retry if this is last host */
	besthost = -1;
    
  punt:

    /*
     * Now get the information for the @host part of the address.
     */

    if ((hp = gethostbyname (host)) == 0)
    {
	herror (host);
	if (besthost >= 0)
	{
	    prefs[besthost] = 65535;
	    goto nextmxhost;
	}
	if (h_errno == TRY_AGAIN)
	    return (SYSERR);

	return (BADHOST);
    }

#endif

  reconnect:

    (void) bcopy (hp->h_addr, (char *) &server.sin_addr, hp->h_length);

    /*
     * One socket please...
     */

    if ((smtpfd = socket (AF_INET, SOCK_STREAM, 0)) < 0)
    {
	perror ("socket");
	return (SYSERR);
    }

    /*
     * Connecting to the socket might make things go a little easier...  :-)
     */

    while (connect (smtpfd, (struct sockaddr *) &server, sizeof (server)) < 0)
    {
#ifdef h_addr
	if (*++hp->h_addr_list)
	{
	    (void) close (smtpfd);
	    goto reconnect;
	}
#endif
	perror (host);
	if (besthost >= 0)
	{
	    prefs[besthost] = 65535;
	    (void) close (smtpfd);
	    goto nextmxhost;
	}
	return (SYSERR);
    }

    /*
     * Change them to streams 'cause I like 'em better.
     */

    fout = fdopen (smtpfd, "w");
    fin = fdopen (smtpfd, "r");

    /*
     * The format of SMTP reply codes is a three digit number followed by
     * either space or '-' minus.  The minus code indicates a multi-line
     * response, so we keep reading lines until we see one with a space.
     *
     * Reply codes beginning with 2 are positive, and can be ignored
     * (although 251 indicates a non-local user).
     * Reply codes beginning with 4 indicate transient errors.
     * Reply codes beginning with 5 indicate permanent errors
     * (although 551 will return a forward-path for a non-local user).
     *
     * Note that RFC-1123 defines a new reply code 252 which indicates that
     * the user cannot be verified, but the server will attempt forwarding.
     * This is pretty much the same as 251 except there is less assurance that
     * the address is valid.
     */

    /*
     * Wait for the smtp mailer to answer.  It should greet us with a 220 code
     */

    while (fgets (buffer, sizeof (buffer) - 1, fin) != NULL)
    {
	if (buffer[3] == '-')
	    continue;

	if (!strncmp (buffer, "220 ", 4))
	    break;
	else				/* some kind of mailer error */
	{
	    (void) fputs (buffer, stderr);
	    (void) putc ('\n', stderr);	/* fputs doesn't add a newline */
	    exitstatus = SMTPERR;
	    goto done;
	}
    }

    exitstatus = 0;			/* let's be optimistic :-) */

    if (orighost)
	command = "EXPN %s @
 %s\r\n";	/* RFC-821 requires CRLF */
    else
	command = "EXPN %s\r\n";

    /*
     * Now that we have the mailer's attention, tell it to 'verify' the user.
     * We have four ways of trying this, using EXPN or VRFY, and using @host
     * or not.  We will try all of them (only two if no @host specified).
     */

    (void) fprintf (fout, command, user, orighost);
    (void) fflush (fout);

    /*
     * Now just go into a loop reading responses from the mailer.
     */

    while (fgets (buffer, sizeof (buffer) - 1, fin) != NULL)
    {
	/*
	 * Zap ^M from the lines.
	 */
	if ((bp = index (buffer, '\r')) != 0)
	    *bp = '\0';

	if (!strncmp (buffer, "250", 3))
	{
	    (void) puts (buffer + 4);	/* delete 250- code */
	}
	else if (!strncmp (buffer, "251", 3)) /* only given to VRFY */
	{
	    (void) puts (buffer);	/* unusual - leave 251 code in */
	}
	else if (!strncmp (buffer, "252", 3)) /* only given to VRFY */
	{				/* reconstruct the user name we sent */
	    if (orighost)
		(void) printf ("<%s @
 %s>\n", user, orighost);
	    else
		(void) puts (user);

	    (void) fputs (buffer, stderr);
	    (void) putc ('\n', stderr);	/* print warning to stderr */
	}
	else if (!strncmp (buffer, "551", 3)) /* only given to VRFY */
	{
	    (void) puts (buffer);	/* unusual - leave 551 code in */

	    (void) fputs (buffer, stderr);
	    (void) putc ('\n', stderr);	/* print warning to stderr */
	}
	else if (buffer[0] == '5' && (buffer[1] == '0' || buffer[2] == '0'))
	{				/* 50x syntax err; 550 list vs. user */
	    /*
	     * We try first with EXPN, then VRFY, since EXPN gives more info.
	     * A server which accepts EXPN but not VRFY can give misleading
	     * results for bad addresses.  The only server I have run across
	     * (@lists.psi.com) accepts VRFY but not EXPN, so this is okay.
	     * If some server does accept EXPN but not VRFY, then change this
	     * to retry only for 50x codes.  RFC-821 says that a VRFY on a list
	     * or an EXPN on a user is allowed to return 550, but I've never
	     * seen that happen.
	     */

	    switch (exitstatus)
	    {
	      case 0:			/* first failure; try without @host */
		if (orighost)		/* if we haven't already done so */
		{
		    command = "EXPN %s\r\n";
		    exitstatus = 1;
		    break;
		}
		/* fall through if !orighost */

	      case 1:			/* EXPN doesn't work; try VRFY */
		if (orighost)
		{
		    command = "VRFY %s @
 %s\r\n";
		    exitstatus = 2;
		    break;
		}
		/* fall through if !orighost */

	      case 2:
		command = "VRFY %s\r\n";
		exitstatus = 3;
		break;

	      case 3:
		if (buffer[1] == '5')	/* 55x implies bad username */
		    exitstatus = BADUSER;
		else
		    exitstatus = SMTPERR;

		(void) fputs (buffer, stderr);
		(void) putc ('\n', stderr);
		goto done;		/* we've had enough by now */
	    }
	    
	    (void) fprintf (fout, command, user, orighost);
	    (void) fflush (fout);
	    continue;
	}
	else
	{
	    (void) fputs (buffer, stderr);	/* failed bigtime */
	    (void) putc ('\n', stderr);	/* fputs doesn't add a newline */
	}
	
	if (buffer[3] == ' ')		/* no more responses coming */
	{
	    if (buffer[0] == '2')
		exitstatus = 0;		/* the last try worked */
	    else if (buffer[0] == '5' && buffer[1] == '5')
		exitstatus = BADUSER;	/* 55x implies bad username */
	    else
		exitstatus = SMTPERR;	/* SMTP error */
	    break;
	}
    }

  done:

    /*
     * Close the SMTP connection gracefully.
     */

    (void) fputs ("QUIT\r\n", fout);
    (void) fflush (fout);
    (void) fclose (fout);
    (void) fclose (fin);

    if (fflush (stdout))		/* in case we couldn't write output */
	exitstatus = 1;

    /*
     * And leave this nice program.
     */

    return (exitstatus);
}



Follow-Ups:
References:
Indexed By Date Previous: How do you handle brain dead intermediate sites
From: Michael Rutman <moose!moose @ svcdudes . com>
Next: Re: How to deal with bounces
From: Chris Siebenmann <cks @ hawkwind . utcs . toronto . edu>
Indexed By Thread Previous: Re: How to deal with bounces
From: "Michael H. Morse" <mmorse @ z . nsf . gov>
Next: Re: How to deal with bounces
From: Chris Siebenmann <cks @ hawkwind . utcs . toronto . edu>

Google
 
Search Internet Search www.greatcircle.com