/* pdbdist.c
 * PDB Residue Distance Calculation
 * Roger Sayle & Mansoor Saqi
 * January 1996
 * Version 1.2
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <math.h>

/* Boolean constants used throughout this file; defined only when the
 * environment has not already provided them.
 */
#ifndef True
#define True  1
#define False 0
#endif

/* Bit values stored in AtomRecord.flag. */
#define HetAtmFlag  0x01    /* set by ProcessPDBAtom() for HETATM records      */
#define ChnBrkFlag  0x02    /* set on the first atom read after a TER/END line */
#define LigandFlag  0x04    /* set by DetermineLigand() on selected atoms      */

/* One ATOM/HETATM record as filled in by ProcessPDBAtom().
 *
 * The offsets quoted below are zero-based positions in the line buffer.
 * The character arrays are raw fixed-width copies of the record columns
 * and are NOT NUL-terminated: compare them with strncmp() and print them
 * with an explicit precision (as WritePDBFile() does with "%.4s").
 */
typedef struct AtomRecord {
        unsigned short atmserno;    /* atom serial number, offsets 6-10      */
        unsigned short resserno;    /* residue number, offsets 22-25; only   */
                                    /* digits are read, so any sign is lost  */
        float occupancy;            /* offsets 54-59                         */
        float bfactor;              /* offsets 60-65 on input; overwritten   */
                                    /* by CalculateDistance() before output  */
        float x,y,z;                /* offsets 30-37, 38-45 and 46-53        */
        char atmname[4];            /* atom name, offsets 12-15              */
        char resname[3];            /* residue name, offsets 17-19           */
        char insert;                /* offset 26                             */
        char altloc;                /* offset 16                             */
        char chain;                 /* offset 21                             */
        char flag;                  /* HetAtmFlag | ChnBrkFlag | LigandFlag  */
    } AtomRecord;


/* Atom table: ProcessPDBAtom() appends one entry per ATOM/HETATM record
 * and exits with an error once MAXATM entries are in use.
 */
#define MAXATM      10000
static AtomRecord Atom[MAXATM];
static int AtomCount = 0;

/* Ligand selection.  LigResFlag is set by ProcessCommandLine() when
 * "-ligand" was given; the other three are filled in by ParseLigand()
 * and consulted by IsLigand().  An empty name, a '\0' chain and a zero
 * residue number each mean "match anything".
 */
static int LigResFlag;
static char LigResName[4];
static char LigResChain;
static int LigResNo;

/* Current input line: ReadLine() stores at most 80 characters plus the
 * terminating NUL here.
 */
static char Buffer[82];
static char *PDBName;       /* input file name from the command line  */
static char *OutName;       /* output file name, NULL when not given  */
static FILE *PDBFile;
static FILE *OutFile;


/* Read the next line of PDBFile into Buffer.
 *
 * At most 80 characters are stored and Buffer is always NUL-terminated;
 * the remainder of an over-long line is discarded.  A line may be ended
 * by "\n", "\r" or "\r\n".
 *
 * Returns True when a line (possibly empty) was read and False once the
 * input is exhausted or a read error has occurred.
 */
static int ReadLine( void )
{
    char *ptr;
    int ch;

    if( feof(PDBFile) || ferror(PDBFile) )
    {   *Buffer = 0;
        return( False );
    }

    ptr = Buffer;
    do {
        ch = getc(PDBFile);
        if( ch == EOF )
        {   *ptr = 0;
            /* EOF before any character means there is no further line.
             * This also covers read errors: getc() keeps returning EOF
             * while feof() stays false, so reporting a line here would
             * make the caller spin forever.
             */
            return( ptr != Buffer );
        } else if( ch == '\n' )
        {   *ptr = 0;
            return( True );
        } else if( ch == '\r' )
        {   /* Swallow the '\n' of a "\r\n" pair, keep anything else. */
            ch = getc(PDBFile);
            if( (ch != '\n') && (ch != EOF) )
                ungetc(ch,PDBFile);
            *ptr = 0;
            return( True );
        } else *ptr++ = ch;
    } while( ptr < Buffer+80 );
    *ptr = 0;

    /* skip to the end of the line! */
    do { ch = getc(PDBFile);
    } while( (ch!='\n') && (ch!='\r') && (ch!=EOF) );

    if( ch == '\r' )
    {   ch = getc(PDBFile);
        if( (ch != '\n') && (ch != EOF) )
            ungetc(ch,PDBFile);
    }
    return( True );
}


/* Parse the fixed-width real number occupying `len` characters of
 * Buffer starting at zero-based offset `begin`.
 *
 * Returns 0.0 when the field is blank, unparsable or lies beyond the end
 * of the current line.  The field is clipped at the line's terminator so
 * that a short record never picks up bytes left behind by a previous,
 * longer line, and at the size of the scratch buffer.
 */
static float ReadPDBFloat( int begin, int len )
{
    char temp[12];
    float result;
    int avail;
    int i;

    if( begin < 0 )
        return( 0.0f );

    avail = (int)strlen(Buffer) - begin;
    if( len > avail )
        len = avail;
    if( len > (int)sizeof(temp)-1 )
        len = (int)sizeof(temp)-1;
    if( len <= 0 )
        return( 0.0f );

    for( i=0; i<len; i++ )
        temp[i] = Buffer[i+begin];
    temp[i] = '\0';

    result = 0.0f;
    if( sscanf(temp,"%g",&result) != 1 )
        result = 0.0f;
    return( result );
}

/* Parse the fixed-width integer occupying `len` characters of Buffer
 * starting at zero-based offset `begin`.
 *
 * Only decimal digits contribute to the value; blanks and any other
 * characters (including a minus sign) are skipped, so the result is
 * never negative.  Scanning stops at the end of the current line so a
 * short record cannot pick up digits left over from an earlier line.
 * Returns 0 when the field is empty or lies beyond the end of the line.
 */
static int ReadPDBInteger( int begin, int len )
{
    const char *ptr;
    int result;
    int i;

    if( (begin < 0) || ((size_t)begin >= strlen(Buffer)) )
        return( 0 );

    result = 0;
    ptr = &Buffer[begin];
    for( i=0; (i<len) && ptr[i]; i++ )
        /* <ctype.h> functions require an unsigned char value */
        if( isdigit((unsigned char)ptr[i]) )
            result = (10*result) + (ptr[i]-'0');
    return( result );
}

/* Append the ATOM/HETATM record currently held in Buffer to Atom[].
 *
 *   heta  non-zero for a HETATM record (sets HetAtmFlag)
 *   brk   non-zero when a TER/END line preceded this atom (sets ChnBrkFlag)
 *
 * Exits with an error message when the atom table is full.
 */
static void ProcessPDBAtom( int heta, int brk )
{
    AtomRecord *ptr;
    size_t len;
    int i;

    if( AtomCount >= MAXATM )
    {   fputs("Error: Too many atoms in PDB File!\n",stderr);
        exit(1);
    }

    /* Blank-pad short records to the full 80 columns.  ReadLine() does
     * not clear Buffer, so without this the fixed-offset reads below
     * would return characters left over from an earlier, longer line.
     * Buffer has room for 80 characters plus the terminator.
     */
    for( len=strlen(Buffer); len<80; len++ )
        Buffer[len] = ' ';
    Buffer[80] = '\0';

    ptr = &Atom[AtomCount];
    ptr->atmserno = ReadPDBInteger(6,5);
    ptr->resserno = ReadPDBInteger(22,4);

    ptr->occupancy = ReadPDBFloat(54,6);
    ptr->bfactor = ReadPDBFloat(60,6);

    ptr->x = ReadPDBFloat(30,8);
    ptr->y = ReadPDBFloat(38,8);
    ptr->z = ReadPDBFloat(46,8);

    ptr->altloc = Buffer[16];
    ptr->insert = Buffer[26];
    ptr->chain = Buffer[21];

    /* Names are stored as raw columns, without a terminating NUL. */
    for( i=0; i<4; i++ )
        ptr->atmname[i] = Buffer[i+12];

    for( i=0; i<3; i++ )
        ptr->resname[i] = Buffer[i+17];

    ptr->flag = 0;
    if( heta ) ptr->flag |= HetAtmFlag;
    if( brk )  ptr->flag |= ChnBrkFlag;
    AtomCount++;
}


/* True when Buffer holds the three-letter record `name` on its own,
 * i.e. followed by a blank or by the end of the line.
 */
static int IsShortRecord( const char *name )
{
    return( !strncmp(Buffer,name,3) &&
            ((Buffer[3]==' ') || (Buffer[3]=='\0')) );
}


/* Read every line of PDBFile and store its ATOM and HETATM records.
 *
 * A TER or END line marks the next atom read with ChnBrkFlag.  Both the
 * blank-padded and the bare forms are recognised; the bare forms are
 * what WritePDBFile() emits, so this program's own output can be read
 * back without losing chain breaks.  Reading stops at the first ENDMDL
 * record.
 */
static void ProcessPDBFile( void )
{
    int brk;

    brk = False;
    while( ReadLine() )
    {   if( !strncmp(Buffer,"ATOM",4) )
        {   ProcessPDBAtom(False,brk);
            brk = False;
        } else if( !strncmp(Buffer,"HETA",4) )
        {   ProcessPDBAtom(True,brk);
            brk = False;
        } else if( IsShortRecord("TER") || IsShortRecord("END") )
        {   brk = True;
        } else if( !strncmp(Buffer,"ENDM",4) )
            break;
    }
}


/* Filename extensions!
 * Suffixes that ReadPDBFile() appends to PDBName, in this order, when
 * the plain name cannot be opened.  MaxFileExt must equal the number of
 * entries in FileExt[].
 */
#define MaxFileExt  3
static char *FileExt[] = { ".Z", ".gz", ".z" };


/* Append `src` to the string in `dst`, wrapped in single quotes, with
 * every embedded quote rewritten as '\'' so that a POSIX shell treats
 * the result as one literal word.  `dst` must have room for
 * 4*strlen(src)+3 further bytes.
 */
static void AppendShellQuoted( char *dst, const char *src )
{
    dst += strlen(dst);
    *dst++ = '\'';
    for( ; *src; src++ )
        if( *src == '\'' )
        {   memcpy(dst,"'\\''",4);
            dst += 4;
        } else *dst++ = *src;
    *dst++ = '\'';
    *dst = '\0';
}


/* Open the input named by PDBName and load its atoms.
 *
 * "-" selects standard input.  Otherwise the file is opened under the
 * given name or, failing that, with each FileExt[] suffix appended.  A
 * file that starts with the compress (1f 9d) or gzip (1f 8b) magic
 * number is re-read through a decompression pipe.
 *
 * Exits with status 1 when the file cannot be opened or memory runs
 * out, 2 for an unrecognised compression header and 3 when the
 * decompressor cannot be started.
 */
static void ReadPDBFile( void )
{
    static const char redirect[] = " 2> /dev/null";
    const char *tool;
    char *fname;            /* name under which the file was opened */
    char *command;
    size_t len,ext;
    int comp;
    int i;

    fname = NULL;
    if( strcmp(PDBName,"-") )
    {   /* Size the name buffer from the input instead of assuming a
         * fixed maximum path length.
         */
        len = strlen(PDBName);
        ext = 0;
        for( i=0; i<MaxFileExt; i++ )
            if( strlen(FileExt[i]) > ext )
                ext = strlen(FileExt[i]);

        fname = malloc(len+ext+1);
        if( !fname )
        {   fputs("Error: Out of memory!\n",stderr);
            exit(1);
        }
        strcpy(fname,PDBName);

        if( !(PDBFile=fopen(fname,"r")) )
            for( i=0; i<MaxFileExt; i++ )
            {   strcpy(fname+len,FileExt[i]);
                if( (PDBFile=fopen(fname,"r")) )
                    break;
            }

        if( !PDBFile )
        {   fprintf(stderr,"Error: Unable to open PDB file %s!\n",PDBName);
            exit(1);
        }
    } else PDBFile = stdin;


    i = getc(PDBFile);
    if( (PDBFile!=stdin) && (i==0x1f) )
    {   i = getc(PDBFile);
        fclose(PDBFile);

        if( i == 0x9d )
        {   tool = "uncompress -c ";
        } else if( i == 0x8b )
        {   tool = "gzip -cdq ";
        } else /* bad magic number! */
        {   fputs("Error: Unknown data compression format!\n",stderr);
            exit(2);
        }

        /* The name goes through a shell, so it is quoted: blanks or
         * metacharacters in it must not be interpreted.  Worst case
         * every character expands to four, plus the two outer quotes.
         */
        command = malloc(strlen(tool) + 4*strlen(fname) + 2 + sizeof(redirect));
        if( !command )
        {   fputs("Error: Out of memory!\n",stderr);
            exit(1);
        }
        strcpy(command,tool);
        AppendShellQuoted(command,fname);
        strcat(command,redirect);

        comp = True;
        PDBFile = popen(command,"r");
        free(command);
        if( !PDBFile )
        {   fputs("Error: Unable to uncompress PDB file!\n",stderr);
            exit(3);
        }

    } else /* Uncompressed! */
    {   ungetc(i,PDBFile);
        comp = False;
    }
    free(fname);

    ProcessPDBFile();

    if( comp )
    {  pclose(PDBFile);
    } else if( PDBFile != stdin )
       fclose(PDBFile);
}


/* Return non-zero when the atom's residue name is one of the names this
 * program excludes from automatic ligand detection (see IsLigand()).
 *
 * The table originally held "D20", "H20" and "P04", written with a digit
 * zero.  The letter-O spellings "D2O", "H2O" and "PO4" are now accepted
 * as well; the old spellings are kept so earlier behaviour is unchanged.
 */
static int IsSolvent( AtomRecord *ptr )
{
    switch( ptr->resname[0] )
    {   case('D'):  return( !strncmp(ptr->resname,"DOD",3) ||
                            !strncmp(ptr->resname,"D2O",3) ||
                            !strncmp(ptr->resname,"D20",3) );

        case('H'):  return( !strncmp(ptr->resname,"HOH",3) ||
                            !strncmp(ptr->resname,"H2O",3) ||
                            !strncmp(ptr->resname,"H20",3) );

        case('S'):  return( !strncmp(ptr->resname,"SOL",3) ||
                            !strncmp(ptr->resname,"SO4",3) ||
                            !strncmp(ptr->resname,"SUL",3) );

        case('W'):  return( !strncmp(ptr->resname,"WAT",3) );
        case('T'):  return( !strncmp(ptr->resname,"TIP",3) );

        case('P'):  return( !strncmp(ptr->resname,"PO4",3) ||
                            !strncmp(ptr->resname,"P04",3) );

        default:    return( False );
    }
}


/* Compare a three-column residue name against a NUL-terminated mask.
 *
 *   res   three characters, not necessarily NUL-terminated; blanks are
 *         dropped before the comparison
 *   mask  pattern in which '?' stands for exactly one character
 *
 * Returns non-zero only when the whole blank-stripped name matches the
 * whole mask.
 */
static int MatchWildName( res, mask )
    char *res;  char *mask;
{
    char packed[4];
    char *cur;
    int used,k;

    /* Squeeze the blanks out of the fixed-width field. */
    used = 0;
    for( k=0; k<3; k++ )
        if( res[k] != ' ' )
            packed[used++] = res[k];
    packed[used] = '\0';

    for( cur=packed; *mask; cur++,mask++ )
    {   if( *cur == *mask )
            continue;
        /* A mismatch is only acceptable when the mask holds a wildcard
         * and there is still a name character for it to consume.
         */
        if( (*mask != '?') || !*cur )
            return( False );
    }
    return( *cur == '\0' );
}



/* Decide whether an atom belongs to the ligand.
 *
 * Without a "-ligand" specification every HETATM atom that is not
 * recognised by IsSolvent() qualifies.  With one, the atom must satisfy
 * each part of the specification that was given: residue number, chain
 * identifier and residue-name mask.
 */
static int IsLigand( ptr )
    AtomRecord *ptr;
{
    if( !LigResFlag )
        return( (ptr->flag&HetAtmFlag) && !IsSolvent(ptr) );

    if( LigResNo && (ptr->resserno!=LigResNo) )
        return( False );

    if( LigResChain && (ptr->chain!=LigResChain) )
        return( False );

    if( !LigResName[0] )
        return( True );

    return( MatchWildName(ptr->resname,LigResName) );
}


/* Set LigandFlag on every atom accepted by IsLigand(), report the total
 * on stderr and return it.
 */
static int DetermineLigand()
{
    int found;
    int idx;

    found = 0;
    for( idx=0; idx<AtomCount; idx++ )
        if( IsLigand(&Atom[idx]) )
        {   Atom[idx].flag |= LigandFlag;
            found++;
        }

    if( found == 0 )
    {   fputs("No ligand atoms found!\n",stderr);
    } else if( found == 1 )
    {   fputs("1 ligand atom found!\n",stderr);
    } else fprintf(stderr,"%d ligand atoms found!\n",found);

    return( found );
}


/* Non-zero when the atom name has a blank in its first column and a 'C'
 * in its second.
 */
static int IsCarbon( ptr )
    AtomRecord *ptr;
{
    return( (ptr->atmname[0]==' ') && (ptr->atmname[1]=='C') );
}


/* Write the non-blank characters of a fixed-width field to stderr and
 * return how many were written.
 */
static int PutNonBlank( field, width )
    char *field;  int width;
{
    int k,n;

    n = 0;
    for( k=0; k<width; k++ )
        if( field[k] != ' ' )
        {   fputc(field[k],stderr);
            n++;
        }
    return( n );
}


/* Print an atom on stderr as <resname><resno>[<chain>].<atomname>.
 *
 * The residue name is wrapped in square brackets when it contains a
 * non-alphabetic character, and a numeric chain identifier is preceded
 * by ':'.  Returns the number of columns accounted for, which the
 * caller uses to pad the output.
 */
static int DisplayRasMolAtom( ptr )
    AtomRecord *ptr;
{
    int width;
    int bracket;
    int k;

    bracket = False;
    for( k=0; k<3; k++ )
        if( (ptr->resname[k]!=' ') && !isalpha(ptr->resname[k]) )
            bracket = True;

    if( bracket )
    {   fputc('[',stderr);
        width = PutNonBlank(ptr->resname,3) + 2;
        fputc(']',stderr);
    } else width = PutNonBlank(ptr->resname,3);

    width += fprintf(stderr,"%d",ptr->resserno);

    if( isdigit(ptr->chain) )
    {   fprintf(stderr,":%c",ptr->chain);
        width += 2;
    } else if( ptr->chain != ' ' )
    {   fputc(ptr->chain,stderr);
        width += 1;
    }

    fputc('.',stderr);
    width += 1;

    width += PutNonBlank(ptr->atmname,4);
    return( width );
}


/* Measure `src` against every atom carrying LigandFlag.
 *
 * Each pair whose squared separation lies strictly between 1.0 and
 * 12.96, and in which neither atom satisfies IsCarbon(), is listed on
 * stderr (ligand atom, then `src`, then the distance).
 *
 * Returns the distance to the nearest flagged atom, limited to 999.99,
 * or 0.0 when no atom is flagged.
 */
static double FindCloseContacts( src )
    AtomRecord *src;
{
    AtomRecord *lig;
    double dx,dy,dz;
    double sqdist,nearest;
    int idx,col,found;

    nearest = 0.0;
    found = False;
    for( idx=0; idx<AtomCount; idx++ )
    {   lig = &Atom[idx];
        if( !(lig->flag & LigandFlag) )
            continue;

        dx = lig->x - src->x;
        dy = lig->y - src->y;
        dz = lig->z - src->z;
        sqdist = dx*dx + dy*dy + dz*dz;

        /* Contacts */
        if( (sqdist<12.96) && (sqdist>1.0) &&
            !IsCarbon(src) && !IsCarbon(lig) )
        {   col = DisplayRasMolAtom(lig);
            while( col++ < 15 )
                fputc(' ',stderr);
            fprintf(stderr,"%5d ",lig->atmserno);

            col = DisplayRasMolAtom(src);
            while( col++ < 15 )
                fputc(' ',stderr);
            fprintf(stderr,"%5d ",src->atmserno);
            fprintf(stderr," %g\n",sqrt(sqdist));
        }

        if( !found || (sqdist<nearest) )
        {   nearest = sqdist;
            found = True;
        }
    }

    if( !found )
        return( 0.0 );

    nearest = sqrt(nearest);
    if( nearest > 999.99 )
        nearest = 999.99;
    return( nearest );
}
    

static void CalculateDistance()
{
    register AtomRecord *ptr;
    register int i;

    ptr = Atom;
    for( i=0; i<AtomCount; i++ )
    {   if( !(ptr->flag&LigandFlag) )
        {   ptr->bfactor = FindCloseContacts(ptr);
        } else ptr->bfactor = 0.0;
        ptr++;
    }
}


/* Write all atoms as ATOM/HETATM records followed by "END".
 *
 * Output goes to OutName, or to standard output when OutName is NULL or
 * "-".  A "TER" line is written before every atom carrying ChnBrkFlag
 * except the first atom.  Exits with status 1 when the file cannot be
 * opened or when writing, flushing or closing it fails, so a truncated
 * file is never reported as success.
 */
static void WritePDBFile( void )
{
    AtomRecord *ptr;
    int failed;
    int i;

    if( OutName && strcmp(OutName,"-") )
    {   if( !(OutFile=fopen(OutName,"w")) )
        {   fprintf(stderr,"Error: Unable to open output file %s!\n",OutName);
            exit(1);
        }
    } else OutFile = stdout;

    ptr = Atom;
    for( i=0; i<AtomCount; i++ )
    {   if( i && (ptr->flag&ChnBrkFlag) )
            fputs("TER\n",OutFile);

        if( ptr->flag & HetAtmFlag )
        {      fputs("HETATM",OutFile);
        } else fputs("ATOM  ",OutFile);
        /* The name fields carry no terminator, hence the precisions. */
        fprintf(OutFile,"%5d %.4s%c%.3s %c%4d%c   %8.3f%8.3f%8.3f%6.2f%6.2f\n",
                        ptr->atmserno, ptr->atmname, ptr->altloc,
                        ptr->resname, ptr->chain, ptr->resserno,
                        ptr->insert, ptr->x, ptr->y, ptr->z,
                        ptr->occupancy, ptr->bfactor );
        ptr++;
    }
    fputs("END\n",OutFile);

    /* Buffered data may only fail on flush/close, so check those too. */
    failed = ferror(OutFile);
    if( OutFile != stdout )
    {   failed |= fclose(OutFile);
    } else failed |= fflush(OutFile);

    if( failed )
    {   fputs("Error: Unable to write PDB output!\n",stderr);
        exit(1);
    }
}


/* Report a malformed "-ligand" argument on stderr and terminate the
 * program with exit status 1.  Does not return.
 */
static void LigandSyntaxError()
{
    static char message[] = "Error: Syntax error in ligand specification!\n";

    fputs(message,stderr);
    exit(1);
}


/* Avoid problems on Sun386i! */
/* Convert a lower-case letter to upper case and return every other
 * value unchanged.
 *
 * Callers pass plain `char` values, which may be negative; the
 * <ctype.h> functions are only defined for EOF and values representable
 * as unsigned char, so the argument is converted before classification.
 */
static int ToUpper( int ch )
{
    if( islower((unsigned char)ch) )
    {   return( toupper((unsigned char)ch) );
    } else return( ch );
}


/* Parse a "-ligand" specification into LigResName, LigResNo and
 * LigResChain.  Any syntax error ends the program through
 * LigandSyntaxError().
 *
 * Accepted form, as implemented below:  [name][number][:][chain]
 *
 *   name    "*" (any residue), or up to three characters enclosed in
 *           square brackets, or up to three letters and/or the
 *           wildcards '?' / '%'.  Letters are upper-cased and both
 *           wildcards are stored as '?'.
 *   number  "*" (any), or digits with an optional leading '-'.
 *   chain   an optional ':' followed by one alphanumeric character
 *           (upper-cased) or one of the wildcards '?', '%', '*'.
 *
 * A specification that starts with ':' holds a chain identifier only.
 * Parts that are omitted or wildcarded keep the "match anything" values
 * assigned at the top of the function.
 *
 * NOTE(review): `neg` is only used to reject a '-' that is not followed
 * by a digit; the value stored in LigResNo is never negated.
 * NOTE(review): residue number 0 cannot be selected, because IsLigand()
 * treats LigResNo == 0 as "any residue number".
 */
static void ParseLigand( ptr )
    char *ptr;
{
    register int ch,i;
    register int neg;

    /* Defaults: no name mask, any chain, any residue number. */
    LigResName[0] = '\0';
    LigResChain = '\0';
    LigResNo = 0;

    /* `ch` always holds the character under examination and `ptr`
     * points just past it.
     */
    ch = *ptr++;
    if( ch != ':' )
    {   /* Parse Residue Name */
        if( ch != '*' )
        {   if( ch == '[' )
            {   /* Bracketed name: at most three characters of any kind;
                 * reaching the end of the string first is an error.
                 */
                i = 0;
                while( (ch = *ptr++) != ']' )
                    if( ch && (i<3) )
                    {   LigResName[i++] = ToUpper(ch);
                    } else LigandSyntaxError();
                ch = *ptr++;
            } else
                /* Bare name: letters and wildcards only, so a digit
                 * ends the name and starts the residue number.
                 */
                for( i=0; i<3; i++ )
                    if( isalpha(ch) )
                    {   LigResName[i] = ToUpper(ch);
                        ch = *ptr++;
                    } else if( (ch=='?') || (ch=='%') )
                    {   LigResName[i] = '?';
                        ch = *ptr++;
                    } else break;

            /* An empty name is not allowed here. */
            if( i ) 
            {   LigResName[i] = '\0';
            } else LigandSyntaxError();
        } else ch = *ptr++;

        /* Parse Residue Number */
        if( ch != '*' )
        {   if( ch == '-' )
            {   ch = *ptr++;
                neg = True;
            } else neg = False;

            if( isdigit(ch) )
            {   LigResNo = ch-'0';
                while( isdigit(*ptr) )
                    LigResNo = 10*LigResNo + (*ptr++)-'0';
                ch = *ptr++;
            } else if( neg )
                LigandSyntaxError();
        } else ch = *ptr++;
    }

    /* Parse Chain Ident */
    if( ch == ':' )
        ch = *ptr++;

    if( isalnum(ch) )
    {   LigResChain = ToUpper(ch);
        ch = *ptr++;
    } else if( (ch=='?') || (ch=='%') || (ch=='*') )
        ch = *ptr++;

    /* Anything left over at this point is a syntax error. */
    if( ch ) LigandSyntaxError();
}


/* Print the command-line synopsis on stderr and terminate the program
 * with exit status 1.  Does not return.
 */
static void DisplayUsage()
{
    static char synopsis[] =
        "usage: pdbdist [-ligand <resname>] <pdbfile> [outfile]\n";

    fputs(synopsis,stderr);
    exit(1);
}


/* Interpret the command line.
 *
 *   -ligand <spec>   ligand selection, handed to ParseLigand()
 *   <pdbfile>        first positional argument, stored in PDBName
 *   [outfile]        second positional argument, stored in OutName
 *
 * A bare "-" is treated as a file name, not as an option, so that the
 * "-" handling in ReadPDBFile() (standard input) and WritePDBFile()
 * (standard output) can actually be reached.  Any other argument that
 * starts with '-' must be a known option.  Prints the usage message and
 * exits on an unknown option, a missing option value, a missing input
 * file or surplus arguments.
 */
static void ProcessCommandLine( int argc, char *argv[] )
{
    int i,j;

    LigResFlag = False;

    j = 0;
    for( i=1; i<argc; i++ )
        if( (argv[i][0]=='-') && argv[i][1] )
        {   if( !strcmp(argv[i],"-ligand") )
            {   if( ++i == argc ) DisplayUsage();
                ParseLigand(argv[i]);
                LigResFlag = True;
            } else DisplayUsage();

        } else switch( j++ )
        {   case(0): PDBName = argv[i];  break;
            case(1): OutName = argv[i];  break;
            default: DisplayUsage();
        }

    if( j<1 )
        DisplayUsage();
}


/* Program entry point: print the banner, read the PDB file, select the
 * ligand atoms and, when at least one was found, compute the distances
 * and write the result.  Always returns 0; fatal errors leave through
 * exit() inside the helpers.
 */
int main( argc, argv )
    int argc;  char *argv[];
{
    fputs("PDBdist contact calculation\n"
          "R.Sayle & M.Saqi, January 1996\n"
          "Version 1.2\n\n",stderr);

    ProcessCommandLine(argc,argv);
    ReadPDBFile();

    if( !DetermineLigand() )
        return(0);

    CalculateDistance();
    WritePDBFile();
    return(0);
}

/*
 * Dr. Mansoor Saqi                    Email mass15599@ggr.co.uk
 * Bioinformatics Group                Phone +44 (0)81 966 2417
 * Dept. of Biomolecular Structure
 * Glaxo Group Research
 * Greenford, Middlx, UK.
 */

