/*  `filter', v. 3.0   Latest mod: 8:15 Apr 19 1995
     A `grep'-like text searcher for multiple simultaneous keyword tests

Copyright 1995 by Joel Polowin
#7-419 Phillip St., Waterloo, Ontario, N2L 3X2   Canada
E-mail: polowin@hyper.com, Joel.Polowin@p8.f279.n221.z1.fidonet.org

Permission granted for free use and distribution;
I want credit/blame for writing it.  
 
If you see something wrong with it or it fails to work, PLEASE let me know!
*/

#define LENGTH 600 /* default max # characters for input lines */
 
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
 
/* Heap-allocated table of search terms built by main().  Entries
   1..nostring hold the terms, each starting with its prefix character;
   entry 0 is allocated but never assigned. */
char **myargv;
/* flag[i]   : 1 if term i must be found in a line ('+' or '='),
               0 if it must be absent ('-' or '_').
   orflag[i] : 1 if the term after term i carried the 'o' (OR) prefix.
   line      : buffer that receives each input line.
   lowline   : lower-cased copy of line, used for case-insensitive terms. */
char *flag,*orflag,*line,*lowline;
/* Number of search terms; cleanup() uses it as the upper index into myargv. */
int  nostring;

/*
 * syntax: write the usage summary to stderr, then leave the program
 * through cleanup(1).  Does not return.
 *
 * The text is stored one output line per table entry; only the opening
 * two lines need formatting (they report the default line length LENGTH).
 */
void syntax ()
{
   void cleanup();
   static const char *const usage[] = {
      "  chars) in file(s) `filename', prefixed by one of the following characters:\n",
      "    +  to show lines which contain string\n",
      "    -  to show lines which do not contain string\n",
      "    =  to show lines which contain string, case sensitive\n",
      "    _  (underscore) to show lines which do not contain string,\n",
      "            case sensitive\n",
      "\n",
      "For information about special options, see filter30.doc.\n",
      "A string as above may be further prefixed with the letter 'o' to\n",
      "  print the line if the current OR the preceding condition is true.\n",
      "A string including blanks and the prefix may be enclosed in double quotes.\n",
      "Strings beginning with one of `$&^' designate file expansion; see filter30.doc.\n",
      "Examples:\n",
      " filter ^f1 armorial =Vert +argent -gules _Or -azur -purp \"_|B|\" > tempfile.txt\n",
      " type temp1.txt | filter +aardvark \"o+winged pig\" o+wombat +foil\n",
      "\n",
      "Filter utility v.3.0 (C) 1995 by Joel Polowin, #7-419 Phillip St., Waterloo,\n",
      "Ont., N2L 3X2  Canada.  Permission granted for free use; I want credit/blame\n",
      "for writing it.  polowin@hyper.com, Joel.Polowin@p8.f279.n221.z1.fidonet.org\n"
   };
   unsigned int row;

   fprintf(stderr,
      "Syntax: filter [^option ...] [filename ...] string [string ...]\n"
      "  where each string is a term to be searched for in lines (default max. %d\n",
      LENGTH);

   for (row = 0; row < sizeof usage / sizeof usage[0]; row++)
      fputs(usage[row], stderr);

   cleanup(1);
}
 
/*
 * main: parse the command line, build the search-term table, then scan
 * every input file (or stdin when no file is named) and print the lines
 * that satisfy the terms.
 *
 * Command line layout:  [^option ...] [filename ...] term [term ...]
 *   - Options are the leading arguments that start with '^'.
 *   - The first argument that starts with an ODD number of identical
 *     '+', '-', '=' or '_' characters is the first search term; everything
 *     between the options and that argument is an input file name.
 *     Doubled prefix characters are halved, so a file name may begin with
 *     one of those characters.
 *   - A term whose second character is an odd run of '$', '&' or '^'
 *     names a file whose lines are spliced into the term table.
 *
 * All error paths leave through cleanup()/syntax(), which exit; the
 * normal path ends in cleanup(0), so the final return is never reached.
 */
int main(int argc, char *argv[])
{
   FILE *infile,*outfile;
   int  length=LENGTH;
   int  i,j,k,l,firststring,lowcase;
   int  firstfile,fuzzfact,fuzzcount;
   int  redo,needlow;
   size_t linelen;
   extern int nostring;
   void syntax();
   void strlow();
   void stringerr();
   char *delim=NULL,*escparse(),*myfgets();
   char test,parse=0,replaceflag=0,*repstring="";
   extern char *flag,*orflag;
   extern char **myargv;
   extern char *line,*lowline;
   char *prefix="",*filename,*cptemp;
   void *grown;
   char *readmode="r",*writemode="w",*outfilename=NULL;
   char *shorten();
   void cleanup();

   firststring=0;
   firstfile=1;
   fuzzfact=0;

   for(i=firstfile; i<argc; i++)   /* Find first search string... */
      {
      if((*argv[i]=='^')&&(i==firstfile))  /* Check for option flag */
         {
         switch(*(argv[i]+1))
            {
            case 'b':   /* Read/write files in binary mode */
               readmode="rb";
               writemode="wb";
               break;

            case 'd':   /* A user-defined string delimiter */
               delim=escparse(argv[i]+2);
               break;

            case 'e':   /* parse search terms */
               parse=1;
               break;

            case 'f':   /* A 'fuzz factor' */
               fuzzfact=atoi(argv[i]+2);
               break;

            case 'l':   /* Change length for input lines */
               length=atoi(argv[i]+2);
               /* A zero or negative length would give a useless or
                  invalid buffer size further down. */
               if(length<1)
                  {
                  fprintf(stderr,"Input line length must be at least 1: %s\n",
                     argv[i]);
                  cleanup(1);
                  }
               break;

            case 'o':   /* Output file name */
               outfilename=argv[i]+2;
               break;

            case 'r':   /* Replace line delim */
               replaceflag=1;
               repstring=escparse(argv[i]+2);
               break;

            case '^':     /* A filename */
               shorten(argv[i],1);
               continue;   /* next argv entry; firstfile is NOT advanced */

            default:
               fprintf(stderr,"Unknown option %s.\n\n",argv[i]);
               fprintf(stderr,"Available options:\n");
               fprintf(stderr,"  b           (binary file read)\n");
               fprintf(stderr,"  d<string>   (input file string delimiter)\n");
               fprintf(stderr,"  e           (parse search terms for escape chars)\n");
               fprintf(stderr,"  f<integer>  (term-matching \"fuzz factor\")\n");
               fprintf(stderr,"  l<integer>  (input line length)\n");
               fprintf(stderr,"  o<string>   (output file name)\n");
               fprintf(stderr,"  r<string>   (replace string delim in output)\n");
               fprintf(stderr,"  ^<string>   (file name actually begins with '^')\n\n");
               fprintf(stderr,"Example: filter ^l6000 ^b ^d\\f\\n ^r\\n ^f1 ^^odd.doc ");
               fprintf(stderr,"+refer +manu +silly\n\n");
               cleanup(1);
            }
         firstfile++;
         continue;
         }

      if(*argv[i]=='=' || *argv[i]=='+' || *argv[i]=='-' || *argv[i]=='_')
         {
         test=*argv[i];
         for(j=1; test==*(argv[i]+j); j++)
            ;
         if(j%2 && firststring==0)   /* Count identical beginning flags; */
            firststring=i;           /* even or odd?  Trim off half of them */
         shorten(argv[i],j/2);
         }
      }

   if(firststring==0)
      {
      fprintf(stderr,"Must specify a search string.\n");
      syntax();
      }

   length++;   /* room for the terminating NUL */
   if((NULL==(line=malloc(length))) || (NULL==(lowline=malloc(length))))
      stringerr("input line storage");

   /* nostring is published only after the tables exist, so that an
      allocation failure here never lets cleanup() walk a missing table. */
   j=argc-firststring;
   if((NULL==(myargv=calloc((j+1),sizeof(char *)))) ||
      (NULL==(flag=calloc((j+1),1))) ||
      (NULL==(orflag=calloc((j+1),1))))
      stringerr("storage");
   nostring=j;

   lowcase=0;

   outfile=stdout;
   if((outfilename) && !(outfile=fopen(outfilename,writemode)))
      {
      fprintf(stderr,"Can't open file %s for output.\n",outfilename);
      cleanup(1);
      }

   /* Private, writable copies of the terms: myargv[1..nostring]. */
   j=0;
   for (i=firststring; i<argc; i++)
      {
      if (NULL==(myargv[++j]=malloc(strlen(argv[i])+1)))
         stringerr("storage");
      strcpy(myargv[j],argv[i]);
      }

   /* Classify every term.  A term may need a second look at the same
      index (after its 'o' prefix is stripped, or after it has been
      replaced by the contents of a term file); 'redo' requests that. */
   for (i=1; i<=nostring; i++)
      {
      redo=0;
      needlow=0;
      switch(*myargv[i])
         {
         case '+':
            needlow=1;
            lowcase=1;
            /* fall through */
         case '=':
            flag[i]=1;
            break;
         case '-':
            needlow=1;
            lowcase=1;
            /* fall through */
         case '_':
            flag[i]=0;
            break;
         case 'O':
         case 'o':
            orflag[i-1]=1;          /* OR-link the preceding term to this one */
            shorten(myargv[i],1);
            redo=1;
            break;
         default:
            fprintf(stderr,"Error in string no. %d: %s\n",i-1,myargv[i]);
            syntax();
         }

      if(!redo && (((test=*(myargv[i]+1))=='$') || (test=='&') || (test=='^')))
         {
         for(j=1; test==*(myargv[i]+1+j); j++)
            ;  /* count identical flag chars */

         l=j/2;                   /*shift string to delete half */
         shorten(myargv[i]+1,l);      /* of flag chars */

         if(j%2) /* an odd number of flag chars: expand file */
            {
            if(test=='^')
               {
               prefix="";          /* file lines carry their own prefixes */
               orflag[i-1]=0;
               }
            else
               {
               switch(*myargv[i])   /* determine prefix for expanded terms */
                  {                 /* from current prefix and expansion type */
                  case '+':
                     if(test=='$') prefix="o+";
                     else prefix="+";
                     break;
                  case '=':
                     if(test=='$') prefix="o=";
                     else prefix="=";
                     break;
                  case '-':
                     if(test=='$') prefix="-";
                     else prefix="o-";
                     break;
                  case '_':
                     if(test=='$') prefix="_";
                     else prefix="o_";
                     break;
                  default:
                     fprintf(stderr,"Bugger-up in program!\n");
                     cleanup(1);
                  }
               }

            /* The term has not been lower-cased yet (that is deferred to
               the end of the iteration), so the file name keeps the case
               the user typed. */
            filename=myargv[i]+2;

            if(!(infile=fopen(filename,readmode)))
               {
               fprintf(stderr,"Can't open search-term file %s\n",filename);
               cleanup(1);
               }

            if(test!='^') test=*(myargv[i]);  /* prefix of current term */

            for(j=0;;j++)   /* count new terms */
               {
               if(NULL==fgets(line,length,infile)) break;
               if((size_t)length==strlen(line)+1) fprintf(stderr,
                 "* Warning: truncated search term file %s line\n%s\n",
                 filename,line);
               }

            if (j==0)
               {
               fprintf(stderr,"* Warning: empty search term file %s\n",
                  filename);
               j=1;
               }

            rewind(infile);

            if(NULL==(grown=realloc(myargv,(nostring+j)*sizeof(char *))))
               stringerr("file expansion");
            myargv=grown;
            /* The added slots must not hold garbage: cleanup() frees every
               entry up to nostring if a later allocation fails. */
            for(k=1;k<j;k++)
               myargv[nostring+k]=NULL;

            if(NULL==(cptemp=realloc(flag,(nostring+j))))
               stringerr("file expansion");
            flag=cptemp;

            if(NULL==(cptemp=realloc(orflag,(nostring+j))))
               stringerr("file expansion");
            orflag=cptemp;
            for(k=0;k<j;k++)
               orflag[nostring+k]=0;

            /* Shift the later terms up to make room.  With j==1 nothing
               has to move; shifting anyway would copy each entry onto
               itself and then erase it. */
            if(j>1 && i<nostring)
               for(k=nostring; k>i; k--)
                  {
                  myargv[k+j-1]=myargv[k];
                  myargv[k]=NULL;
                  }
            nostring+=j-1;

            for(k=0;k<j;k++)
               {
               /* An empty file yields one empty term instead of reading
                  whatever happened to be in the buffer. */
               if(NULL==fgets(line,length,infile)) line[0]='\0';
               linelen=strlen(line);
               if(linelen>0 && line[linelen-1]=='\n') line[linelen-1]='\0';
               if(k==0)
                  {
                  /* First file line replaces the expansion term itself. */
                  if(NULL==(cptemp=realloc(myargv[i],strlen(line)+2)))
                     stringerr("file expansion");
                  myargv[i]=cptemp;
                  if(test=='^') strcpy(myargv[i],line);
                  else
                     {
                     *(myargv[i])=test;
                     strcpy(myargv[i]+1,line);
                     }
                  }
               else
                  {
                  if(NULL==(cptemp=malloc(strlen(line)+strlen(prefix)+1)))
                     stringerr("file expansion");
                  myargv[i+k]=cptemp;
                  strcpy(myargv[i+k],prefix);
                  strcat(myargv[i+k],line);
                  }
               }

            fclose(infile);
            redo=1;   /* classify the first spliced-in term next */
            }
         }

      if(redo)
         {
         i--;
         continue;
         }

      /* Only a term in its final form is lower-cased and escape-parsed,
         and always at its own index. */
      if(needlow) strlow(myargv[i]);
      if(parse) escparse(myargv[i]);  /* parse for escape chars */
      }

   if(!delim) replaceflag=0;   /* ^r only has meaning together with ^d */

   k=firstfile-1;
   do
      {
      k++;
      if(firststring==firstfile) infile=stdin;   /* no file names given */
      else
         if(!(infile=fopen(argv[k],readmode)))
            {
            fprintf(stderr,"Can't open file %s for reading.\n",argv[k]);
            syntax();
            }
      l=0;   /* "File ..." header not yet printed for this file */

      for (;;)
         {
         if(delim)
            {
            if(NULL==myfgets(line,length,infile,delim,replaceflag)) break;
            }
         else
            if(NULL==fgets(line,length,infile)) break;
         if((size_t)length==strlen(line)+1)
            fprintf(stderr,"* Warning: truncated line \n%s\n",line);
         if(lowcase) strlow(strcpy(lowline,line));
         fuzzcount=0;
         for(i=1; i<=nostring; i++)
            {
            test=0;
            switch(*myargv[i])
               {
               case '=':
               case '_':
                  if(NULL!=strstr(line,(myargv[i]+1))) test=1;
                  break;
               default:
                  if(NULL!=strstr(lowline,(myargv[i]+1))) test=1;
                  break;
               }
            /* A failed term with no OR alternative counts against the
               fuzz allowance; too many failures reject the line. */
            if((test^flag[i]) && !(orflag[i]))
               if (++fuzzcount>fuzzfact) break;
            /* A satisfied term makes the rest of its OR chain moot. */
            if(!(test^flag[i]))
               while(orflag[i]) i++;
            }
         if(i>nostring)   /* ran through every term: the line is accepted */
            {
            if(!(l) && firststring-1>firstfile)   /* several input files */
               {
               fprintf(outfile,"File %s:\n",argv[k]);
               l=1;
               }
            if(replaceflag)
               fprintf(outfile,"%s%s",line,repstring);
            else
               fprintf(outfile,"%s",line);
            }
         }
      if(infile!=stdin) fclose(infile);
      }
   while(k<firststring-1);
   if(outfile!=stdout) fclose(outfile);
   cleanup(0);
   return 0;   /* not reached: cleanup() exits */
}
 
/*
 * strlow: convert a NUL-terminated string to lower case, in place.
 *
 * string - writable string to convert.
 *
 * Each character goes to tolower() as an unsigned char: passing a
 * negative plain-char value (possible for bytes >= 0x80 where char is
 * signed) to a <ctype.h> function is undefined behaviour.
 */
void strlow(char *string)
{
   for (; *string != '\0'; string++)
      *string = (char)tolower((unsigned char)*string);
}
/*
 * stringerr: report that memory could not be obtained, then terminate.
 *
 * string - short description of what the memory was wanted for; it is
 *          inserted into the message written to stderr.
 *
 * Control passes to cleanup(1), which releases the global buffers and
 * exits, so this function does not return.
 */
void stringerr(string)
char *string;
{
   void cleanup();

   (void)fprintf(stderr,
                 "Can't allocate memory for string %s.\n",
                 string);

   cleanup(1);
}

/*
 * cleanup: release the global search-term table and line buffers, then
 * exit the program.  Does not return.
 *
 * errval - exit status handed to exit().
 *
 * This can be called before the term table exists (usage errors, failed
 * allocations), so myargv is checked before it is walked.  free(NULL) is
 * a no-op, which is why the individual pointers need no guards.
 */
void cleanup(int errval)
{
   extern char **myargv;
   extern char *flag,*orflag,*line,*lowline;
   extern int nostring;
   int i;

   if (myargv != NULL)
      {
      for (i = 1; i <= nostring; i++)
         free(myargv[i]);
      free(myargv);
      }
   free(orflag);
   free(flag);
   free(line);
   free(lowline);
   exit(errval);
}
/*
 * shorten: delete the first `headsize' characters of a string in place
 * by sliding the remainder (terminator included) to the front.
 *
 * string   - writable NUL-terminated string.
 * headsize - number of leading characters to drop.  Zero or a negative
 *            value leaves the string untouched; a value larger than the
 *            string's length empties it instead of reading past the end.
 *
 * Returns string.
 */
char *shorten(char *string, int headsize)
{
   size_t length;
   size_t drop;

   if (headsize <= 0)
      return(string);

   length = strlen(string);
   drop = (size_t)headsize;
   if (drop > length)
      drop = length;

   /* Source and destination overlap, so memmove rather than memcpy. */
   memmove(string, string + drop, length - drop + 1);
   return(string);
}


/*
 * escparse: replace C-style backslash escapes in a string, in place.
 *
 * Recognised forms:
 *   \' \" \? \\ \a \b \f \n \r \t \v   the usual single-character escapes
 *   \xH... or \XH...                   one or more hex digits
 *   \O...                              one or more octal digits (0-7)
 * A backslash before any other character is simply removed; a lone
 * backslash at the end of the string is removed as well.
 *
 * Numeric escapes consume every following digit of their base and must
 * evaluate to at most 255.  A "\x" with no digits, or a value above 255,
 * is reported on stderr and the program leaves through syntax().
 * The digits are evaluated here rather than with sscanf() so that signs,
 * white space and a "0x" prefix are not accepted and nothing can wrap.
 *
 * Note that an escape evaluating to 0 stores a NUL byte, which ends the
 * string as far as the C string functions are concerned.
 *
 * string - writable NUL-terminated string.
 * Returns string.
 */
char *escparse(char *string)
{
   static const char escin[]     = "'\"?\\abfnrtv";
   static const char escout[]    = "'\"?\\\a\b\f\n\r\t\v";
   static const char hexdigits[] = "0123456789abcdef";
   char *place;
   char *shorten();
   void  syntax();
   const char *hit;
   unsigned int value;
   int   ndigits;
   char  new;

   for (place = string; *place != '\0'; place++)
      {
      if (*place != '\\')
         continue;

      new = *(shorten(place, 1));   /* drop the backslash itself */
      if (new == '\0') break;       /* the backslash was the last character */

      if ((hit = strchr(escin, new)) != NULL)
         {
         *place = escout[hit - escin];
         continue;
         }

      if (new == 'x' || new == 'X')
         {
         value = 0;
         for (ndigits = 0;
              isxdigit((unsigned char)place[1 + ndigits]);
              ndigits++)
            {
            hit = strchr(hexdigits,
                         tolower((unsigned char)place[1 + ndigits]));
            value = value * 16 + (unsigned int)(hit - hexdigits);
            if (value > 255) value = 256;   /* saturate: already too big */
            }
         shorten(place + 1, ndigits);       /* remove the digits */

         if (ndigits == 0 || value > 255)
            {
            fprintf(stderr,"Error in hex digit in delimiter string.\n");
            syntax();
            }
         else
            *place = (char)value;           /* overwrites the 'x' */
         continue;
         }

      if (new >= '0' && new <= '7')
         {
         value = (unsigned int)(new - '0');
         for (ndigits = 0;
              place[1 + ndigits] >= '0' && place[1 + ndigits] <= '7';
              ndigits++)
            {
            value = value * 8 + (unsigned int)(place[1 + ndigits] - '0');
            if (value > 255) value = 256;   /* saturate: already too big */
            }
         shorten(place + 1, ndigits);       /* remove the extra digits */

         if (value > 255)
            {
            fprintf(stderr,"Error in octal digit in delimiter string.\n");
            syntax();
            }
         else
            *place = (char)value;           /* overwrites the first digit */
         continue;
         }
      }
   return(string);
}

/*
 * myfgets: fgets()-like reader whose record terminator is an arbitrary
 * byte string instead of '\n'.
 *
 * string      - destination buffer of at least `length' bytes.
 * length      - buffer size; at most length-1 bytes are read.
 * infile      - stream to read from.
 * delim       - NUL-terminated delimiter.  An empty delimiter makes a
 *               NUL byte in the input end the record.
 * replaceflag - non-zero: strip the delimiter from the returned record;
 *               zero: leave it at the end of the record.
 *
 * Returns string, or NULL when end of file (or a read error) is met
 * before any byte was stored, or when length is not positive.  A record
 * that fills the buffer without meeting the delimiter is returned
 * truncated; the rest is delivered by the following calls.
 *
 * The delimiter is detected by comparing the tail of what has been read
 * so far with delim after every byte.  That finds a delimiter that
 * starts inside a failed partial match (e.g. "ab" in "aab"), and memcmp
 * compares raw bytes, so delimiter bytes >= 0x80 match regardless of the
 * signedness of char.
 *
 * The old-style definition is kept on purpose: callers declare this
 * function without a prototype and pass replaceflag as a promoted char.
 */
char *myfgets(string,length,infile,delim,replaceflag)
char string[];
int  length;
FILE *infile;
char delim[];
char replaceflag;
{
   size_t dlen;
   int    i,fred,room;

   if (length < 1)
      return(NULL);

   dlen = strlen(delim);
   if (dlen == 0)
      dlen = 1;            /* compare against delim's own terminating NUL */

   room = length - 1;      /* keep one byte for the terminator */
   for (i = 0; i < room; i++)
      {
      if (EOF == (fred = fgetc(infile)))
         {
         string[i] = '\0';
         return((i == 0) ? NULL : string);
         }
      string[i] = (char)fred;

      if ((size_t)(i + 1) >= dlen &&
          memcmp(string + (i + 1) - dlen, delim, dlen) == 0)
         {
         if (replaceflag)
            string[(i + 1) - dlen] = '\0';   /* cut the delimiter off */
         else
            string[i + 1] = '\0';
         return(string);
         }
      }
   string[room] = '\0';
   return(string);
}
 
/*  Revision history:
 
Version 1.0 September 1992.
 
1.1 Sep '92 fixed minor bugs
1.2 Sep '92 added 'or'-linking to keywords
1.4 Oct '92 fixed a minor error in string lengths, added size DEFINEs
1.5 Jan '94 increased string lengths, fixed a Stupid Newbie Error re:
	    assumption that *argv[] was writeable
2.0 Feb '94 added search-term file expansion and multiple text file
	    capability, including wildcards when system permits
2.1 Jun '94 added null-prefix search-term files and term-file `rewind'
2.2 Feb '95 added "fuzzy searching", removed fixed constraints on number
	    of search terms, fixed the argv[] thing *properly*
3.0 Apr '95 added generalized options: binary files, delimiters, fuzzy 
	    searching, line lengths, output file

*/
