/* ifile - intelligent mail filter for EXMH/MH
   ifile is Copyright (C) 1997  Jason Rennie <jr6b+@andrew.cmu.edu>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program (see file 'COPYING'); if not, write to the Free
   Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA  02111-1307, USA.
   */
  
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/stat.h>

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <ifile.h>        /* standard ifile library */

/* System V IPC key handed to semget() in main to locate the semaphore
 * that keeps two ifile jobs from running at the same time */
#define SEMKEY  10439838

/* identifier of that semaphore set, as returned by semget() in main */
int semid;
/* semaphore operation record for the semaphore identified by semid */
struct sembuf sops;

/* parsed command line options; initialised by ifile_init_args() and
 * filled in by argp_parse() in main */
arguments args;
/* argp parser description; defined in another translation unit */
extern struct argp argp;
int msgs_read;        /* number of messages actually read in */

/* variables for keeping track of time/speed of ifile */
/* DMZ_start and DMZ_end hold the clock() readings taken in main just
 * before and just after the messages are read.  DMZ2_start is not
 * referenced in the visible part of this file. */
clock_t DMZ_start, DMZ_end, DMZ2_start;

/* ifilter specific function prototypes */
/* comparator handed to qsort() in main to order category ratings */
int cmp(const void *e1, const void *e2);

/* Main program */
/* written by Jason Rennie <jr6b+@andrew.cmu.edu> */
int 
main (int argc, char **argv)
{
  char *data_file = NULL;   /* full path of idata file */
  char *home_dir = NULL;    /* full path of user's home directory */
  FILE *MSG = NULL;         /* file pointer for a message */
  category_rating * ratings;
  ifile_db idata;
  htable ** message = NULL;
  int i;
  int db_read_result = 0, db_write_result = 0;
  char *file_name;
  int trimmed_words;

  /* Harry's semaphore stuff to protect two ifile jobs from stepping
   * on each other */
  /* Find the Semaphore id */
  if ((semid = semget(SEMKEY, 1, 0666)) < 0)
    if ((semid = semget(SEMKEY, 1, 0666|IPC_CREAT|IPC_EXCL)) < 0)
      {
	perror("semget");
	exit (-1);
      }

  /* Wait for Semaphore to clear */
  sops.sem_num = 0;
  sops.sem_op = 0;
  sops.sem_flg = 0;

  if (semop(semid, &sops, 1))
    {
      perror("semop");
      exit (-1);
    }
  
  /* Set the Semaphore to clear on exit */
  sops.sem_num = 0;
  sops.sem_op = 1;
  sops.sem_flg = SEM_UNDO;

  if (semop(semid, &sops, 1))
    {
      perror("semop");
      exit (-1);
    }
  
  ifile_init_args(&args);
  argp_parse (&argp, argc, argv, 0, 0, &args);
  
  ifile_verbosify(ifile_verbose, "%d file(s) passed\n", args.num_files);
  for (i=0; i < args.num_files; i++)
    ifile_verbosify(ifile_verbose, "file #%d: %s\n", i,
		    EXT_ARRAY_GET(args.file, char *, i));

  /* Get home directory */
  home_dir = getenv("HOME");
  if (home_dir == NULL)
    ifile_error("Fatal: HOME environment variable not defined!\n");
  ifile_verbosify(ifile_verbose, "home directory = %s\n", home_dir);

  /* Get the database file name */
  if (args.db_file != NULL)
    data_file = ifile_strdup (args.db_file);
  else
    data_file = ifile_sprintf("%s/%s", home_dir, DEFAULT_DB_FILE);

  /* remove the .idata file if requested */
  if (args.reset_data)
    {
      ifile_verbosify(ifile_progress, "Removing %s...\n", data_file);
      system(ifile_sprintf("rm %s", data_file));
    }

  ifile_db_init(&idata);
  ifile_open_log(argc, argv);
  ifile_default_lexer_init();

  /* argument variables that still need to be handled:
   * skip_header, minus_folder, plus_folder */

  /* read and lex the message(s) */
  if (args.read_message == TRUE)
    {
      msgs_read = 0;
      DMZ_start = clock();
      if (args.num_files > 0)
	{
	  ifile_verbosify(ifile_progress, "Reading messages...\n");
	  message = (htable **) malloc(args.num_files*sizeof(htable *));
	  for (i=0; i < args.num_files; i++)
	    {
	      file_name = EXT_ARRAY_GET(args.file, char *, i);
	      MSG = fopen(file_name, "r");
	      if (MSG == NULL)
		{
		  ifile_verbosify(ifile_quiet,
				  "Not able to open %s!  No action taken.\n",
				  file_name);
		  message[i] = NULL;
		}
	      else
		{
		  message[i] = ifile_read_message(MSG);
		  if (args.occur == TRUE)
		    ifile_bitify_document(message[i]);
		  if (message[i] != NULL)
		    msgs_read++;
		}
	      if (args.verbosity >= ifile_debug || args.print_tokens)
		ifile_print_message(message[i]);
	      fclose(MSG);
	    }
	}
      else
	{
	  message = (htable **) malloc(sizeof(htable *));
	  ifile_verbosify(ifile_quiet, "Reading message from standard input...\n");
	  message[0] = ifile_read_message(stdin);
	  msgs_read++;
	  if (args.verbosity >= ifile_debug || args.print_tokens)
	    ifile_print_message(message[0]);
	  args.num_files = 1;
	}
      DMZ_end = clock();
      ifile_verbosify(ifile_progress,
		      "Read %d messages.  Time used: %.3f sec\n", msgs_read,
		      ((float)(DMZ_end-DMZ_start))/CLOCKS_PER_SECOND);
    }

  /* Don't do anything else if we are printing tokens */
  if (args.print_tokens)
    exit (0);

  /* Now read the idata database */
  if (args.read_db == TRUE)
    db_read_result = ifile_read_db(data_file, &idata);

  /* Do LOOCV queries if requested */
  if (args.loocv_folder != NULL)
    for (i=0; i < args.num_files; i++)
      {
	ifile_del_db(args.loocv_folder, message[i], &idata);
	ratings = ifile_rate_categories(message[i], &idata);
	qsort(ratings, idata.num_folders, sizeof(category_rating), cmp);
	ifile_print_ratings(stdout, ratings, &idata);
	ifile_free(ratings);
	ifile_add_db(args.loocv_folder, message[i], &idata);
      }

  /* if a query was requested, make the calculations and output the results */
  if (args.query == TRUE)
    {
      for (i=0; i < args.num_files; i++)
	if (message[i] != NULL)
	  {
	    ratings = ifile_rate_categories(message[i], &idata);
	    qsort(ratings, idata.num_folders, sizeof(category_rating), cmp);
	    ifile_print_ratings(stdout, ratings, &idata);
	    if (args.query_insert)
	      ifile_add_db(ratings[0].category, message[i], &idata);
	    ifile_free(ratings);
	  }
    }

  if (args.write_db == TRUE)
    {
      if (args.plus_folder != NULL)
	for (i=0; i < args.num_files; i++)
	  if (message[i] != NULL)
	    ifile_add_db(args.plus_folder, message[i], &idata);

      if (args.minus_folder != NULL)
	for (i=0; i < args.num_files; i++)
	  if (message[i] != NULL)
	    ifile_del_db(args.minus_folder, message[i], &idata);

      if ((args.plus_folder != NULL || args.query_insert == TRUE) &&
	  args.minus_folder == NULL)
	{
	  trimmed_words = ifile_age_words(&idata, msgs_read);
	  ifile_verbosify(ifile_progress,
			  "Trimmed %d words due to lack of frequency\n",
			  trimmed_words);
	}

      db_write_result = ifile_write_db(data_file, &idata);
      if (db_read_result != 0 && db_write_result == 0)
	{
	  ifile_verbosify(ifile_quiet, "Created new %s file.\n", data_file);
	  /* set proper permissions */
	  system(ifile_sprintf("chmod 0600 %s\n", data_file));
	} 
    }

  ifile_close_log();
  
  return 0;
}


/* a comparison function for sorting */
/* Written by Jason Rennie <jr6b+@andrew.cmu.edu> for ifile */
int cmp (const void *e1, const void *e2)
{
  if (((category_rating *)e1)->rating > (((category_rating *)e2)->rating))
    return -1;
  else if (((category_rating *)e1)->rating < (((category_rating *)e2)->rating))
    return 1;
  else
    return 0;
}



