/*
 * $Id: ds.c,v 1.2 2004/10/30 13:38:23 mark Exp $
 *
 * NAME
 *      ds - set of routines to perform search operations on UNIX
 *      dictionary format file (i.e. ordered list of words in a flat
 *      text file).
 *
 * SYNOPSIS
 *      FILE *opendict(char *filename,int nocase)
 *         Opens dictionary file specified as argument.  Returns FILE
 *         pointer if successful, NULL otherwise.  If a dictionary
 *         file is currently open, it will be closed prior to opening
 *         the newly specified dictionary.  If nocase is set TRUE,
 *         the dictionary is assumed to be sorted ignoring case.
 *     int findentry(char *entry)
 *         Locates string pointed to by entry in the open dictionary file.
 *         Returns byte offset at which string was located, or negative
 *         value if string cannot be found.
 *     int setposition(int offset)
 *         Sets current position of file to offset bytes from file beginning.
 *         Returns TRUE if successful, FALSE otherwise.
 *     int getentry(char *str)
 *         Returns string found at the current position in the
 *         dictionary in str.  Str is empty string if error occurs.
 *         Also returns the file position at which the string was read.
 *         Getentry may be called repeatedly to return all strings in
 *         the dictionary.
 *
 * DESCRIPTION
 *     Provides a mechanism for searching dictionaries organised as an
 *     ordered list of newline separated words.  Fast searching is provided
 *     by a binary search algorithm.
 *
 * NOTES
 *
 * MODIFICATION HISTORY
 * Mnemonic    Date    Rel Who
 * DICT        01Oct22 1.0 mpw
 *     Written.
 *
 * Copyright (C) 2001, 2004 Mark Willson.
 *
 *
 * This file is part of the maf program.
 *
 * The maf program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * The maf program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <stdio.h>
#include <sys/stat.h>
#include <string.h>
#include "strutil.h"
#include "ds.h"

// Boolean values used for flags and for setposition's return value.
#define TRUE 1
#define FALSE 0
// Number of data bytes the shared buffer buf can hold (it is declared one
// byte larger to leave room for a terminating '\0').
#define BUFSZ 80

 /*
  * Define the field name which contains the file descriptor in the
  * FILE structure.
  *
  * NOTE(review): this names a private member of the C library's FILE
  * structure, so it only compiles against libraries that expose a member
  * with exactly this name.
  */

#ifdef __linux__
     #define FILENO _fileno
#else
     #define FILENO _file
#endif

// All state below is file-private and shared by every routine in this file,
// so only one dictionary can be open at a time.
static FILE *dict = NULL;   // currently open dictionary, NULL when none is open
static int filesize;        // size in bytes of the open dictionary file
static char buf[BUFSZ+1];   // scratch buffer for data read from the dictionary
static int ignorecase = FALSE;  // TRUE if the dictionary is sorted ignoring case

/*  -------------------------------------------------------------------------
 *  opendict opens the file named by filename for reading and makes it the
 *  current dictionary for the other routines in this file.  On a successful
 *  open the file size is retained for future search operations.
 *
 *  filename : path of the dictionary file.
 *  nocase   : TRUE if the dictionary file is sorted ignoring case (as is the
 *             default UNIX dictionary), FALSE if it is sorted respecting
 *             case.  The flag is recorded even if the open fails.
 *
 *  The FILE pointer is returned if successful, NULL otherwise.
 *
 *  If a dictionary file is already open, it is closed silently first.  After
 *  a failed call no dictionary is open and the retained size is zero.
 *
 *  The size is obtained by seeking to the end of the stream rather than by
 *  reading the file descriptor out of the FILE structure: the layout of FILE
 *  is private to the C library.  A side effect is that a file which cannot
 *  be seeked (and therefore could not be binary searched) is rejected here.
 *  The size is held in an int, as are all offsets used by this module.
 *
 */

FILE* opendict(char *filename,int nocase)
{
    long size;

    // forget any previous dictionary, including its cached size
    if (dict != NULL) {
        fclose(dict);
        dict = NULL;
    }
    filesize = 0;
    ignorecase = nocase;

    if (filename == NULL) {
        return(NULL);
    }

    dict = fopen(filename,"rb");
    if (dict == NULL) {
        return(NULL);
    }

    // measure the file, then return to the beginning so that the stream is
    // positioned exactly as it is after a plain fopen
    if (fseek(dict,0L,SEEK_END) == 0) {
        size = ftell(dict);
        if (size >= 0 && fseek(dict,0L,SEEK_SET) == 0) {
            filesize = (int)size;
            return(dict);
        }
    }

    // could not determine the size: treat as a failed open
    fclose(dict);
    dict = NULL;
    return(NULL);
}

/*  -------------------------------------------------------------------------
 *  findentry will binary search the opened dictionary file for the string
 *  passed in the entry argument.  If found, it will return the byte offset
 *  (from the beginning of the file) where the string was located.
 *
 *  Each probe seeks to the middle of the remaining byte range, skips forward
 *  to the next line boundary and compares entry with the whole word found
 *  there.  A probe at offset zero compares the first word of the file.
 *
 *  If the dictionary was opened as sorted ignoring case, entry is converted
 *  to lower case IN PLACE before searching, and each word read is lower
 *  cased before comparison.
 *
 *  The file position is left wherever the last probe ended; use setposition
 *  with the returned offset before calling getentry.
 *
 *  If the entry cannot be found in the file (or no dictionary is open, or
 *  entry is NULL), -1 is returned.
 *
 *  Other errors return negative integers:
 *      -2 : internal buffer too small to contain a word (a word longer than
 *           BUFSZ characters was encountered)
 *      -3 : fseek or read error
 */

int findentry(char *entry)
{
    int low,            // low point of probe area
        high,           // high point of probe area
        mid,            // mid probe point
        quit,           // set TRUE when probe area exhausted
        result,         // holds result of string comparison
        offset,         // bytes from mid to the start of the first whole word
        len,            // number of characters of that word held in buf
        c;              // character most recently read from the file

    char *strlower(char *s);    // defined further down in this file

    if (dict == NULL || entry == NULL) {
        return(-1);
    }

    if (ignorecase) {
        strlower(entry);
    }

    low = 0;
    high = filesize;
    quit = FALSE;
    while (low <= high && !quit) {
        // once the probe area cannot shrink any further this is the last probe
        if (low+1 >= high)
            quit = TRUE;

        // same value as (low+high)/2, but cannot overflow
        mid = low+(high-low)/2;
        if (fseek(dict,(long)mid,SEEK_SET) != 0) {
            return(-3);
        }

        // determine start of first whole word: at the start of the file we
        // are already on one, anywhere else discard everything up to and
        // including the next newline
        offset = 0;
        c = '\n';
        if (mid != 0) {
            while ((c = getc(dict)) != EOF) {
                offset++;
                if (c == '\n') {
                    break;
                }
            }
        }

        // read the word itself, without its terminating newline; reading
        // it in full (rather than whatever fits in a fixed-size chunk)
        // guarantees the comparison below never sees a truncated word
        len = 0;
        if (c != EOF) {
            while ((c = getc(dict)) != EOF && c != '\n') {
                if (len == BUFSZ) {
                    return(-2);
                }
                buf[len++] = (char)c;
            }
        }
        buf[len] = '\0';    // ensure there is always a string terminator
        if (ferror(dict)) {
            return(-3);
        }

        if (len == 0 && c == EOF) {
            // End of file was reached before any word started, so nothing
            // lies beyond this probe point: the entry, if present, must be
            // earlier in the file.  (An empty line, by contrast, ends with
            // c == '\n' and is compared as an ordinary empty word.)
            high = mid;
            continue;
        }

        if (ignorecase) {
            strlower(buf);
        }
        result = strcmp(entry,buf);
        if (result < 0) {
            high = mid;
        }
        else if (result > 0) {
            low = mid;
        }
        else {
            return(mid+offset);
        }
    }
    return(-1);
}

/*  -------------------------------------------------------------------------
 *  setposition will position the dictionary file to the offset (from the
 *  beginning of the file) specified, so that the next getentry call reads
 *  from there.
 *
 *  offset : byte offset, which must lie between zero and the size of the
 *           dictionary file inclusive.
 *
 *  Returns TRUE if positioning is successful, FALSE otherwise (no dictionary
 *  open, offset out of range, or the seek itself failed).
 *
 */

int setposition(int offset)
{
    // nothing to position if no dictionary is open
    if (dict == NULL) {
        return(FALSE);
    }
    // legal offset?
    if (offset < 0 || offset > filesize) {
        return(FALSE);
    }
    // position to required offset; fseek reports failure with any
    // non-zero value, not specifically EOF
    if (fseek(dict,(long)offset,SEEK_SET) != 0) {
        return(FALSE);
    }
    return(TRUE);
}

/*  -------------------------------------------------------------------------
 *
 *  getentry reads from the current file position and returns, via the
 *  str argument, the first newline delimited string it finds, with the
 *  newline removed.  If the dictionary was opened as sorted ignoring case
 *  the string is converted to lower case.
 *
 *  str : receives the string; it must have room for at least BUFSZ bytes.
 *        It is set to the empty string if nothing could be read.
 *
 *  The routine returns the position in the file at which the string was
 *  read.  If end of file has been reached (or a read error occurs, or no
 *  dictionary is open, or str is NULL), -1 is returned.
 *
 *  At most BUFSZ-1 characters are returned per call; a longer line is
 *  handed back in pieces by successive calls.
 *
 *  getentry may be called repeatedly to return all the strings contained in
 *  the open dictionary
 *
 */

int getentry(char *str)
{
    int pos;
    size_t len;

    if (str == NULL) {
        return(-1);
    }
    str[0] = '\0';
    if (dict == NULL) {
        return(-1);
    }

    // remember where this string starts before reading it
    pos = (int)ftell(dict);

    if (fgets(buf,BUFSZ,dict) == NULL) {
        // end of file: return a negative position and an empty string
        buf[0] = '\0';
        return(-1);
    }

    // fgets keeps the newline only when the whole line fitted; the last
    // line of a file without a trailing newline, or an over-long line, ends
    // in an ordinary character that must not be removed
    len = strlen(buf);
    if (len > 0 && buf[len-1] == '\n') {
        buf[len-1] = '\0';
    }

    // NOTE(review): strlwr is not defined in this file (presumably it comes
    // from strutil.h); findentry uses strlower for the same purpose
    if (ignorecase) strlwr(buf);
    strcpy(str,buf);

    return(pos);
}

/*  ------------------------------------------------------------------------
 *  strlower will convert the string passed as an argument to lower case
 *  in place.
 *
 *  s : string to convert; may be NULL, in which case nothing is done.
 *
 *  Only the letters 'A' to 'Z' are changed; every other character,
 *  including any outside the basic alphabet, is left untouched.
 *
 *  Returns s, so that the call can be used directly in an expression.
 *
 */

char *strlower(char* s)
{
    char *p;

    if (s == NULL) {
        return(NULL);
    }

    // walk forward from the first character; the pointer never moves
    // outside the string
    for (p = s; *p != '\0'; p++) {
        if (*p >= 'A' && *p <= 'Z') {
            *p = (char)(*p - 'A' + 'a');
        }
    }
    return(s);
}
