/*************************************************************************************************
 * Interface of common features
 *                                                      Copyright (C) 2003-2004 Mikio Hirabayashi
 * This file is part of Estraier, a personal full-text search system.
 * Estraier is free software; you can redistribute it and/or modify it under the terms of the GNU
 * General Public License as published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 * Estraier is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with Estraier;
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA.
 *************************************************************************************************/


#ifndef _ESTCOMMON_H                     /* duplication check */
#define _ESTCOMMON_H



/*************************************************************************************************
 * headers
 *************************************************************************************************/


#include <depot.h>
#include <curia.h>
#include <cabin.h>
#include <villa.h>
#include <odeum.h>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdarg.h>
#include <limits.h>
#include <time.h>
#include <signal.h>

#ifdef MYHAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef MYHAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef MYHAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef MYHAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef MYHAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#ifdef MYHAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifdef MYHAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef MYHAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef MYHAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef MYHAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef MYHAVE_FCNTL_H
#include <fcntl.h>
#endif

/* Mac OS X workaround: make the name `socklen_t' expand to `int'.
   NOTE(review): presumably needed for early Mac OS X SDKs that did not declare `socklen_t'.
   Because this is a macro, it also rewrites the name on systems whose headers already provide
   a `socklen_t' typedef -- confirm that the override is still required. */
#if defined(__APPLE__) && defined(__MACH__)  /* for Mac OS X */
#define socklen_t   int
#endif




/*************************************************************************************************
 * macros
 *************************************************************************************************/


/* Fallback values of the build configuration macros.
   Each macro is defined here only if it has not been defined already, so a value given
   before this header is included (presumably by the build system on the compiler command
   line -- TODO confirm) takes precedence over the dummy below. */
#ifndef _EST_VERSION                     /* dummy version */
#define _EST_VERSION   "0.0.0"
#endif
#ifndef _EST_PREFIX                      /* dummy prefix */
#define _EST_PREFIX    "/usr/local"
#endif
#ifndef _EST_BINDIR                      /* dummy bindir */
#define _EST_BINDIR    "/usr/local/bin"
#endif
#ifndef _EST_LEXEDIR                     /* dummy libexecdir */
#define _EST_LEXEDIR   "/usr/local/libexec"
#endif
#ifndef _EST_DATADIR                     /* dummy datadir */
#define _EST_DATADIR   "/usr/local/share/estraier"
#endif

/* Boolean constants.  Any definition made by a previously included header is discarded
   first, so after this point `TRUE' is always 1 and `FALSE' is always 0. */
#undef TRUE                              /* boolean true */
#define TRUE           1
#undef FALSE                             /* boolean false */
#define FALSE          0

/* Building blocks of file paths.  Note that the first two are character constants and the
   last two are string literals. */
#define ESTPATHCHR     '/'               /* delimiter character of path */
#define ESTEXTCHR      '.'               /* delimiter character of extension */
#define ESTCDIRSTR     "."               /* string of current directory */
#define ESTPDIRSTR     ".."              /* string of parent directory */

/* Names of auxiliary databases and files, and tuning parameters shared by the programs. */
#define ESTLOCALE      "C"               /* name of the common locale */
#define ESTDBGFDENV    "ESTDBGFD"        /* environment variable for debug fd */
#define ESTMTDBNAME    "_mtime"          /* name of the database for last modified times */
#define ESTMTDBLRM     81                /* records in a leaf node of time database */
#define ESTMTDBNIM     192               /* records in a non-leaf node of time database */
#define ESTMTDBLCN     64                /* number of leaf cache of time database */
#define ESTMTDBNCN     32                /* number of non-leaf cache of time database */
#define ESTSCDBNAME    "_score"          /* name of the database for scores */
#define ESTSCDBBRAT    4                 /* ratio of bnum and dnum of the score database */
#define ESTSCDBDIVNUM  7                 /* division number of the score database */
#define ESTDTDBNAME    "_date"           /* name of the database for dates */
#define ESTDTDBBRAT    4                 /* ratio of bnum and dnum of the date database */
#define ESTWDLSNAME    "_wlist"          /* name of the text file for word list */
#define ESTFILTERFUNC  "estfilter"       /* name of the dynamically linked filter function */
#define ESTPATHBUFSIZ  2048              /* size of a path buffer */
#define ESTNUMBUFSIZ   32                /* size of a number buffer */
#define ESTDATEBUFSIZ  1024              /* size of a date buffer */
#define ESTPETITBNUM   31                /* bucket number of a petit map */
#define ESTCJKPMIN     0x20              /* minimum plain of UCS-2 handled as CJK text */
#define ESTENCMISSMAX  16                /* max number of misses of encoding characters */
#define ESTMIMEFOLD    60                /* folding bytes of MIME message */
#define ESTMAXLOAD     0.9               /* max ratio of bucket loading */
#define ESTKEYNUM      32                /* number of keywords to store */
#define ESTWMINLEN     2                 /* min length of a word */
#define ESTWMAXLEN     32                /* max length of a word */

/* Compile-time switch for strict text splitting.
   When `MYSTRICT' is defined, `ESTISSTRICT' is `TRUE' and `ESTDELIMCHARS' is a list of ASCII
   punctuation characters; otherwise `ESTISSTRICT' is `FALSE' and `ESTDELIMCHARS' is empty.
   NOTE(review): how `ESTDELIMCHARS' is consumed is not visible in this header -- see the
   implementation for the exact splitting rule. */
#if defined(MYSTRICT)                    /* split text strictly by space */
#define ESTISSTRICT    TRUE
#define ESTDELIMCHARS  "+,-.:;!\"#$%&'()*/<=>?@[\\]^`{|}~"
#else
#define ESTISSTRICT    FALSE
#define ESTDELIMCHARS  ""
#endif

/* Compile-time switch for stop words.
   `ESTISNOSTOPW' is `TRUE' when `MYNOSTOPW' is defined, and `FALSE' otherwise. */
#if defined(MYNOSTOPW)                   /* do not exclude stop words */
#define ESTISNOSTOPW   TRUE
#else
#define ESTISNOSTOPW   FALSE
#endif



/*************************************************************************************************
 * types and constants
 *************************************************************************************************/


enum {                                   /* mode of estdocaddtext */
  ESTDOCBOTH,                            /* register both normal and appearance words */
  ESTDOCNONLY,                           /* register normal words only */
  ESTDOCAONLY                            /* register appearance words only */
};

/* Type of the pointer to a filter function.
   `infile' specifies the name of the input file.
   `outfile' specifies the name of the output file.
   NOTE(review): presumably the filter reads `infile' and writes its result to `outfile';
   the function returns nothing, so it cannot report failure through a return value. */
typedef void (*ESTFILTER)(const char *infile, const char *outfile);

/* One element of a parsed search phrase.  Arrays of this structure are made with
   `estsearchwords' and released with `estfreewords'. */
typedef struct {                         /* type of structure for a search word */
  int type;                              /* condition type */
  char *word;                            /* string of the word */
  int dnum;                              /* number of corresponding documents */
  CBLIST *evwords;                       /* words evolved from the regular expression */
} ESTWORD;

/* Condition types of a search word.
   NOTE(review): presumably the values stored in the `type' member of `ESTWORD', which is
   documented as the condition type -- confirm against the implementation. */
enum {                                   /* enumeration for condition types */
  ESTCONDAND,                            /* and search */
  ESTCONDOR,                             /* or search */
  ESTCONDNOT                             /* notand search */
};



/*************************************************************************************************
 * global variables
 *************************************************************************************************/


/* Flags telling whether each optional feature is supported.
   They are only declared here; the definitions live in another translation unit. */
extern int estisregex;                   /* whether regex is supported */
extern int estisdlfunc;                  /* whether dlfunc is supported */
extern int estiscjkuni;                  /* whether cjkuni is supported */
extern int estischasen;                  /* whether chasen is supported */
extern int estismecab;                   /* whether mecab is supported */
extern int estiskakasi;                  /* whether kakasi is supported */



/*************************************************************************************************
 * functions
 *************************************************************************************************/


/* Make a document handle from a plain text.
   `uri' specifies the URI of the document.
   `text' specifies the data of the document.
   `size' specifies the size of the text.
   `code' specifies the character encoding of the text.  If it is `NULL', the encoding is
   detected automatically.
   The return value is a document handle for Odeum.
   NOTE(review): how the returned handle is to be released is not stated here -- presumably
   with the corresponding function of Odeum; confirm. */
ODDOC *estdocplain(const char *uri, const char *text, int size, const char *code);


/* Make a document handle from an HTML text.
   `uri' specifies the URI of the document.
   `text' specifies the data of the document.
   `size' specifies the size of the text.
   `code' specifies the character encoding of the text.  If it is `NULL', the encoding is
   detected automatically.
   The return value is a document handle for Odeum.
   NOTE(review): how the returned handle is to be released is not stated here -- presumably
   with the corresponding function of Odeum; confirm. */
ODDOC *estdochtml(const char *uri, const char *text, int size, const char *code);


/* Make a document handle from a MIME message.
   `uri' specifies the URI of the document.
   `text' specifies the string of the document.  Unlike `estdocplain' and `estdochtml', no
   size is passed.
   `code' specifies the character encoding of the text.  If it is `NULL', the encoding is
   detected automatically.
   `nude' specifies whether attributes of the inner content are given priority.
   The return value is a document handle for Odeum. */
ODDOC *estdocmime(const char *uri, const char *text, const char *code, int nude);


/* Break a text into words and register them to a document handle.
   `doc' specifies a document handle.
   `text' specifies a text.
   `size' specifies the size of the text.
   `code' specifies the character encoding of the text.  If it is `NULL', the encoding is
   detected automatically.
   `mode' specifies detailed behavior.  `ESTDOCBOTH' registers both normal and appearance
   words.  `ESTDOCNONLY' registers normal words only.  `ESTDOCAONLY' registers appearance
   words only. */
void estdocaddtext(ODDOC *doc, const char *text, int size, const char *code, int mode);


/* Get the pointer to the filter function in a dynamic linking library.
   `file' specifies the name of a dynamic linking library.
   The return value is the pointer to the function, or `NULL' on failure.
   NOTE(review): presumably the function is looked up by the name `ESTFILTERFUNC' -- confirm
   against the implementation. */
ESTFILTER estfilterget(const char *file);


/* Make search words and their conditions from a search phrase.
   `phrase' specifies a search phrase whose encoding is UTF-8.
   `np' specifies the pointer to a variable to which the number of elements of the return
   value is assigned.
   `norm' specifies whether to normalize each word.
   The return value is an array whose elements are structures composed of search words and
   their conditions.  The returned array should be released with `estfreewords'. */
ESTWORD *estsearchwords(const char *phrase, int *np, int norm);


/* Release the regions of an array of search words.
   `words' specifies an array made with `estsearchwords'.
   `num' specifies the number of elements of the array, as assigned through the `np'
   parameter of `estsearchwords'. */
void estfreewords(ESTWORD *words, int num);


/* Get the search result for search words made with `estsearchwords'.
   `odeum' specifies a database handle.
   `words' specifies an array made with `estsearchwords'.
   `wnum' specifies the number of elements of the array.
   `unit' specifies the search unit, which determines accuracy.
   `tfidf' specifies whether scores are tuned by the TF-IDF method.
   `np' specifies the pointer to a variable to which the number of elements of the return
   value is assigned.
   `lp' specifies the pointer to a variable to which the number of documents left in the
   index is assigned.
   `regex' specifies whether search words are treated as regular expressions.
   `wild' specifies whether search words are treated as expressions with wild cards.
   `reevmax' specifies the max number of evolved words from regular expressions or
   expressions with wild cards.
   The return value is an array as with `odsearch'.
   NOTE(review): `words' is not const-qualified; presumably the `dnum' and `evwords' members
   of each element are filled in by this call -- confirm against the implementation. */
ODPAIR *estsearch(ODEUM *odeum, ESTWORD *words, int wnum, int unit, int tfidf, int *np, int *lp,
                  int regex, int wild, int reevmax);


/* Initialize the iterator of a database.
   `odeum' specifies a database handle.
   `prefix' specifies a prefix of URIs.
   The return value indicates whether the processing succeeded. */
int estiterinit(ODEUM *odeum, const char *prefix);


/* Get the URI of the next document whose URI begins with a prefix.
   `odeum' specifies a database handle.
   `prefix' specifies a prefix of URIs.
   The return value is the URI of the next document, or `NULL' if no document is left.
   NOTE(review): the return type is a non-const `char *', but whether the caller has to
   release the string is not stated here -- confirm against the implementation. */
char *estiternext(ODEUM *odeum, const char *prefix);


/* Move the cursor back onto the document following the last deleted document.
   `odeum' specifies a database handle.
   `uri' specifies the URI of the last deleted document.
   The return value indicates whether the processing succeeded. */
int estiterresurge(ODEUM *odeum, const char *uri);


/* Set an environment variable.
   `name' specifies the name of an environment variable.
   `value' specifies the value of the variable.
   There is no return value, so a failure is not reported to the caller. */
void estputenv(const char *name, const char *value);


/* Execute a command and get its standard output.
   `command' specifies a command line.
   `sp' specifies the pointer to a variable to which the size of the region of the return
   value is assigned.  If it is `NULL', it is not used.
   If successful, the return value is the pointer to the region containing the standard output
   of the command, else, it is `NULL'.
   NOTE(review): whether the caller has to release the returned region is not stated here --
   confirm against the implementation. */
char *estreadexec(const char *command, int *sp);


/* Make time data from a date string of RFC822, RFC850, or ASCTIME format.
   `str' specifies a date string of RFC822, RFC850, or ASCTIME format.
   The return value is the UNIX time, or -1 if the string is invalid.  Note that the result
   is an `int', not a `time_t', so it is limited to the range of `int'. */
int eststrmktime(const char *str);



#endif                                   /* duplication check */


/* END OF FILE */
