% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/KWIC.R
\encoding{UTF-8}
\name{KWIC}
\alias{KWIC}
\title{Create a KWIC Index}
\usage{
KWIC(x, fields, min.freq = 10)
}
\arguments{
\item{x}{A data frame from which the KWIC index is to be generated.}

\item{fields}{A character vector with the names of fields (columns) of the 
data frame from which the KWIC index is to be generated. The first field is 
considered as the primary key or identifier (see \strong{Details}).}

\item{min.freq}{Frequencies of keywords are not computed if below 
\code{min.freq}. Default is 10.}
}
\value{
A list of class \code{KWIC} containing the following components: 
  \tabular{ll}{ \code{KWIC} \tab The KWIC index in the form of a data frame. 
  \cr \code{KeywordFreq} \tab A data frame of the keywords detected with 
  frequency greater than \code{min.freq}. \cr \code{Fields} \tab A character 
  vector with the names of the PGR database fields from which the keywords 
  were extracted. \cr }
}
\description{
\code{KWIC} creates a Keyword in Context index from PGR passport database 
fields.
}
\details{
The function generates a Keyword in Context index from a data frame of a PGR 
passport database based on the fields (columns) stated in the arguments, using
the \code{\link[data.table]{data.table}} package.

The first element of vector \code{fields} is considered as the primary key or
identifier which uniquely identifies all rows in the data frame.

Cleaning of the data in the input fields (columns) using the 
\code{\link[PGRdup]{DataClean}} function with appropriate arguments is 
suggested before running this function.
}
\examples{
# Load PGR passport database
GN <- GN1000

# Specify as a vector the database fields to be used
GNfields <- c("NationalID", "CollNo", "DonorID", "OtherID1", "OtherID2")

# Clean the data
GN[GNfields] <- lapply(GN[GNfields], function(x) DataClean(x))

\dontrun{

# Generate KWIC index
GNKWIC <- KWIC(GN, GNfields)
GNKWIC

# Retrieve the KWIC index from the KWIC object
KWIC <- GNKWIC[[1]]

# Retrieve the keyword frequencies from the KWIC object
KeywordFreq <- GNKWIC[[2]]

# Show error in case of duplicates and NULL values 
# in the primary key/ID field "NationalID"
GN[1001:1005,] <- GN[1:5,]
GN[1001,3] <- ""
GNKWIC <- KWIC(GN, GNfields)
}
}
\references{
Knüpffer, H. 1988. "The European Barley Database of the ECP/GR:
  An Introduction." \emph{Die Kulturpflanze} 36 (1): 135-62. 
  doi:\url{https://doi.org/10.1007/BF01986957}. \cr \cr Knüpffer, H., L.
  Frese, and M. W. M. Jongen. 1997. "Using Central Crop Databases: Searching 
  for Duplicates and Gaps." In \emph{Central Crop Databases: Tools for Plant 
  Genetic Resources Management. Report of a Workshop, Budapest, Hungary, 
  13-16 October 1996}, edited by E. Lipman, M. W. M. Jongen, T. J. L. van 
  Hintum, T. Gass, and L. Maggioni, 67-77. Rome, Italy and Wageningen, The 
  Netherlands: International Plant Genetic Resources Institute and Centre for
  Genetic Resources.
}
\seealso{
\code{\link[data.table]{data.table}}, 
  \code{\link[PGRdup]{print.KWIC}}
}
