% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lemmatize.R
\name{lemmatize_strings}
\alias{lemmatize_strings}
\title{Lemmatize a Vector of Strings}
\usage{
lemmatize_strings(x, dictionary = lexicon::hash_lemmas, ...)
}
\arguments{
\item{x}{A vector of strings.}

\item{dictionary}{A dictionary of base terms and lemmas to use for
replacement.  The first column should be the full word form in lower case
while the second column is the corresponding replacement lemma. The default
is the \code{lexicon::hash_lemmas} dictionary.  A dictionary may also be made
from the text using
\code{\link[textstem]{make_lemma_dictionary}}.  For larger texts a
dictionary may take some time to compute.  It may be more useful to generate
the dictionary prior to running the function and explicitly pass the
dictionary in.}

\item{\ldots}{Other arguments passed to \code{\link[textshape]{split_token}}.}
}
\value{
Returns a vector of lemmatized strings.
}
\description{
Lemmatize a vector of strings.
}
\note{
The lemmatizer splits the string apart into tokens for speed
optimization.  After the lemmatizing occurs the strings are pasted back
together.  The strings are not guaranteed to retain the exact spacing of the
original.
}
\examples{
x <- c(
    'the dirtier dog has eaten the pies',
    'that shameful pooch is tricky and sneaky',
    "He opened and then reopened the food bag",
    'There are skies of blue and red roses too!',
    NA,
    "The doggies, well they aren't joyfully running.",
    "The daddies are coming over...",
    "This is 34.546 above"
)

## Default lexicon::hash_lemmas dictionary
lemmatize_strings(x)

## Hunspell dictionary
lemma_dictionary <- make_lemma_dictionary(x, engine = 'hunspell')
lemmatize_strings(x, dictionary = lemma_dictionary)

## Bigger data set
library(dplyr)
presidential_debates_2012$dialogue \%>\%
    lemmatize_strings() \%>\%
    head()

\dontrun{
## Treetagger dictionary
lemma_dictionary2 <- make_lemma_dictionary(x, engine = 'treetagger')
lemmatize_strings(x, lemma_dictionary2)

lemma_dictionary3 <- presidential_debates_2012$dialogue \%>\%
    make_lemma_dictionary(engine = 'treetagger')

presidential_debates_2012$dialogue \%>\%
     lemmatize_strings(lemma_dictionary3) \%>\%
     head()
}
}
\seealso{
\code{\link[textstem]{lemmatize_words}}
}

