% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/strcut.R
\name{strcut_loc}
\alias{strcut_loc}
\alias{strcut_brk}
\title{Cut Strings}
\usage{
strcut_loc(str, loc)

strcut_brk(str, type = "character", tolist = FALSE, n = -1L, ...)
}
\arguments{
\item{str}{a string or character vector.}

\item{loc}{Either one of the following:
\itemize{
\item the result from the \link{stri_locate_ith} function.
\item a matrix of 2 integer columns, with \code{nrow(loc)==length(str)},
giving the location range of the middle part.
\item a vector of length 2, giving the location range of the middle part.
}}

\item{type}{single string;
either the break iterator type,
one of \code{character}, \code{line_break}, \code{sentence}, \code{word},
or a custom set of ICU break iteration rules. \cr
\ifelse{html}{\href{https://stringi.gagolewski.com/rapi/about_search_boundaries.html}{\figure{aboutsearch-boundaries-blue.svg}{options: alt='[BOUNDARIES]'}}}{\href{https://stringi.gagolewski.com/rapi/about_search_boundaries.html}{about search: boundaries}} \cr}

\item{tolist}{logical, indicating if \code{strcut_brk} should return a list (\code{TRUE}),
or a matrix (\code{FALSE}, default).}

\item{n}{see \link[stringi]{stri_split_boundaries}.}

\item{...}{additional arguments to be passed to \link[stringi]{stri_split_boundaries}.}
}
\value{
For the \code{strcut_loc()} function: \cr
A character matrix with \code{length(str)} rows and 3 columns,
where for every row \code{i} it holds the following:
\itemize{
\item the first column contains the sub-string \bold{before} \code{loc[i,]},
or \code{NA} if \code{loc[i,]} contains \code{NA};
\item the second column contains the sub-string at \code{loc[i,]},
or the uncut string if \code{loc[i,]} contains \code{NA};
\item the third and last column contains the sub-string \bold{after} \code{loc[i,]},
or \code{NA} if \code{loc[i,]} contains \code{NA}. \cr
\cr
}

For the \code{strcut_brk()} function: \cr
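If \code{tolist = FALSE} (default):
a character matrix with \code{length(str)} rows and
a number of columns equal to the maximum number of pieces into which any string in \code{str} was cut. \cr
Empty places are filled with \code{NA}. \cr
If \code{tolist = TRUE}: a list is returned instead of a matrix.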
}
\description{
The \code{strcut_loc()} function
cuts every string in a character vector around a location range \code{loc},
such that every string is cut into the following parts:
\itemize{
\item the sub-string \bold{before} \code{loc};
\item the sub-string at \code{loc} itself;
\item the sub-string \bold{after} \code{loc}.
}

The location range \code{loc} would usually be a matrix with 2 columns,
giving the start and end points of some pattern match. \cr
\cr
The \code{strcut_brk()} function
(a wrapper around \link[stringi]{stri_split_boundaries}\code{(..., tokens_only = FALSE)})
cuts every string into individual text breaks
(like character, word, line, or sentence boundaries). \cr
\cr
}
\details{
The main difference between the \code{strcut_}-functions
and \link[stringi]{stri_split} / \link[base]{strsplit}
is that the latter generally remove the delimiter patterns in a string when cutting,
while the \code{strcut_}-functions do not attempt to remove parts of the string by default;
they only attempt to cut the strings into separate pieces.
Moreover, the \code{strcut_}-functions return a matrix by default. \cr
\cr
}
\examples{

# --- strcut_loc(): cut strings around a location range ---

# Ten copies of the same string, "12345678910".
x <- rep(paste0(1:10, collapse = ""), 10)
print(x)
# Build the location ranges with stri_locate_ith() (one of the accepted
# forms of loc), then cut every string into before / at / after parts.
loc <- stri_locate_ith(x, 1:10, fixed = as.character(1:10))
strcut_loc(x, loc)
# loc may also be a vector of length 2 giving the range of the middle part.
strcut_loc(x, c(5,5))
# When loc contains NA, the Value section documents this result:
# the uncut string in the second column, NA in the first and third columns.
strcut_loc(x, c(NA, NA))
strcut_loc(x, c(5, NA))
strcut_loc(x, c(NA, 5))

# --- strcut_brk(): cut strings at text boundaries ---

# Test string assembled from two string literals
# (the infix operator between them is written Rd-escaped in this file).
test <- "The\u00a0above-mentioned    features are very useful. " \%s+\%
"Spam, spam, eggs, bacon, and spam. 123 456 789"
# Different break iterator types; a matrix is returned (tolist = FALSE is the default).
strcut_brk(test, "line")
strcut_brk(test, "word")
strcut_brk(test, "sentence")
# With no type given, the default type = "character" is used.
strcut_brk(test)
# n is documented under stringi's stri_split_boundaries.
strcut_brk(test, n = 1)
# tolist = TRUE: return a list instead of a matrix.
strcut_brk(test, "line", tolist = TRUE)
strcut_brk(test, "word", tolist = TRUE)
strcut_brk(test, "sentence", tolist = TRUE)

}
\seealso{
\link{tinycodet_strings}
}
