% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggscatterstats.R
\name{ggscatterstats}
\alias{ggscatterstats}
\title{Scatterplot with marginal distributions and statistical results}
\usage{
ggscatterstats(
  data,
  x,
  y,
  type = "parametric",
  conf.level = 0.95,
  bf.prior = 0.707,
  bf.message = TRUE,
  label.var = NULL,
  label.expression = NULL,
  point.label.args = list(size = 3),
  formula = y ~ x,
  smooth.line.args = list(size = 1.5, color = "blue"),
  method = "lm",
  method.args = list(),
  point.args = list(size = 3, alpha = 0.4),
  point.width.jitter = 0,
  point.height.jitter = 0,
  marginal = TRUE,
  marginal.type = "histogram",
  margins = "both",
  marginal.size = 5,
  xfill = "#009E73",
  yfill = "#D55E00",
  xparams = list(fill = xfill),
  yparams = list(fill = yfill),
  centrality.parameter = "none",
  centrality.label.args = list(size = 3),
  vline.args = list(color = xfill, size = 1, linetype = "dashed"),
  hline.args = list(color = yfill, size = 1, linetype = "dashed"),
  results.subtitle = TRUE,
  xlab = NULL,
  ylab = NULL,
  title = NULL,
  subtitle = NULL,
  caption = NULL,
  beta = 0.1,
  k = 2L,
  ggtheme = ggplot2::theme_bw(),
  ggstatsplot.layer = TRUE,
  ggplot.component = NULL,
  output = "plot",
  ...
)
}
\arguments{
\item{data}{A dataframe (or a tibble) from which variables specified are to
be taken. A matrix or a table will \strong{not} be accepted.}

\item{x}{The column in \code{data} containing the explanatory variable to be
plotted on the \code{x}-axis. Can be entered either as a character string (e.g.,
\code{"x"}) or as a bare expression (e.g., \code{x}).}

\item{y}{The column in \code{data} containing the response (outcome) variable to
be plotted on the \code{y}-axis. Can be entered either as a character string
(e.g., \code{"y"}) or as a bare expression (e.g., \code{y}).}

\item{type}{Type of association between paired samples required
("\code{"parametric"}: Pearson's product moment correlation coefficient" or
"\code{"nonparametric"}: Spearman's rho" or "\code{"robust"}: percentage bend
correlation coefficient" or "\code{"bayes"}: Bayes Factor for Pearson's \emph{r}").
Corresponding abbreviations are also accepted: \code{"p"} (for
parametric/pearson), \code{"np"} (nonparametric/spearman), \code{"r"} (robust),
\code{"bf"} (for bayes factor), resp.}

\item{conf.level}{Scalar between 0 and 1. If unspecified, the defaults return
\verb{95\%} lower and upper confidence intervals (\code{0.95}).}

\item{bf.prior}{A number between \code{0.5} and \code{2} (default \code{0.707}), the prior
width to use in calculating Bayes factors.}

\item{bf.message}{Logical that decides whether to display Bayes Factor in
favor of the \emph{null} hypothesis. This argument is relevant only \strong{for
parametric test} (Default: \code{TRUE}).}

\item{label.var}{Variable to use for point labels. Can be entered either as
a character string (e.g., \code{"var1"}) or as a bare expression (e.g., \code{var1}).}

\item{label.expression}{An expression evaluating to a logical vector that
determines the subset of data points to label. This argument can be entered
either as a character string (e.g., \code{"y < 4 & z < 20"}) or as a bare
expression (e.g., \code{y < 4 & z < 20}).}

\item{point.label.args}{A list of additional aesthetic arguments to be passed
to \code{ggrepel::geom_label_repel} geom used to display the labels.}

\item{formula}{Formula to use in smoothing function, e.g. \code{y ~ x},
\code{y ~ poly(x, 2)}, \code{y ~ log(x)}. The default is \code{y ~ x}. If set to
\code{NULL}, then \code{method = NULL} implies \code{formula = y ~ x} when there are
fewer than 1,000 observations and \code{formula = y ~ s(x, bs = "cs")} otherwise.}

\item{smooth.line.args}{A list of additional aesthetic arguments to be passed
to \code{ggplot2::geom_smooth} geom used to display the regression line.}

\item{method}{Smoothing method (function) to use, accepts either
\code{NULL} or a character vector, e.g. \code{"lm"}, \code{"glm"}, \code{"gam"}, \code{"loess"}
or a function, e.g. \code{MASS::rlm} or \code{mgcv::gam}, \code{stats::lm}, or \code{stats::loess}.
\code{"auto"} is also accepted for backwards compatibility.  It is equivalent to
\code{NULL}.

For \code{method = NULL} the smoothing method is chosen based on the
size of the largest group (across all panels). \code{\link[stats:loess]{stats::loess()}} is
used for less than 1,000 observations; otherwise \code{\link[mgcv:gam]{mgcv::gam()}} is
used with \code{formula = y ~ s(x, bs = "cs")} with \code{method = "REML"}. Somewhat anecdotally,
\code{loess} gives a better appearance, but is \eqn{O(N^{2})}{O(N^2)} in memory,
so does not work for larger datasets.

If you have fewer than 1,000 observations but want to use the same \code{gam()}
model that \code{method = NULL} would use, then set
\verb{method = "gam", formula = y ~ s(x, bs = "cs")}.}

\item{method.args}{List of additional arguments passed on to the modelling
function defined by \code{method}.}

\item{point.args}{A list of additional aesthetic arguments to be passed
to \code{ggplot2::geom_point} geom used to display the raw data points.}

\item{point.width.jitter, point.height.jitter}{Degree of jitter in \code{x} and \code{y}
direction, respectively. Defaults to \code{0} (0\%) of the resolution of the
data. Note that the jitter should not be specified in the \code{point.args}
because this information will be passed to two different \code{geom}s: one
displaying the points and the other displaying the labels for these points.}

\item{marginal}{Decides whether \code{ggExtra::ggMarginal()} plots will be
displayed; the default is \code{TRUE}.}

\item{marginal.type}{Type of marginal distribution to be plotted on the axes
(\code{"histogram"}, \code{"boxplot"}, \code{"density"}, \code{"violin"}, \code{"densigram"}).}

\item{margins}{Along which margins to show the plots. One of: \code{"both"}, \code{"x"}, or \code{"y"}.}

\item{marginal.size}{Integer describing the relative size of the marginal
plots compared to the main plot. A size of \code{5} means that the main plot is
5x wider and 5x taller than the marginal plots.}

\item{xfill, yfill}{Character describing color fill for \code{x} and \code{y} axes
marginal distributions (default: \code{"#009E73"} (for \code{x}) and \code{"#D55E00"} (for
\code{y})). The same colors will also be used for the lines denoting centrality
parameters if the \code{centrality.parameter} argument is set to \code{"mean"} or
\code{"median"}. Note that the defaults are colorblind-friendly.}

\item{xparams}{List of extra parameters to use only for the marginal plot along
the x axis.}

\item{yparams}{List of extra parameters to use only for the marginal plot along
the y axis.}

\item{centrality.parameter}{Decides \emph{which} measure of central tendency (\code{"mean"}
or \code{"median"}) is to be displayed as vertical (for \code{x}) and horizontal (for
\code{y}) lines. Defaults to \code{"none"}. Note that mean values correspond to the
arithmetic mean and not the geometric mean.}

\item{centrality.label.args}{A list of additional
aesthetic arguments to be passed to the \code{geom_label} used to display the
label corresponding to the centrality parameter and test value.}

\item{vline.args, hline.args}{A list of additional aesthetic arguments to be
passed to \code{ggplot2::geom_vline} and \code{ggplot2::geom_hline} geoms used to
display the centrality parameter labels on vertical and horizontal lines.}

\item{results.subtitle}{Decides whether the results of statistical tests are
to be displayed as a subtitle (Default: \code{TRUE}). If set to \code{FALSE}, only
the plot will be returned.}

\item{xlab}{Labels for \code{x} and \code{y} axis variables. If \code{NULL} (default),
variable names for \code{x} and \code{y} will be used.}

\item{ylab}{Labels for \code{x} and \code{y} axis variables. If \code{NULL} (default),
variable names for \code{x} and \code{y} will be used.}

\item{title}{The text for the plot title.}

\item{subtitle}{The text for the plot subtitle. Will work only if
\code{results.subtitle = FALSE}.}

\item{caption}{The text for the plot caption.}

\item{beta}{Bending constant (Default: \code{0.1}). For more, see \code{?WRS2::pbcor}.}

\item{k}{Number of digits after decimal point (should be an integer)
(Default: \code{k = 2L}).}

\item{ggtheme}{A function, \code{ggplot2} theme name. Default value is
\code{ggplot2::theme_bw()}. Any of the \code{ggplot2} themes, or themes from
extension packages are allowed (e.g., \code{ggthemes::theme_fivethirtyeight()},
\code{hrbrthemes::theme_ipsum_ps()}, etc.).}

\item{ggstatsplot.layer}{Logical that decides whether \code{theme_ggstatsplot}
theme elements are to be displayed along with the selected \code{ggtheme}
(Default: \code{TRUE}). \code{theme_ggstatsplot} is an opinionated theme layer that
overrides some aspects of the selected \code{ggtheme}.}

\item{ggplot.component}{A \code{ggplot} component to be added to the plot prepared
by \code{ggstatsplot}. This argument is primarily helpful for the \code{grouped_}
variant of the current function. Default is \code{NULL}. The argument should be
entered as a function.}

\item{output}{Character that describes what is to be returned: can be
\code{"plot"} (default) or \code{"subtitle"} or \code{"caption"}. Setting this to
\code{"subtitle"} will return the expression containing statistical results. If
you have set \code{results.subtitle = FALSE}, then this will return a \code{NULL}.
Setting this to \code{"caption"} will return the expression containing details
about Bayes Factor analysis, but valid only when \code{type = "parametric"} and
\code{bf.message = TRUE}, otherwise this will return a \code{NULL}. For functions
\code{ggpiestats} and \code{ggbarstats}, setting \code{output = "proptest"} will return a
dataframe containing results from proportion tests.}

\item{...}{Currently ignored.}
}
\description{
Scatterplots from \code{ggplot2} combined with marginal
histograms/boxplots/density plots with statistical details added as a
subtitle.
}
\note{
\itemize{
\item If you set \code{marginal = TRUE}, the resulting plot can't be further modified
with \code{ggplot2} functions since it is no longer a \code{ggplot} object. In case you
want a \code{ggplot} object, set \code{marginal = FALSE}. Also have a look at the
\code{ggplot.component} argument.
\item The plot uses \code{ggrepel::geom_label_repel} to attempt to keep labels
from overlapping to the largest degree possible. As a consequence, plot
times will slow down massively (and the plot file will grow in size) if you
have a lot of labels that overlap.
}
}
\examples{
\donttest{
# to get reproducible results from bootstrapping
set.seed(123)
library(ggstatsplot)

# creating dataframe with rownames converted to a new column
mtcars_new <- as_tibble(mtcars, rownames = "car")

# non-parametric correlation with labelled points and median lines
ggstatsplot::ggscatterstats(
  data = mtcars_new,
  x = wt,
  y = mpg,
  type = "np",
  label.var = car,
  label.expression = wt < 4 & mpg < 20,
  centrality.parameter = "median"
)
}
}
\references{
\url{https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggscatterstats.html}
}
\seealso{
\code{\link{grouped_ggscatterstats}}, \code{\link{ggcorrmat}},
\code{\link{grouped_ggcorrmat}}
}
