% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tree_distance_nye.R
\encoding{UTF-8}
\name{NyeSimilarity}
\alias{NyeSimilarity}
\alias{NyeSplitSimilarity}
\title{Nye \emph{et al}. (2006) tree comparison}
\usage{
NyeSimilarity(
  tree1,
  tree2 = NULL,
  similarity = TRUE,
  normalize = FALSE,
  normalizeMax = !is.logical(normalize),
  reportMatching = FALSE,
  diag = TRUE
)

NyeSplitSimilarity(
  splits1,
  splits2,
  nTip = attr(splits1, "nTip"),
  reportMatching = FALSE
)
}
\arguments{
\item{tree1, tree2}{Trees of class \code{phylo}, with leaves labelled identically,
or lists of such trees to undergo pairwise comparison.  Where implemented,
\code{tree2 = NULL} will compute distances between each pair of trees in the list
\code{tree1} using a fast algorithm based on Day (1985).}

\item{similarity}{Logical specifying whether to report the result as a tree
similarity, rather than a difference.}

\item{normalize}{If a numeric value is provided, this will be used as a
maximum value against which to rescale results.
If \code{TRUE}, results will be rescaled against a maximum value calculated from
the specified tree sizes and topology, as specified in the "Normalization"
section below.
If \code{FALSE}, results will not be rescaled.}

\item{normalizeMax}{When calculating similarity, normalize against the
maximum number of splits that could have been present (\code{TRUE}),
or the number of splits that were actually observed (\code{FALSE})?
Defaults to the number of splits in the better-resolved tree; set
\code{normalize = pmin.int} to use the number of splits in the less resolved
tree.}

\item{reportMatching}{Logical specifying whether to return the clade
matchings as an attribute of the score.}

\item{diag}{Logical specifying whether to return similarities along the
diagonal, i.e. of each tree with itself.  Applies only if \code{tree2} is
a list identical to \code{tree1}, or \code{NULL}.}

\item{splits1, splits2}{Logical matrices where each row corresponds to a leaf,
either listed in the same order or bearing identical names (in any sequence),
and each column corresponds to a split, such that each leaf is identified as
a member of the ingroup (\code{TRUE}) or outgroup (\code{FALSE}) of the respective
split.}

\item{nTip}{(Optional) Integer specifying the number of leaves in each split.}
}
\value{
\code{NyeSimilarity()} returns an array of numerics providing the
similarities (or distances, if \code{similarity = FALSE}) between each pair of
trees in \code{tree1} and \code{tree2}, or \code{splits1} and \code{splits2}.
}
\description{
\code{NyeSimilarity()} and \code{NyeSplitSimilarity()} implement the
\href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds}
tree comparison metric of \insertCite{Nye2006;textual}{TreeDist}.
In short, this finds the optimal matching that pairs each branch from
one tree with a branch in the second, where matchings are scored according to
the size of the largest split that is consistent with both of them,
normalized against the Jaccard index.
A more detailed account is available in the
\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#nye-et-al--tree-similarity-metric}{vignettes}.
}
\details{
The measure is defined as a similarity score.  If \code{similarity = FALSE}, the
similarity score will be converted into a distance by doubling it and
subtracting the result from the total number of splits in the two trees.
This ensures consistency with \code{JaccardRobinsonFoulds}.

Note that \code{NyeSimilarity(tree1, tree2)} is equivalent to, but
slightly faster than,
\code{\link{JaccardRobinsonFoulds}(tree1, tree2, k = 1, allowConflict = TRUE)}.
}
\section{Normalization}{


If \code{normalize = TRUE} and \code{similarity = TRUE}, then results will be rescaled
from zero to one by dividing by the mean number of splits in the two trees
being compared.

You may wish to normalize instead against the number of splits present
in the less resolved tree, which represents the maximum value possible for a
pair of trees with the specified topologies (\code{normalize = pmin.int}); the
number of splits in the more resolved tree (\code{normalize = pmax.int});
or the maximum value possible for any pair of trees with \emph{n} leaves,
\emph{n} - 3 (\code{normalize = TreeTools::NTip(tree1) - 3L}).

If \code{normalize = TRUE} and \code{similarity = FALSE}, then results will be rescaled
from zero to one by dividing by the total number of splits in the pair
of trees being considered.

Trees need not contain identical leaves; scores are based on the leaves that
trees hold in common.  Check for unexpected differences in tip labelling
with \code{setdiff(TipLabels(tree1), TipLabels(tree2))}.
}

\examples{
library("TreeTools")
NyeSimilarity(BalancedTree(8), PectinateTree(8))
VisualizeMatching(NyeSimilarity, BalancedTree(8), PectinateTree(8))
NyeSimilarity(as.phylo(0:5, nTip = 8), PectinateTree(8))
NyeSimilarity(as.phylo(0:5, nTip = 8), similarity = FALSE)
}
\references{
\insertAllCited{}
}
\seealso{
Other tree distances: 
\code{\link{JaccardRobinsonFoulds}()},
\code{\link{KendallColijn}()},
\code{\link{MASTSize}()},
\code{\link{MatchingSplitDistance}()},
\code{\link{NNIDist}()},
\code{\link{PathDist}()},
\code{\link{Robinson-Foulds}},
\code{\link{SPRDist}()},
\code{\link{TreeDistance}()}
}
\author{
\href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
(\href{mailto:martin.smith@durham.ac.uk}{martin.smith@durham.ac.uk})
}
\concept{tree distances}
