% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/olink_normalization_n.R
\name{olink_normalization_n}
\alias{olink_normalization_n}
\title{Bridge and/or subset normalization of all proteins among multiple NPX
projects.}
\usage{
olink_normalization_n(norm_schema)
}
\arguments{
\item{norm_schema}{A tibble with more than 1 row and (strictly) the
following columns: "order", "name", "data", "samples", "normalization_type",
"normalize_to". See "Details" for the structure of the data frame
(required)}
}
\value{
A "tibble" of NPX data in long format containing normalized NPX
values, including adjustment factors and name of project.
}
\description{
This function normalizes pairs of NPX projects (data frames) using shared
samples or subsets of samples.\cr\cr
}
\details{
This function is a wrapper of olink_normalization_bridge and
olink_normalization_subset.\cr\cr

The input of this function is a tibble that contains all the necessary
information to normalize multiple NPX projects. This tibble is called the
normalization schema. The basic idea is that every row of the data frame is
a separate project to be normalized. We assume that there is always one
baseline project that does not normalize to any other. All other projects
normalize to one or more projects. The function handles projects that are
normalized in a chain, for example:
\itemize{
   \item{1.} project 2 normalizes to project 1, and project 3 normalizes to
   project 2.
   \item{2.} project 2 normalizes to project 1, and project 3 normalizes to
   the combined data frame of projects 1 and 2 (that is already normalized).
}

The function can also handle a mixed schema of bridge and subset
normalization.

Specifications of the normalization schema data frame:
\itemize{
   \item{order:} should strictly be a numeric or integer array with unique
   identifiers for each project. It is necessary that this array starts from
   1 and that it contains no NAs.
   \item{name:} should strictly be a character array with unique identifiers
   for each project. Each entry should represent the name of the project
   located in the same row. No NAs are allowed.
   \item{data:} a named list of NPX data frames representing the projects to
   be normalized. Names of the items of the list should be identical to
   "name". No NAs are allowed.
   \item{samples:} a two-level nested named list of sample identifiers from
   each NPX project from "data". Names of the first level of the nested list
   should be identical to "name" and to the names of the list from "data".
   Projects that will be used only as reference should have their
   corresponding element in the list as NA, while all other projects should
   contain a named list of 2 arrays containing identifiers of samples to be
   used for the calculation of adjustment factors. The names of the two
   arrays should be DF1 and DF2 corresponding to the reference project and
   the project in the current row, respectively. For bridge normalization
   arrays should be of equal length and the index of each entry should
   correspond to the same sample. For subset normalization arrays do not
   need to be of equal length and the order the samples appear in does not
   matter. DF1 might contain sample identifiers from more than one project
   as long as the project in the current row is to be normalized to multiple
   other projects.
   \item{normalization_type:} a character array containing the flags "Bridge"
   or "Subset". Projects that will be used only as reference should have
   their corresponding element in the array as NA, while all other projects
   should contain a flag. For the time being the flag "Median" is not
   supported.
   \item{normalize_to:} a character array pointing to the project this
   project is to be normalized to. Elements of the array should be
   exclusively from the "order" column. Elements of the array may be
   comma-separated if the project is to be normalized to multiple projects.
}
}
\examples{
\donttest{
#### Bridge normalization of two projects

# Build the two input datasets: remove Olink control samples, set the
# Normalization column to "Intensity" and drop the Project column.
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::mutate(Normalization = "Intensity") |>
  dplyr::select(-Project)
npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::mutate(Normalization = "Intensity") |>
  dplyr::select(-Project)

# Bridge samples are the sample identifiers present in both datasets; Olink
# controls were already removed above.
overlap_samples <- unique(npx_df1$SampleID) |>
  dplyr::intersect(unique(npx_df2$SampleID))

# DF1 holds the samples of the reference project and DF2 the samples of the
# project being normalized. For bridging both use the same identifiers.
overlap_samples_list <- list(DF1 = overlap_samples,
                             DF2 = overlap_samples)

# Normalization schema with one row per project: NPX_DF1 is the reference
# (NA entries) and NPX_DF2 is bridge normalized to the project with order 1.
norm_schema_bridge <- dplyr::tibble(
  order              = c(1, 2),
  name               = c("NPX_DF1", "NPX_DF2"),
  data               = list(NPX_DF1 = npx_df1,
                            NPX_DF2 = npx_df2),
  samples            = list(NPX_DF1 = NA_character_,
                            NPX_DF2 = overlap_samples_list),
  normalization_type = c(NA_character_, "Bridge"),
  normalize_to       = c(NA_character_, "1")
)

# Run the normalization
olink_normalization_n(norm_schema = norm_schema_bridge)

#### Subset normalization of two projects

# Build the two input datasets: remove Olink control samples, set the
# Normalization column to "Intensity" and drop the Project column.
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::mutate(Normalization = "Intensity") |>
  dplyr::select(-Project)
npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::mutate(Normalization = "Intensity") |>
  dplyr::select(-Project)

# From each project draw 16 random samples among those that are not Olink
# controls and whose QC_Warning is "Pass" in every row of the sample.
df1_samples <- sample(
  x = npx_df1 |>
    dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
    dplyr::group_by(SampleID) |>
    dplyr::filter(all(QC_Warning == "Pass")) |>
    dplyr::pull(SampleID) |>
    unique(),
  size = 16,
  replace = FALSE
)
df2_samples <- sample(
  x = npx_df2 |>
    dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
    dplyr::group_by(SampleID) |>
    dplyr::filter(all(QC_Warning == "Pass")) |>
    dplyr::pull(SampleID) |>
    unique(),
  size = 16,
  replace = FALSE
)

# DF1 holds the samples of the reference project and DF2 the samples of the
# project being normalized.
subset_samples_list <- list(DF1 = df1_samples,
                            DF2 = df2_samples)

# Normalization schema with one row per project: NPX_DF1 is the reference
# (NA entries) and NPX_DF2 is subset normalized to the project with order 1.
norm_schema_subset <- dplyr::tibble(
  order              = c(1, 2),
  name               = c("NPX_DF1", "NPX_DF2"),
  data               = list(NPX_DF1 = npx_df1,
                            NPX_DF2 = npx_df2),
  samples            = list(NPX_DF1 = NA_character_,
                            NPX_DF2 = subset_samples_list),
  normalization_type = c(NA_character_, "Subset"),
  normalize_to       = c(NA_character_, "1")
)

# Run the normalization
olink_normalization_n(norm_schema = norm_schema_subset)

#### Subset normalization of two projects using all samples

# Build the two input datasets: remove Olink control samples, set the
# Normalization column to "Intensity" and drop the Project column.
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::mutate(Normalization = "Intensity") |>
  dplyr::select(-Project)
npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::mutate(Normalization = "Intensity") |>
  dplyr::select(-Project)

# Keep every sample of each project that is not an Olink control and whose
# QC_Warning is "Pass" in every row of the sample.
df1_samples_all <- unique(
  npx_df1 |>
    dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
    dplyr::group_by(SampleID) |>
    dplyr::filter(all(QC_Warning == "Pass")) |>
    dplyr::pull(SampleID)
)
df2_samples_all <- unique(
  npx_df2 |>
    dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
    dplyr::group_by(SampleID) |>
    dplyr::filter(all(QC_Warning == "Pass")) |>
    dplyr::pull(SampleID)
)

# DF1 holds the samples of the reference project and DF2 the samples of the
# project being normalized.
subset_samples_all_list <- list(DF1 = df1_samples_all,
                                DF2 = df2_samples_all)

# Normalization schema with one row per project: NPX_DF1 is the reference
# (NA entries) and NPX_DF2 is subset normalized to the project with order 1.
norm_schema_subset_all <- dplyr::tibble(
  order              = c(1, 2),
  name               = c("NPX_DF1", "NPX_DF2"),
  data               = list(NPX_DF1 = npx_df1,
                            NPX_DF2 = npx_df2),
  samples            = list(NPX_DF1 = NA_character_,
                            NPX_DF2 = subset_samples_all_list),
  normalization_type = c(NA_character_, "Subset"),
  normalize_to       = c(NA_character_, "1")
)

# Run the normalization
olink_normalization_n(norm_schema = norm_schema_subset_all)

#### Multi-project normalization using bridge and subset samples

## NPX data frames to normalize
# Remove Olink control samples, drop the Project column and set the
# Normalization column to "Intensity".
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

# Create two additional artificial datasets from the example data by
# renaming samples and plates and shuffling the NPX values.
npx_df3 <- npx_data2 |>
  dplyr::mutate(SampleID = paste0(SampleID, "_mod"),
                PlateID = paste0(PlateID, "_mod"),
                NPX = sample(x = NPX, size = dplyr::n(), replace = FALSE)) |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

npx_df4 <- npx_data1 |>
  dplyr::mutate(SampleID = paste0(SampleID, "_mod2"),
                PlateID = paste0(PlateID, "_mod2"),
                NPX = sample(x = NPX, size = dplyr::n(), replace = FALSE)) |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

## samples to use for normalization
# Bridge samples with same identifiers between npx_df1 and npx_df2
overlap_samples <- dplyr::intersect(unique(npx_df1$SampleID),
                                    unique(npx_df2$SampleID))
overlap_samples_df1_df2 <- list("DF1" = overlap_samples,
                                "DF2" = overlap_samples)
rm(overlap_samples)

# Bridge samples with different identifiers between npx_df2 and npx_df3.
# Both arrays have the same length; entries are paired by position.
overlap_samples_df2_df3 <- list("DF1" = sample(x = unique(npx_df2$SampleID),
                                               size = 10,
                                               replace = FALSE),
                                "DF2" = sample(x = unique(npx_df3$SampleID),
                                               size = 10,
                                               replace = FALSE))

# Samples to use for subset normalization between npx_df4 and the
# normalized dataset of npx_df1 and npx_df2.
# Identifiers shared by npx_df1 and npx_df2 would occur twice in the
# concatenated vector, so de-duplicate before sampling to make sure that DF1
# contains 100 distinct sample identifiers.
overlap_samples_df12_df4 <- list(
  "DF1" = sample(x = unique(c(npx_df1$SampleID, npx_df2$SampleID)),
                 size = 100,
                 replace = FALSE),
  "DF2" = sample(x = unique(npx_df4$SampleID),
                 size = 40,
                 replace = FALSE)
)

# create tibble for input: one row per project. NPX_DF1 is the reference,
# NPX_DF2 bridges to project 1, NPX_DF3 bridges to project 2 and NPX_DF4 is
# subset normalized to the combined projects 1 and 2.
norm_schema_n <- dplyr::tibble(
  order              = c(1, 2, 3, 4),
  name               = c("NPX_DF1", "NPX_DF2", "NPX_DF3", "NPX_DF4"),
  data               = list("NPX_DF1" = npx_df1,
                            "NPX_DF2" = npx_df2,
                            "NPX_DF3" = npx_df3,
                            "NPX_DF4" = npx_df4),
  samples            = list("NPX_DF1" = NA_character_,
                            "NPX_DF2" = overlap_samples_df1_df2,
                            "NPX_DF3" = overlap_samples_df2_df3,
                            "NPX_DF4" = overlap_samples_df12_df4),
  normalization_type = c(NA_character_, "Bridge", "Bridge", "Subset"),
  normalize_to       = c(NA_character_, "1", "2", "1,2")
)

# normalize
olink_normalization_n(norm_schema = norm_schema_n)

}

}
\keyword{Bridge}
\keyword{Normalization;}
\keyword{Subset}
\keyword{normalization;}
