% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/etl.R
\name{etl}
\alias{etl}
\alias{etl.default}
\alias{is.etl}
\alias{print.etl}
\alias{summary.etl}
\title{Initialize an \code{etl} object}
\usage{
etl(x, db = NULL, dir = tempdir(), ...)

\method{etl}{default}(x, db = NULL, dir = tempdir(), ...)

\method{summary}{etl}(object, ...)

is.etl(object)

\method{print}{etl}(x, ...)
}
\arguments{
\item{x}{the name of the \code{etl} package that you wish to populate with data.
This determines the class of the resulting \code{\link{etl}} object, which
determines method dispatch of \code{etl_*()} functions. There is no default,
but you can use \code{mtcars} as a test example.}

\item{db}{a database connection that inherits from \code{\link[dplyr]{src_sql}}. It is
\code{NULL} by default, which results in a \code{\link[dplyr]{src_sqlite}} connection
being created in \code{dir}.}

\item{dir}{a directory to store the raw and processed data files}

\item{...}{arguments passed to methods (currently ignored)}

\item{object}{an object for which a summary is desired.}
}
\value{
For \code{\link{etl}}, an object of class \code{etl_x} and
\code{\link{etl}} that inherits
from \code{\link[dplyr]{src_sql}}.

For \code{\link{is.etl}}, \code{TRUE} or \code{FALSE},
depending on whether \code{object} has class \code{\link{etl}}.
}
\description{
Initialize an \code{etl} object
}
\details{
A constructor function that instantiates an \code{\link{etl}} object.
An \code{\link{etl}} object extends a \code{\link[dplyr]{src_sql}} object.
It also has attributes for:
\describe{
 \item{pkg}{the name of the \code{\link{etl}} package corresponding to the data source}
 \item{dir}{the directory where the raw and processed data are stored}
 \item{raw_dir}{the directory where the raw data files are stored}
 \item{load_dir}{the directory where the processed data files are stored}
 }
Just like any \code{\link[dplyr]{src_sql}} object, an \code{\link{etl}} object
is a data source backed by an SQL database. However, an \code{\link{etl}} object
has additional functionality based on the presumption that the SQL database
will be populated from data files stored on the local hard disk. The ETL functions
documented in \code{\link{etl_create}} provide the necessary functionality
for \strong{extract}ing data from the Internet to \code{raw_dir},
\strong{transform}ing those data
and placing the cleaned up data (usually in CSV format) into \code{load_dir},
and finally \strong{load}ing the clean data into the SQL database.
}
\examples{

# Instantiate the etl object
cars <- etl("mtcars")
str(cars)
is.etl(cars)
summary(cars)

\dontrun{
# connect to a PostgreSQL server
if (require(RPostgreSQL)) {
 db <- src_postgres("mtcars", user = "postgres", host = "localhost")
 cars <- etl("mtcars", db)
}
}

# Do it step-by-step
cars \%>\%
  etl_extract() \%>\%
  etl_transform() \%>\%
  etl_load()
src_tbls(cars)
cars \%>\%
  tbl("mtcars") \%>\%
  group_by(cyl) \%>\%
  summarize(N = n(), mean_mpg = mean(mpg))

# Do it all in one step
cars2 <- etl("mtcars")
cars2 \%>\%
  etl_update()
src_tbls(cars2)


# generic summary function provides information about the object
cars <- etl("mtcars")
summary(cars)
cars <- etl("mtcars")
# returns TRUE
is.etl(cars)

# returns FALSE
is.etl("hello world")
cars <- etl("mtcars") \%>\%
  etl_create()
cars
}
\seealso{
\code{\link{etl_create}}
}

