% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sinasc.R
\name{sinasc_data}
\alias{sinasc_data}
\title{Download SINASC Live Birth Microdata}
\usage{
sinasc_data(
  year,
  vars = NULL,
  uf = NULL,
  anomaly = NULL,
  parse = TRUE,
  col_types = NULL,
  cache = TRUE,
  cache_dir = NULL,
  lazy = FALSE,
  backend = c("arrow", "duckdb")
)
}
\arguments{
\item{year}{Integer. Year(s) of the data. Required.}

\item{vars}{Character vector. Variables to keep. If NULL (default),
returns all available variables. Use \code{\link[=sinasc_variables]{sinasc_variables()}} to see
available variables.}

\item{uf}{Character. Two-letter state abbreviation(s) to download.
If NULL (default), downloads all 27 federative units (the 26 states
and the Federal District).
Example: \code{"SP"}, \code{c("SP", "RJ")}.}

\item{anomaly}{Character. ICD-10 (CID-10 in Portuguese) code pattern(s) to
filter by congenital anomaly (\code{CODANOMAL}). Supports partial matching
(prefix).
If NULL (default), returns all records.
Example: \code{"Q90"} (Down syndrome), \code{"Q"} (all anomalies).}

\item{parse}{Logical. If TRUE (default), converts columns to
appropriate types (integer, double, Date) based on the variable
metadata. Use \code{\link[=sinasc_variables]{sinasc_variables()}} to see the target type for each
variable. Set to FALSE for backward-compatible all-character output.}

\item{col_types}{Named list. Override the default type for specific
columns. Names are column names, values are type strings:
\code{"character"}, \code{"integer"}, \code{"double"},
\code{"date_dmy"}, \code{"date_ymd"}, \code{"date_ym"}, \code{"date"}.
Example: \code{list(PESO = "character")} to keep PESO as character.}

\item{cache}{Logical. If TRUE (default), caches downloaded data for
faster future access.}

\item{cache_dir}{Character. Directory for caching. Default:
\code{tools::R_user_dir("healthbR", "cache")}.}

\item{lazy}{Logical. If TRUE, returns a lazy query object instead of a
tibble. Requires the \pkg{arrow} package. The lazy object supports
dplyr verbs (filter, select, mutate, etc.) which are pushed down
to the query engine before collecting into memory. Call
\code{dplyr::collect()} to materialize the result. Default: FALSE.}

\item{backend}{Character. Backend for lazy evaluation: \code{"arrow"}
(default) or \code{"duckdb"}. Only used when \code{lazy = TRUE}.
DuckDB backend requires the \pkg{duckdb} package.}
}
\value{
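A tibble with live birth microdata, or, when \code{lazy = TRUE}, a lazy
query object that is materialized with \code{dplyr::collect()}.
Includes columns \code{year} and \code{uf_source} to identify the source
when multiple years/states are combined.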
}
\description{
Downloads and returns live birth microdata from DATASUS FTP.
Each row represents one live birth record (Declaracao de Nascido Vivo).
Data is downloaded per state (UF) as compressed .dbc files, decompressed
internally, and returned as a tibble.
}
\details{
Data is downloaded from DATASUS FTP as .dbc files (one per state per year).
The .dbc format is decompressed internally using vendored C code from the
blast library. No external dependencies are required.

When \code{uf} is specified, only the requested state(s) are downloaded,
making the operation much faster than downloading the entire country.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# all births in Acre, 2022
ac_2022 <- sinasc_data(year = 2022, uf = "AC")

# births with anomalies in Sao Paulo, 2020-2022
anomalies_sp <- sinasc_data(year = 2020:2022, uf = "SP", anomaly = "Q")

# only key variables, Rio de Janeiro, 2022
sinasc_data(year = 2022, uf = "RJ",
            vars = c("DTNASC", "SEXO", "PESO",
                     "IDADEMAE", "PARTO", "CONSULTAS"))
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link[=censo_populacao]{censo_populacao()}} for population denominators to calculate
birth rates.

Other sinasc: 
\code{\link{sinasc_cache_status}()},
\code{\link{sinasc_clear_cache}()},
\code{\link{sinasc_dictionary}()},
\code{\link{sinasc_info}()},
\code{\link{sinasc_variables}()},
\code{\link{sinasc_years}()}
}
\concept{sinasc}
