% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sinan.R
\name{sinan_data}
\alias{sinan_data}
\title{Download SINAN Notifiable Disease Microdata}
\usage{
sinan_data(
  year,
  disease = "DENG",
  vars = NULL,
  parse = TRUE,
  col_types = NULL,
  cache = TRUE,
  cache_dir = NULL,
  lazy = FALSE,
  backend = c("arrow", "duckdb")
)
}
\arguments{
\item{year}{Integer vector. Year(s) of the data, e.g. \code{2022} or \code{2020:2022}. Required.}

\item{disease}{Character. Disease code to download. Default: \code{"DENG"}
(Dengue). Use \code{\link[=sinan_diseases]{sinan_diseases()}} to see all available codes.}

\item{vars}{Character vector. Variables to keep. If NULL (default),
returns all available variables. Use \code{\link[=sinan_variables]{sinan_variables()}} to see
available variables.}

\item{parse}{Logical. If TRUE (default), converts columns to
appropriate types (integer, double, Date) based on the variable
metadata. Use \code{\link[=sinan_variables]{sinan_variables()}} to see the target type for each
variable. Set to FALSE for backward-compatible all-character output.}

\item{col_types}{Named list. Override the default type for specific
columns. Names are column names, values are type strings:
\code{"character"}, \code{"integer"}, \code{"double"},
\code{"date_dmy"}, \code{"date_ymd"}, \code{"date_ym"}, \code{"date"}.
Example: \code{list(DT_NOTIFIC = "character")} to keep \code{DT_NOTIFIC} as character.}

\item{cache}{Logical. If TRUE (default), caches downloaded data for
faster future access.}

\item{cache_dir}{Character. Directory for caching. Default:
\code{tools::R_user_dir("healthbR", "cache")}.}

\item{lazy}{Logical. If TRUE, returns a lazy query object instead of a
tibble. Requires the \pkg{arrow} package. The lazy object supports
dplyr verbs (filter, select, mutate, etc.) which are pushed down
to the query engine before collecting into memory. Call
\code{dplyr::collect()} to materialize the result. Default: FALSE.}

\item{backend}{Character. Backend for lazy evaluation: \code{"arrow"}
(default) or \code{"duckdb"}. Only used when \code{lazy = TRUE}.
DuckDB backend requires the \pkg{duckdb} package.}
}
\value{
A tibble with notifiable disease microdata, or a lazy query object when
\code{lazy = TRUE} (call \code{dplyr::collect()} to materialize it). Includes
columns \code{year} and \code{disease} to identify the source when multiple
years are combined.
}
\description{
Downloads and returns notifiable disease microdata from DATASUS FTP.
Each row represents one notification record (Ficha de Notificacao).
Data is downloaded as national .dbc files (one file per disease per year),
decompressed internally, and returned as a tibble.
}
\details{
SINAN files are national (not per-state). Each file contains all
notifications for a given disease in a given year across all of Brazil.
To filter by state, use the \code{SG_UF_NOT} (state, or UF, of notification) or
\code{ID_MUNICIP} (municipality code) columns after download.

The .dbc files downloaded from DATASUS FTP are decompressed internally
using vendored C code from the blast library, so no external
dependencies are required.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# dengue notifications, 2022
dengue_2022 <- sinan_data(year = 2022)

# tuberculosis, 2020-2022
tb <- sinan_data(year = 2020:2022, disease = "TUBE")

# only key variables
sinan_data(year = 2022, disease = "DENG",
           vars = c("DT_NOTIFIC", "CS_SEXO", "NU_IDADE_N",
                    "CS_RACA", "ID_MUNICIP", "CLASSI_FIN"))
\dontshow{\}) # examplesIf}
}
\seealso{
Other sinan: 
\code{\link{sinan_cache_status}()},
\code{\link{sinan_clear_cache}()},
\code{\link{sinan_dictionary}()},
\code{\link{sinan_diseases}()},
\code{\link{sinan_info}()},
\code{\link{sinan_variables}()},
\code{\link{sinan_years}()}
}
\concept{sinan}
