% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_qlm_coded.R
\name{as_qlm_coded}
\alias{as_qlm_coded}
\alias{as_qlm_coded.data.frame}
\alias{as_qlm_coded.default}
\title{Convert coded data to qlm_coded format}
\usage{
as_qlm_coded(
  x,
  id,
  name = NULL,
  is_gold = FALSE,
  codebook = NULL,
  texts = NULL,
  notes = NULL,
  metadata = list()
)

\method{as_qlm_coded}{data.frame}(
  x,
  id,
  name = NULL,
  is_gold = FALSE,
  codebook = NULL,
  texts = NULL,
  notes = NULL,
  metadata = list()
)

\method{as_qlm_coded}{default}(
  x,
  id,
  name = NULL,
  is_gold = FALSE,
  codebook = NULL,
  texts = NULL,
  notes = NULL,
  metadata = list()
)
}
\arguments{
\item{x}{A data frame or quanteda corpus object containing coded data.
For data frames: Must include a column with unit identifiers (default
\code{".id"}).
For corpus objects: Document variables (docvars) are treated as coded
variables, and document names are used as identifiers by default.}

\item{id}{For data frames: Name of the column containing unit identifiers,
given either as an unquoted column name (\code{id = doc_id}) or as a quoted
string (\code{id = "doc_id"}). If omitted or \code{NULL}, the column named
\code{".id"} is used. For corpus objects: if omitted or \code{NULL}, document
names from \code{names(x)} are used; alternatively, specify a docvar name
(quoted or unquoted) to use as identifiers.}

\item{name}{Character. A string identifying this coding run (e.g.,
\code{"Coder_A"}, \code{"expert_rater"}, \code{"Gold_Standard"}). Default is
\code{NULL}.}

\item{is_gold}{Logical. If \code{TRUE}, marks this object as a gold standard for
automatic detection by \code{\link[=qlm_validate]{qlm_validate()}}. When a gold standard object is
passed to \code{qlm_validate()}, the \verb{gold =} parameter becomes optional.
Default is \code{FALSE}.}

\item{codebook}{Optional list containing coding instructions. Can include:
\describe{
\item{\code{name}}{Name of the coding scheme}
\item{\code{instructions}}{Text describing coding instructions}
\item{\code{schema}}{NULL (not used for human coding)}
}
If \code{NULL} (default), a minimal placeholder codebook is created.}

\item{texts}{Optional vector of original texts or data that were coded.
Should correspond to the unit identifiers (\code{.id} values) in \code{x}.
If provided, enables more complete provenance tracking.}

\item{notes}{Optional character string with descriptive notes about this
coding. Useful for documenting details when viewing results in
\code{\link[=qlm_trail]{qlm_trail()}}. Default is \code{NULL}.}

\item{metadata}{Optional list of metadata about the coding process. Can
include any relevant information such as:
\describe{
\item{\code{coder_name}}{Name of the human coder}
\item{\code{coder_id}}{Identifier for the coder}
\item{\code{training}}{Description of coder training}
\item{\code{date}}{Date of coding}
}
The function automatically adds \code{timestamp}, \code{n_units}, \code{notes}, and
\code{source = "human"}.}
}
\value{
A \code{qlm_coded} object (tibble with additional class and attributes)
for provenance tracking. When \code{is_gold = TRUE}, the object is marked as
a gold standard in its attributes.
}
\description{
Converts a data frame or quanteda corpus of coded data (human-coded or from
external sources) into a \code{qlm_coded} object. This enables provenance tracking
and integration with \code{qlm_compare()}, \code{qlm_validate()}, and \code{qlm_trail()} for
coded data alongside LLM-coded results.
}
\details{
When printed, objects created with \code{as_qlm_coded()} display "Source: Human coder"
instead of model information, clearly distinguishing human from LLM coding.
\subsection{Gold Standards}{

Objects marked with \code{is_gold = TRUE} are automatically detected by
\code{\link[=qlm_validate]{qlm_validate()}}, allowing simpler syntax:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{# With is_gold = TRUE
gold <- as_qlm_coded(gold_data, name = "Expert", is_gold = TRUE)
qlm_validate(coded1, coded2, gold, by = "sentiment")  # gold = not needed!

# Without is_gold = TRUE (or to be explicit), pass the gold standard via gold =
gold <- as_qlm_coded(gold_data, name = "Expert")
qlm_validate(coded1, coded2, gold = gold, by = "sentiment")
}\if{html}{\out{</div>}}
}
}
\examples{
# Basic usage with a data frame: the unit identifiers live in the default
# ".id" column, so no id argument is needed
human_data <- data.frame(
  .id = 1:10,
  sentiment = sample(c("pos", "neg"), 10, replace = TRUE)
)

coder_a <- as_qlm_coded(human_data, name = "Coder_A")
coder_a

# Use a custom id column with NSE (unquoted column name)
data_with_custom_id <- data.frame(
  doc_id = 1:10,
  sentiment = sample(c("pos", "neg"), 10, replace = TRUE)
)
coder_custom <- as_qlm_coded(data_with_custom_id, id = doc_id, name = "Coder_C")

# Or pass the same column name as a quoted string
coder_custom2 <- as_qlm_coded(data_with_custom_id, id = "doc_id", name = "Coder_D")

# Create a gold standard from a data frame
gold <- as_qlm_coded(
  human_data,
  name = "Expert",
  is_gold = TRUE
)

# Validate with automatic gold detection
coder_b_data <- data.frame(
  .id = 1:10,
  sentiment = sample(c("pos", "neg"), 10, replace = TRUE)
)
coder_b <- as_qlm_coded(coder_b_data, name = "Coder_B")

# No need for gold = when the gold object is marked with is_gold = TRUE:
# it is passed like any other coded object (NSE works for 'by' too)
qlm_validate(coder_a, coder_b, gold, by = sentiment, level = "nominal")

# Create from a corpus object (simplified workflow)
data("data_corpus_manifsentsUK2010sample")
crowd <- as_qlm_coded(
  data_corpus_manifsentsUK2010sample,
  is_gold = TRUE
)
# Document names automatically become .id, all docvars included

# Use a docvar as identifier with NSE (unquoted)
crowd_party <- as_qlm_coded(
  data_corpus_manifsentsUK2010sample,
  id = party,
  is_gold = TRUE
)

# Or pass the docvar name as a quoted string
crowd_party2 <- as_qlm_coded(
  data_corpus_manifsentsUK2010sample,
  id = "party",
  is_gold = TRUE
)

# With a complete codebook and coder metadata
expert <- as_qlm_coded(
  human_data,
  name = "expert_rater",
  is_gold = TRUE,
  codebook = list(
    name = "Sentiment Analysis",
    instructions = "Code overall sentiment as positive or negative"
  ),
  metadata = list(
    coder_name = "Dr. Smith",
    coder_id = "EXP001",
    training = "5 years experience",
    date = "2024-01-15"
  )
)

}
\seealso{
\code{\link[=qlm_code]{qlm_code()}} for LLM coding, \code{\link[=qlm_compare]{qlm_compare()}} for inter-rater reliability,
\code{\link[=qlm_validate]{qlm_validate()}} for validation against gold standards, \code{\link[=qlm_trail]{qlm_trail()}} for
provenance tracking.
}
