REDCap Data Import with Sardine
This vignette demonstrates how to import data into REDCap projects using the sardine package.
Project Setup
First, create your REDCap project object.
# Load environment and create project
library(sardine)
library(dplyr) # provides tibble() and the data-wrangling verbs used below
load_env()
project <- redcap_project()
Basic Data Import
Simple Record Import
# Create new data to import
new_records <- tibble(
record_id = c("101", "102", "103"),
age = c(25, 34, 28),
gender = c("Female", "Male", "Female"),
study_group = c("Treatment", "Control", "Treatment")
)
# Import the data with safety checks
result <- project$import(new_records)
# Import checks if cache is stale and analyzes changes
# By default, overwrites are blocked (overwrite = FALSE)
print(result)
Import with Safety Features
The import method includes built-in safety features:
# Import detects cache staleness
result <- project$import(new_records)
# Warning: Project cache is outdated (6 minutes old)
# Import will show: X new records, Y updated fields
# Allow overwrites if needed
result <- project$import(new_records, overwrite = TRUE)
# Confirmation prompt will show what's being overwritten
# Skip confirmation for automated workflows
result <- project$import(new_records, overwrite = TRUE, force = TRUE)
# Save a detailed change report
result <- project$import(
new_records,
overwrite = TRUE,
save_changes = "import_changes.csv"
)
Understanding Import Results
# Import returns API response
cat("Import API response:\n")
print(result)
# Check import details
cat("Status:", result$status_code, "\n")
# For detailed change tracking, save a report
project$import(
new_records,
overwrite = TRUE,
save_changes = "detailed_changes.csv"
)
# CSV contains: record_id, field_name, old_value, new_value
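To audit a report programmatically, read it back like any CSV. A minimal sketch, assuming the report above was saved as detailed_changes.csv with the schema just described:
# Read the change report back and summarize which fields changed most
changes <- read.csv("detailed_changes.csv")
head(changes)
sort(table(changes$field_name), decreasing = TRUE)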
Cache Management After Import
The import method automatically refreshes the cache after a successful import:
# Before import - cached data count
old_count <- nrow(project$data)
cat("Records before import:", old_count, "\n")
# Import new data
project$import(new_records)
# Import automatically refreshes cache
# Cache is now current
new_count <- nrow(project$data)
cat("Records after refresh:", new_count, "\n")
cat("New records added:", new_count - old_count, "\n")Import Options
Import Options
Overwrite Protection
By default, the import method protects against accidental overwrites:
# Default: overwrites are blocked
project$import(updated_data)
# Error if any existing data would be overwritten
# Allow overwrites with confirmation
project$import(
updated_data,
overwrite = TRUE
)
# Shows: "About to overwrite 5 existing fields. Continue? [y/N]"
# Skip confirmation for scripts
project$import(
updated_data,
overwrite = TRUE,
force = TRUE
)
Change Detection
Import analyzes what will change before committing:
# Import shows change summary
result <- project$import(updated_data, overwrite = TRUE)
# Output:
# - 3 new records will be created
# - 5 existing records will be updated
# - 12 fields will be overwritten
#
# First few changes:
# record_id | field | old_value | new_value
# 001 | age | 30 | 31
# 002 | status | Active | Completed
# ...
# Save complete change report
project$import(
updated_data,
overwrite = TRUE,
save_changes = "changes.csv"
)
# Review changes.csv before continuing
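One way to act on that advice is a hard gate: load the report and stop unless every change is confined to fields you expect to touch. A sketch with a hypothetical whitelist:
# Stop unless every pending change touches a field we expect to update
changes <- read.csv("changes.csv")
expected_fields <- c("follow_up_date", "status") # hypothetical whitelist
stopifnot(all(changes$field_name %in% expected_fields))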
Updating Existing Records
Partial Updates
# Update specific fields for existing records
updates <- tibble(
record_id = c("001", "002"),
follow_up_date = c("2025-03-01", "2025-03-15"),
status = c("Completed", "Active")
)
# Import with overwrite enabled
project$import(updates, overwrite = TRUE)
# Confirms: "Update 2 records across 2 fields?"
# Verify updates in refreshed cache
updated_records <- project$data |>
dplyr::filter(record_id %in% c("001", "002")) |>
dplyr::select(record_id, follow_up_date, status)
print(updated_records)
Bulk Updates
# Update multiple records with calculated values
existing_data <- project$data
# Calculate BMI for all records
bmi_updates <- existing_data %>%
filter(!is.na(height) & !is.na(weight)) %>%
mutate(
bmi = round(weight / (height/100)^2, 1)
) %>%
select(record_id, bmi)
# Import calculated BMI values
project$import(bmi_updates, overwrite = TRUE)
# Shows how many BMI fields will be updated
Working with Longitudinal Projects
Event-Specific Imports
# For longitudinal projects, include redcap_event_name
longitudinal_data <- tibble(
record_id = c("001", "001", "002", "002"),
redcap_event_name = c("baseline_arm_1", "month_3_arm_1", "baseline_arm_1", "month_3_arm_1"),
weight = c(75.5, 73.2, 82.1, 80.8),
bp_systolic = c(120, 118, 135, 130)
)
project$import(longitudinal_data, overwrite = TRUE)
# Change detection works across events
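Repeating instruments follow the same pattern: REDCap identifies each row by the standard redcap_repeat_instrument and redcap_repeat_instance columns. A hedged sketch, assuming the project has a repeating adverse_events instrument with an ae_description field and that sardine passes these columns through to the API unchanged:
# Two instances of a repeating instrument for the same record
repeating_data <- tibble(
  record_id = c("001", "001"),
  redcap_repeat_instrument = "adverse_events",
  redcap_repeat_instance = c(1, 2),
  ae_description = c("Headache", "Nausea")
)
project$import(repeating_data, overwrite = TRUE)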
Import Validation
Pre-Import Checks
The import method includes automatic cache validation and change detection, but you can add custom validation:
# Function to validate data before import
validate_import_data <- function(data, project) {
errors <- character()
warnings <- character()
# Check required fields
if (!"record_id" %in% names(data)) {
errors <- c(errors, "record_id field is required")
}
# Check for duplicate records
if (any(duplicated(data$record_id))) {
warnings <- c(warnings, "Duplicate record_ids found")
}
# Check field names against the cached project data
project_fields <- names(project$data)
unknown_fields <- setdiff(names(data), project_fields)
if (length(unknown_fields) > 0) {
warnings <- c(warnings, paste("Unknown fields:", paste(unknown_fields, collapse = ", ")))
}
# Check for reasonable values (example for age)
if ("age" %in% names(data)) {
invalid_ages <- data$age < 0 | data$age > 120
if (any(invalid_ages, na.rm = TRUE)) {
warnings <- c(warnings, "Some age values seem unreasonable")
}
}
list(errors = errors, warnings = warnings)
}
# Validate before import
validation_result <- validate_import_data(new_records, project)
if (length(validation_result$errors) > 0) {
cat("Errors (must fix):\n")
cat(paste("- ", validation_result$errors, collapse = "\n"), "\n")
}
if (length(validation_result$warnings) > 0) {
cat("Warnings (review):\n")
cat(paste("- ", validation_result$warnings, collapse = "\n"), "\n")
}
# Only import if no errors
if (length(validation_result$errors) == 0) {
project$import(new_records)
}
Post-Import Verification
# Import automatically refreshes cache, so verification is immediate
verify_import <- function(project, imported_data) {
# Check that records exist
imported_ids <- imported_data$record_id
existing_ids <- project$data$record_id
missing_records <- setdiff(imported_ids, existing_ids)
if (length(missing_records) > 0) {
cat("Warning: These records were not found after import:\n")
cat(paste(missing_records, collapse = ", "), "\n")
} else {
cat("Success: All records imported successfully\n")
}
# Check field values for a sample
sample_record <- imported_data[1, ]
actual_record <- project$data[project$data$record_id == sample_record$record_id[1], ]
cat("Sample verification for record", sample_record$record_id[1], ":\n")
for (field in setdiff(names(sample_record), "record_id")) {
if (field %in% names(actual_record)) {
expected <- sample_record[[field]][1]
actual <- actual_record[[field]][1]
match <- identical(expected, actual)
cat(" ", field, ": ", expected, " -> ", actual, " (",
ifelse(match, "✓", "✗"), ")\n", sep = "")
}
}
}
# Use after import
project$import(new_records)
verify_import(project, new_records)
Common Import Scenarios
Importing Survey Data
# Import survey responses
survey_responses <- tibble(
record_id = c("001", "002", "003"),
satisfaction_score = c(8, 9, 7),
would_recommend = c(1, 1, 0), # 1 = Yes, 0 = No
comments = c("Great experience", "Very helpful", "Could be better"),
survey_complete = c(2, 2, 2) # 2 = Complete
)
# Import with overwrite to update existing survey responses
project$import(survey_responses, overwrite = TRUE)
Importing Calculated Fields
# Import derived/calculated values
calculated_data <- project$data %>%
mutate(
# Calculate risk score
risk_score = case_when(
age >= 65 ~ 3,
age >= 50 ~ 2,
age >= 35 ~ 1,
TRUE ~ 0
) + case_when(
smoking_status == "Current" ~ 2,
smoking_status == "Former" ~ 1,
TRUE ~ 0
),
# Categorize BMI
bmi_category = case_when(
bmi < 18.5 ~ "Underweight",
bmi < 25 ~ "Normal",
bmi < 30 ~ "Overweight",
TRUE ~ "Obese"
)
) %>%
select(record_id, risk_score, bmi_category)
# Import calculated values with change report
project$import(
calculated_data,
overwrite = TRUE,
save_changes = "calculated_updates.csv"
)
Importing External Data
# Import data from external sources (CSV, database, etc.)
# Example: Lab results from external system
external_data <- read.csv("lab_results.csv") %>%
# Match to REDCap record IDs
rename(record_id = patient_id) %>%
# Select and rename fields to match REDCap
select(
record_id,
hemoglobin = hgb_value,
cholesterol = chol_total,
lab_date = collection_date
) %>%
# Format dates
mutate(lab_date = format(as.Date(lab_date), "%Y-%m-%d"))
# Validate before import
validation <- validate_import_data(external_data, project)
if (length(validation$errors) == 0) {
project$import(external_data, overwrite = TRUE)
}
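The same mapping pattern applies when the source is a database rather than a CSV. A minimal sketch using DBI, assuming an RSQLite database with a hypothetical lab_results table; adjust the connection and column names to your system:
# Pull lab results from a database, then reshape to match REDCap fields
library(DBI)
con <- dbConnect(RSQLite::SQLite(), "labs.sqlite") # hypothetical source
db_data <- dbGetQuery(con, "SELECT patient_id, hgb_value, chol_total FROM lab_results")
dbDisconnect(con)
lab_import <- db_data %>%
  rename(record_id = patient_id, hemoglobin = hgb_value, cholesterol = chol_total)
# Reuse the custom validation from above before importing
validation <- validate_import_data(lab_import, project)
if (length(validation$errors) == 0) project$import(lab_import, overwrite = TRUE)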
Error Handling
Handling Import Failures
# Robust import with error handling
safe_import <- function(project, data, max_retries = 3) {
for (attempt in 1:max_retries) {
tryCatch({
result <- project$import(data, overwrite = TRUE, force = TRUE)
cat("Import successful on attempt", attempt, "\n")
return(result)
}, error = function(e) {
cat("Import attempt", attempt, "failed:", e$message, "\n")
if (attempt == max_retries) {
stop("Import failed after", max_retries, "attempts")
}
# Wait before retry
Sys.sleep(2)
})
}
}
# Use safe import
result <- safe_import(project, new_records)
Handling Large Imports
# Import large datasets in chunks
import_in_chunks <- function(project, data, chunk_size = 100) {
n_records <- nrow(data)
n_chunks <- ceiling(n_records / chunk_size)
cat("Importing", n_records, "records in", n_chunks, "chunks\n")
results <- list()
for (i in 1:n_chunks) {
start_idx <- (i - 1) * chunk_size + 1
end_idx <- min(i * chunk_size, n_records)
chunk_data <- data[start_idx:end_idx, ]
cat("Importing chunk", i, "of", n_chunks,
"(records", start_idx, "to", end_idx, ")\n")
result <- project$import(chunk_data, overwrite = TRUE, force = TRUE)
results[[i]] <- result
# Brief pause between chunks
if (i < n_chunks) Sys.sleep(1)
}
cat("All chunks imported successfully\n")
return(results)
}
# Use for large datasets
# results <- import_in_chunks(project, large_dataset)
Best Practices
1. Always Validate Before Import
# Check data quality before import
data_quality_check <- function(data) {
issues <- list()
# Check for missing record IDs
if (any(is.na(data$record_id) | data$record_id == "")) {
issues$missing_ids <- "Some records have missing record_id"
}
# Check for reasonable data ranges
# Add your project-specific checks here
return(issues)
}
# Import includes automatic validation
# - Cache staleness check (warns if >5 minutes old)
# - Change detection (shows what will be modified)
# - Overwrite protection (blocks by default)
2. Use Safety Features
# Import automatically refreshes cache
project$import(data, overwrite = TRUE)
# Save change reports for audit trails
project$import(
data,
overwrite = TRUE,
save_changes = "audit_trail.csv"
)
# Review reports before proceeding
changes <- read.csv("audit_trail.csv")
cat("Fields to be modified:", nrow(changes), "\n")3. Keep Import Logs
# Log all import activities
log_import <- function(project, data, result) {
log_entry <- list(
timestamp = Sys.time(),
records_imported = nrow(data),
import_result = result,
fields = names(data)
)
# Save to log file or database
# saveRDS(log_entry, paste0("import_log_", Sys.Date(), ".rds"))
}
# Example usage after an import:
# log_import(project, new_records, result)
# Or use built-in change reports
project$import(
data,
overwrite = TRUE,
save_changes = paste0("import_", Sys.Date(), ".csv")
)
4. Test with Small Batches First
# Test import process with small sample
test_data <- head(large_dataset, 5)
test_result <- project$import(test_data, overwrite = TRUE)
# Review what changed
cat("Test import successful\n")
cat("Records in cache:", nrow(project$data), "\n")
# Proceed with full import
full_result <- project$import(
large_dataset,
overwrite = TRUE,
save_changes = "full_import_changes.csv"
)
Summary
Key points for REDCap data import with sardine:
- Cache validation: Import checks if cached data is stale (>5 minutes old)
- Automatic refresh: Cache is automatically refreshed after successful import
- Overwrite protection: overwrite = FALSE by default prevents accidental data loss
- Change detection: See exactly what will be modified before confirming
- Interactive confirmation: Import asks for approval before overwriting data
- Change reports: Save detailed CSV reports of all modifications (save_changes parameter)
- Safety by default: Use force = TRUE only in automated workflows
- Custom validation: Add project-specific checks before import
- Error handling: Use tryCatch() and chunking for robust imports
The sardine package makes REDCap data import straightforward while helping you maintain data integrity and consistency.
