REDCap Data Import with Sardine

This vignette demonstrates how to import data into REDCap projects using the sardine package.

Setup

library(sardine)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tibble)

Project Setup

First, create your REDCap project object.

# Load environment and create project
load_env()
project <- redcap_project()
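
load_env() typically reads your API credentials from a local .env file. A minimal sketch of such a file, using placeholder variable names (check your own sardine configuration for the exact names it expects), followed by a quick sanity check that the project loaded:

# Example .env file (variable names are placeholders, not sardine's documented API)
# REDCAP_URL=https://redcap.example.edu/api/
# REDCAP_TOKEN=your-api-token

# Confirm the project loaded before importing
stopifnot(!is.null(project$data))
cat("Connected: project has", nrow(project$data), "records\n")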

Basic Data Import

Simple Record Import

# Create new data to import
new_records <- tibble(
  record_id = c("101", "102", "103"),
  age = c(25, 34, 28),
  gender = c("Female", "Male", "Female"),
  study_group = c("Treatment", "Control", "Treatment")
)

# Import the data with safety checks
result <- project$import(new_records)
# Import checks if cache is stale and analyzes changes
# By default, overwrites are blocked (overwrite = FALSE)

print(result)

Import with Safety Features

The import method includes built-in safety features:

# Import detects cache staleness
result <- project$import(new_records)
# Warning: Project cache is outdated (6 minutes old)
# Import will show: X new records, Y updated fields

# Allow overwrites if needed
result <- project$import(new_records, overwrite = TRUE)
# Confirmation prompt will show what's being overwritten

# Skip confirmation for automated workflows
result <- project$import(new_records, overwrite = TRUE, force = TRUE)

# Save a detailed change report
result <- project$import(
  new_records,
  overwrite = TRUE,
  save_changes = "import_changes.csv"
)

Understanding Import Results

# Import returns the API response
cat("Import API response:\n")
print(result)

# Check import details
cat("Status:", result$status_code, "\n")

# For detailed change tracking, save a report
project$import(
  new_records,
  overwrite = TRUE,
  save_changes = "detailed_changes.csv"
)
# CSV contains: record_id, field_name, old_value, new_value
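
Because the change report is a plain CSV with known columns, you can audit it with ordinary tools. A small sketch, reading the report saved above:

changes <- read.csv("detailed_changes.csv")

# Which fields change most often?
changes %>%
  count(field_name, sort = TRUE)

# How many records are touched?
length(unique(changes$record_id))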

Cache Management After Import

A successful import automatically refreshes the cached project data:

# Before import - cached data count
old_count <- nrow(project$data)
cat("Records before import:", old_count, "\n")

# Import new data
project$import(new_records)
# Import automatically refreshes cache

# Cache is now current
new_count <- nrow(project$data)
cat("Records after refresh:", new_count, "\n")
cat("New records added:", new_count - old_count, "\n")

Import Options

Overwrite Protection

By default, the import method protects against accidental overwrites:

# Default: overwrites are blocked
project$import(updated_data)
# Error if any existing data would be overwritten

# Allow overwrites with confirmation
project$import(
  updated_data,
  overwrite = TRUE
)
# Shows: "About to overwrite 5 existing fields. Continue? [y/N]"

# Skip confirmation for scripts
project$import(
  updated_data,
  overwrite = TRUE,
  force = TRUE
)
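
Because a blocked overwrite surfaces as an R error (as shown above), scripts can catch it and defer the decision rather than abort. A sketch of that pattern:

result <- tryCatch(
  project$import(updated_data),
  error = function(e) {
    message("Import blocked: ", conditionMessage(e))
    NULL
  }
)
# After reviewing what would change, opt in explicitly:
# result <- project$import(updated_data, overwrite = TRUE)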

Change Detection

Import analyzes what will change before committing:

# Import shows change summary
result <- project$import(updated_data, overwrite = TRUE)
# Output:
# - 3 new records will be created
# - 5 existing records will be updated
# - 12 fields will be overwritten
# 
# First few changes:
# record_id | field      | old_value | new_value
# 001       | age        | 30        | 31
# 002       | status     | Active    | Completed
# ...

# Save complete change report
project$import(
  updated_data,
  overwrite = TRUE,
  save_changes = "changes.csv"
)
# Review changes.csv before continuing

Updating Existing Records

Partial Updates

# Update specific fields for existing records
updates <- tibble(
  record_id = c("001", "002"),
  follow_up_date = c("2025-03-01", "2025-03-15"),
  status = c("Completed", "Active")
)

# Import with overwrite enabled
project$import(updates, overwrite = TRUE)
# Confirms: "Update 2 records across 2 fields?"

# Verify updates in refreshed cache
updated_records <- project$data %>%
  filter(record_id %in% c("001", "002")) %>%
  select(record_id, follow_up_date, status)
print(updated_records)

Bulk Updates

# Update multiple records with calculated values
existing_data <- project$data

# Calculate BMI for all records
bmi_updates <- existing_data %>%
  filter(!is.na(height) & !is.na(weight)) %>%
  mutate(
    bmi = round(weight / (height/100)^2, 1)
  ) %>%
  select(record_id, bmi)

# Import calculated BMI values
project$import(bmi_updates, overwrite = TRUE)
# Shows how many BMI fields will be updated

Working with Longitudinal Projects

Event-Specific Imports

# For longitudinal projects, include redcap_event_name
longitudinal_data <- tibble(
  record_id = c("001", "001", "002", "002"),
  redcap_event_name = c("baseline_arm_1", "month_3_arm_1", "baseline_arm_1", "month_3_arm_1"),
  weight = c(75.5, 73.2, 82.1, 80.8),
  bp_systolic = c(120, 118, 135, 130)
)

project$import(longitudinal_data, overwrite = TRUE)
# Change detection works across events
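
If measurements arrive in wide format (one column per visit), reshape them into the one-row-per-event layout shown above. A sketch using tidyr, with hypothetical column names:

library(tidyr)

# Hypothetical wide export: one weight column per visit
wide_weights <- tibble(
  record_id = c("001", "002"),
  weight_baseline = c(75.5, 82.1),
  weight_month_3 = c(73.2, 80.8)
)

long_weights <- wide_weights %>%
  pivot_longer(
    cols = starts_with("weight_"),
    names_to = "redcap_event_name",
    names_prefix = "weight_",
    values_to = "weight"
  ) %>%
  # Append the arm suffix REDCap uses for event names
  mutate(redcap_event_name = paste0(redcap_event_name, "_arm_1"))

project$import(long_weights, overwrite = TRUE)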

Import Validation

Pre-Import Checks

The import method includes automatic cache validation and change detection, but you can add custom validation:

# Function to validate data before import
validate_import_data <- function(data, project) {
  errors <- character()
  warnings <- character()
  
  # Check required fields
  if (!"record_id" %in% names(data)) {
    errors <- c(errors, "record_id field is required")
  }
  
  # Check for duplicate records
  if (any(duplicated(data$record_id))) {
    warnings <- c(warnings, "Duplicate record_ids found")
  }
  
  # Check field names against the cached project data
  project_fields <- names(project$data)
  unknown_fields <- setdiff(names(data), project_fields)
  if (length(unknown_fields) > 0) {
    warnings <- c(warnings, paste("Unknown fields:", paste(unknown_fields, collapse = ", ")))
  }
  
  # Check for reasonable values (example for age)
  if ("age" %in% names(data)) {
    invalid_ages <- data$age < 0 | data$age > 120
    if (any(invalid_ages, na.rm = TRUE)) {
      warnings <- c(warnings, "Some age values seem unreasonable")
    }
  }
  
  list(errors = errors, warnings = warnings)
}

# Validate before import
validation_result <- validate_import_data(new_records, project)

if (length(validation_result$errors) > 0) {
  cat("Errors (must fix):\n")
  cat(paste("- ", validation_result$errors, collapse = "\n"), "\n")
}

if (length(validation_result$warnings) > 0) {
  cat("Warnings (review):\n")
  cat(paste("- ", validation_result$warnings, collapse = "\n"), "\n")
}

# Only import if no errors
if (length(validation_result$errors) == 0) {
  project$import(new_records)
}

Post-Import Verification

# Import automatically refreshes cache, so verification is immediate
verify_import <- function(project, imported_data) {
  # Check that records exist
  imported_ids <- imported_data$record_id
  existing_ids <- project$data$record_id
  
  missing_records <- setdiff(imported_ids, existing_ids)
  
  if (length(missing_records) > 0) {
    cat("Warning: These records were not found after import:\n")
    cat(paste(missing_records, collapse = ", "), "\n")
  } else {
    cat("Success: All records imported successfully\n")
  }
  
  # Check field values for a sample
  sample_record <- imported_data[1, ]
  actual_record <- project$data[project$data$record_id == sample_record$record_id[1], ]
  
  cat("Sample verification for record", sample_record$record_id[1], ":\n")
  for (field in names(sample_record)[-1]) {  # Skip record_id
    if (field %in% names(actual_record)) {
      expected <- sample_record[[field]][1]
      actual <- actual_record[[field]][1]
      # Compare as character to avoid type mismatches between input and cache
      values_match <- identical(as.character(expected), as.character(actual))
      cat("  ", field, ": ", expected, " -> ", actual, " (", 
          ifelse(values_match, "✓", "✗"), ")\n", sep = "")
    }
  }
}

# Use after import
project$import(new_records)
verify_import(project, new_records)

Common Import Scenarios

Importing Survey Data

# Import survey responses
survey_responses <- tibble(
  record_id = c("001", "002", "003"),
  satisfaction_score = c(8, 9, 7),
  would_recommend = c(1, 1, 0),  # 1 = Yes, 0 = No
  comments = c("Great experience", "Very helpful", "Could be better"),
  survey_complete = c(2, 2, 2)  # 2 = Complete
)

# Import with overwrite to update existing survey responses
project$import(survey_responses, overwrite = TRUE)
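
REDCap stores choice fields as raw codes, so labelled responses usually need recoding before import. A sketch with hypothetical labelled input:

# Hypothetical labelled responses from an external survey tool
labelled_responses <- tibble(
  record_id = c("001", "002", "003"),
  would_recommend = c("Yes", "Yes", "No")
)

coded_responses <- labelled_responses %>%
  mutate(
    # Map labels to the raw codes used above (1 = Yes, 0 = No)
    would_recommend = case_when(
      would_recommend == "Yes" ~ 1,
      would_recommend == "No" ~ 0
    )
  )

project$import(coded_responses, overwrite = TRUE)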

Importing Calculated Fields

# Import derived/calculated values
calculated_data <- project$data %>%
  mutate(
    # Calculate risk score
    risk_score = case_when(
      age >= 65 ~ 3,
      age >= 50 ~ 2,
      age >= 35 ~ 1,
      TRUE ~ 0
    ) + case_when(
      smoking_status == "Current" ~ 2,
      smoking_status == "Former" ~ 1,
      TRUE ~ 0
    ),
    # Categorize BMI
    bmi_category = case_when(
      bmi < 18.5 ~ "Underweight",
      bmi < 25 ~ "Normal",
      bmi < 30 ~ "Overweight",
      TRUE ~ "Obese"
    )
  ) %>%
  select(record_id, risk_score, bmi_category)

# Import calculated values with change report
project$import(
  calculated_data,
  overwrite = TRUE,
  save_changes = "calculated_updates.csv"
)

Importing External Data

# Import data from external sources (CSV, database, etc.)
# Example: Lab results from external system
external_data <- read.csv("lab_results.csv") %>%
  # Match to REDCap record IDs
  rename(record_id = patient_id) %>%
  # Select and rename fields to match REDCap
  select(
    record_id,
    hemoglobin = hgb_value,
    cholesterol = chol_total,
    lab_date = collection_date
  ) %>%
  # Format dates
  mutate(lab_date = format(as.Date(lab_date), "%Y-%m-%d"))

# Validate before import
validation <- validate_import_data(external_data, project)
if (length(validation$errors) == 0) {
  project$import(external_data, overwrite = TRUE)
}
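
It can also help to check which external IDs already exist in the project, since unmatched IDs create new records rather than update existing ones:

# External IDs with no matching REDCap record
unmatched_ids <- setdiff(external_data$record_id, project$data$record_id)
if (length(unmatched_ids) > 0) {
  cat("IDs not found in REDCap (import would create new records):\n")
  cat(paste(unmatched_ids, collapse = ", "), "\n")
}

# Optionally restrict the import to known records
matched_data <- external_data %>%
  filter(record_id %in% project$data$record_id)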

Error Handling

Handling Import Failures

# Robust import with error handling
safe_import <- function(project, data, max_retries = 3) {
  for (attempt in 1:max_retries) {
    tryCatch({
      result <- project$import(data, overwrite = TRUE, force = TRUE)
      cat("Import successful on attempt", attempt, "\n")
      return(result)
    }, error = function(e) {
      cat("Import attempt", attempt, "failed:", e$message, "\n")
      if (attempt == max_retries) {
        stop("Import failed after", max_retries, "attempts")
      }
      # Wait before retry
      Sys.sleep(2)
    })
  }
}

# Use safe import
result <- safe_import(project, new_records)

Handling Large Imports

# Import large datasets in chunks
import_in_chunks <- function(project, data, chunk_size = 100) {
  n_records <- nrow(data)
  n_chunks <- ceiling(n_records / chunk_size)
  
  cat("Importing", n_records, "records in", n_chunks, "chunks\n")
  
  results <- list()
  
  for (i in 1:n_chunks) {
    start_idx <- (i - 1) * chunk_size + 1
    end_idx <- min(i * chunk_size, n_records)
    
    chunk_data <- data[start_idx:end_idx, ]
    
    cat("Importing chunk", i, "of", n_chunks, 
        "(records", start_idx, "to", end_idx, ")\n")
    
    result <- project$import(chunk_data, overwrite = TRUE, force = TRUE)
    results[[i]] <- result
    
    # Brief pause between chunks
    if (i < n_chunks) Sys.sleep(1)
  }
  
  cat("All chunks imported successfully\n")
  
  return(results)
}

# Use for large datasets
# results <- import_in_chunks(project, large_dataset)

Best Practices

1. Always Validate Before Import

# Check data quality before import
data_quality_check <- function(data) {
  issues <- list()
  
  # Check for missing record IDs
  if (any(is.na(data$record_id) | data$record_id == "")) {
    issues$missing_ids <- "Some records have missing record_id"
  }
  
  # Check for reasonable data ranges
  # Add your project-specific checks here
  
  return(issues)
}
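
A quick usage sketch for the helper above:

issues <- data_quality_check(new_records)
if (length(issues) > 0) {
  str(issues)  # review issues before importing
} else {
  project$import(new_records)
}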

# Import includes automatic validation
# - Cache staleness check (warns if >5 minutes old)
# - Change detection (shows what will be modified)
# - Overwrite protection (blocks by default)

2. Use Safety Features

# Import automatically refreshes cache
project$import(data, overwrite = TRUE)

# Save change reports for audit trails
project$import(
  data,
  overwrite = TRUE,
  save_changes = "audit_trail.csv"
)

# Review reports before proceeding
changes <- read.csv("audit_trail.csv")
cat("Fields to be modified:", nrow(changes), "\n")

3. Keep Import Logs

# Log all import activities
log_import <- function(project, data, result) {
  log_entry <- list(
    timestamp = Sys.time(),
    records_imported = nrow(data),
    import_result = result,
    fields = names(data)
  )
  
  # Save to log file or database
  # saveRDS(log_entry, paste0("import_log_", Sys.Date(), ".rds"))
  invisible(log_entry)
}
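
# Usage sketch: record what was imported alongside the API result
result <- project$import(data, overwrite = TRUE)
log_import(project, data, result)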

# Or use built-in change reports
project$import(
  data,
  overwrite = TRUE,
  save_changes = paste0("import_", Sys.Date(), ".csv")
)

4. Test with Small Batches First

# Test import process with small sample
test_data <- head(large_dataset, 5)
test_result <- project$import(test_data, overwrite = TRUE)

# Review what changed
cat("Test import successful\n")
cat("Records in cache:", nrow(project$data), "\n")

# Proceed with full import
full_result <- project$import(
  large_dataset,
  overwrite = TRUE,
  save_changes = "full_import_changes.csv"
)

Summary

Key points for REDCap data import with sardine:

  • Cache validation: Import checks if cached data is stale (>5 minutes old)
  • Automatic refresh: Cache is refreshed automatically after a successful import
  • Overwrite protection: overwrite = FALSE by default prevents accidental data loss
  • Change detection: See exactly what will be modified before confirming
  • Interactive confirmation: Import asks for approval before overwriting data
  • Change reports: Save detailed CSV reports of all modifications (save_changes parameter)
  • Safety by default: Use force = TRUE only in automated workflows
  • Custom validation: Add project-specific checks before import
  • Error handling: Use try-catch and chunking for robust imports

The sardine package makes REDCap data import straightforward while helping you maintain data integrity and consistency.