Skip to contents

Writes neuroimaging data structured according to the H5ClusterExperiment specification into an HDF5 file.

This function takes R objects representing the mask, cluster definitions, run-specific data (either full voxel-level or summary time series), and associated metadata, and creates the corresponding HDF5 groups and datasets.

Usage

write_clustered_experiment_h5(
  filepath,
  mask,
  clusters,
  runs_data,
  cluster_metadata = NULL,
  overwrite = FALSE,
  compress = TRUE,
  verbose = TRUE
)

Arguments

filepath

Character string: the path to the HDF5 file to create. If the file already exists, it is overwritten only when `overwrite = TRUE`; otherwise an error is raised.

mask

A `LogicalNeuroVol` object representing the brain mask.

clusters

A `ClusteredNeuroVol` object containing cluster assignments for voxels within the mask.

runs_data

A list where each element represents a single run/scan. Each element must be a list containing:

  • `scan_name`: (character) Unique identifier for the scan.

  • `type`: (character) Either "full" or "summary".

  • `data`:

    • If `type` is "full", `data` must be a list where names are `cluster_<cid>` (e.g., `cluster_1`, `cluster_2`) and values are matrices `[nVoxelsInCluster, nTime]` containing the time series for that cluster.

    • If `type` is "summary", `data` must be a single matrix `[nTime, nClusters]` containing the summary time series.

  • `metadata`: (Optional) A list of key-value pairs for scan-specific metadata. Can include `n_time` explicitly, otherwise it's inferred from data.

cluster_metadata

(Optional) A `data.frame` containing metadata for the clusters. Must contain at least a column named `cluster_id` matching the unique IDs in `clusters`. Other columns will be written as part of a compound dataset.

overwrite

Logical: If `TRUE`, overwrite the file if it exists. Default `FALSE`.

compress

Logical: If `TRUE`, apply GZIP compression to data arrays. Default `TRUE`.

verbose

Logical: Print progress messages? Default `TRUE`.

Value

Invisibly returns `NULL`. Called for its side effect of creating the HDF5 file.

Examples

if (FALSE) { # \dontrun{
if (requireNamespace("neuroim2", quietly = TRUE) &&
    requireNamespace("hdf5r", quietly = TRUE) &&
    exists("write_clustered_experiment_h5", where = "package:fmristore") &&
    !is.null(fmristore:::create_minimal_LogicalNeuroVol) &&
    !is.null(fmristore:::create_minimal_ClusteredNeuroVol)) {

  temp_h5_file <- NULL

  tryCatch({
    # 1. Create a temporary file path
    temp_h5_file <- tempfile(fileext = ".h5")

    # 2. Create minimal mask and clusters using helpers
    mask_vol <- fmristore:::create_minimal_LogicalNeuroVol(dims = c(5L, 5L, 2L))
    # Ensure clusters are within the mask and have some content.
    # Create clusters that align with the mask's space.
    clust_vol <- fmristore:::create_minimal_ClusteredNeuroVol(
      space = neuroim2::space(mask_vol), # Use mask's space
      mask = mask_vol@.Data,             # Use mask's data
      num_clusters = 2L
    )

    # 3. Prepare minimal runs_data
    # Get cluster IDs and number of voxels per cluster from clust_vol
    unique_cids <- sort(unique(clust_vol@clusters[clust_vol@clusters > 0]))
    n_time_run1 <- 10L
    n_time_run2 <- 8L

    # Run 1: "full" data type — one matrix per cluster, [nVoxInCluster, nTime]
    run1_data_list <- list()
    if (length(unique_cids) > 0) {
      for (cid in unique_cids) {
        n_vox_in_cluster <- sum(clust_vol@clusters == cid)
        if (n_vox_in_cluster > 0) {
          run1_data_list[[paste0("cluster_", cid)]] <- matrix(
            rnorm(n_vox_in_cluster * n_time_run1),
            nrow = n_vox_in_cluster,
            ncol = n_time_run1
          )
        }
      }
    }

    run1 <- list(
      scan_name = "ScanA_Full",
      type = "full",
      data = run1_data_list,
      metadata = list(subject_id = "sub-01", task = "rest", n_time = n_time_run1)
    )

    # Run 2: "summary" data type — single matrix [nTime, nClusters]
    run2_summary_matrix <- matrix(
      rnorm(n_time_run2 * length(unique_cids)),
      nrow = n_time_run2,
      ncol = length(unique_cids)
    )
    colnames(run2_summary_matrix) <- paste0("cluster_", unique_cids) # Optional: for clarity

    run2 <- list(
      scan_name = "ScanB_Summary",
      type = "summary",
      data = run2_summary_matrix,
      metadata = list(subject_id = "sub-01", task = "task", n_time = n_time_run2)
    )

    runs_data_list <- list(run1, run2)

    # 4. Prepare minimal cluster_metadata (optional)
    cluster_meta_df <- NULL
    if (length(unique_cids) > 0) {
      cluster_meta_df <- data.frame(
        cluster_id = unique_cids,
        # seq_along() is safe even if unique_cids were empty (1:length is not)
        name = paste0("Region_", LETTERS[seq_along(unique_cids)]),
        # vapply() pins the return type, unlike sapply()
        size_vox = vapply(
          unique_cids,
          function(id) sum(clust_vol@clusters == id),
          integer(1)
        )
      )
    }

    # 5. Call the function
    write_clustered_experiment_h5(
      filepath = temp_h5_file,
      mask = mask_vol,
      clusters = clust_vol,
      runs_data = runs_data_list,
      cluster_metadata = cluster_meta_df,
      overwrite = TRUE,
      verbose = FALSE
    )

    # Verify file was created
    if (file.exists(temp_h5_file)) {
      # "\n" (not "\\n") so cat() emits a real newline
      cat("Successfully wrote clustered experiment to:", temp_h5_file, "\n")
      # Optional: Basic check of the HDF5 file structure
      # h5f <- hdf5r::H5File$new(temp_h5_file, mode="r")
      # print(h5f$ls(recursive=TRUE))
      # h5f$close_all()
    }

  }, error = function(e) {
    message("write_clustered_experiment_h5 example failed: ", e$message)
    if (!is.null(temp_h5_file)) message("Temporary file was: ", temp_h5_file)
  }, finally = {
    # Clean up temporary file
    if (!is.null(temp_h5_file) && file.exists(temp_h5_file)) {
      unlink(temp_h5_file)
    }
  })
} else {
  message("Skipping write_clustered_experiment_h5 example: dependencies or helpers not available.")
}
} # }