Skip to contents

Why Use HDF5 for Neuroimaging?

Working with large fMRI datasets can quickly exhaust your computer’s memory. The fmristore package solves this problem by storing your data in HDF5 format, which:

  • Saves memory: Data stays on disk and is only loaded when you access it
  • Works seamlessly: Behaves just like regular R arrays
  • Scales effortlessly: Handle datasets larger than your RAM

Quick Start: 3D Brain Volumes

Let’s start with a simple example - storing a single brain volume:

# Create a small brain volume (5x5x3 voxels)
brain_data <- array(rnorm(75), dim = c(5, 5, 3))
brain_vol <- NeuroVol(brain_data, NeuroSpace(c(5, 5, 3)))

# Save to HDF5 format
h5_file <- tempfile(fileext = ".h5")
h5_brain <- as_h5(brain_vol, file = h5_file)
#> Successfully wrote NeuroVol to: /tmp/RtmpNAYJ3C/file21ec1a495cc5.h5

# Access works just like a regular array!
print(h5_brain[1:2, 1:2, 1])
#>            [,1]      [,2]
#> [1,] -1.4000435  1.148412
#> [2,]  0.2553171 -1.821818

# Don't forget to close when done
close(h5_brain)

Working with fMRI Time Series (4D Data)

Most fMRI data includes a time dimension. Here’s how to handle 4D data efficiently:

# Create fMRI data: 10x10x5 brain, 20 time points
fmri_data <- array(rnorm(10 * 10 * 5 * 20), dim = c(10, 10, 5, 20))
fmri_vec <- NeuroVec(fmri_data, NeuroSpace(c(10, 10, 5, 20)))

# Convert to HDF5
h5_file <- tempfile(fileext = ".h5")
h5_fmri <- as_h5(fmri_vec, file = h5_file)

# Extract time series for a single voxel
voxel_timeseries <- series(h5_fmri, i = 5, j = 5, k = 3)
plot(voxel_timeseries, type = "l",
  main = "fMRI Time Series for Voxel [5,5,3]",
  xlab = "Time", ylab = "Signal")


# Extract a single time point (3D volume)
volume_t10 <- h5_fmri[, , , 10]
print(paste("Volume at time 10 has dimensions:",
  paste(dim(volume_t10), collapse = "x")))
#> [1] "Volume at time 10 has dimensions: 10x10x5"

close(h5_fmri)

Loading Existing HDF5 Files

If you have HDF5 files created by fmristore or compatible tools:

# First, let's create an example file
example_data <- NeuroVol(array(1:27, dim = c(3, 3, 3)),
  NeuroSpace(c(3, 3, 3)))
h5_file <- tempfile(fileext = ".h5")
temp_h5 <- as_h5(example_data, file = h5_file)
#> Successfully wrote NeuroVol to: /tmp/RtmpNAYJ3C/file21ec1ce5a25a.h5
close(temp_h5)

# Now load it back
h5_brain <- H5NeuroVol(file_name = h5_file)

# Work with it naturally
print(h5_brain[, , 2])  # Get slice 2
#>      [,1] [,2] [,3]
#> [1,]   10   13   16
#> [2,]   11   14   17
#> [3,]   12   15   18
print(dim(h5_brain))  # Check dimensions
#> [1] 3 3 3

close(h5_brain)
unlink(h5_file)

Best Practices

1. Always Close Your Files

h5_data <- as_h5(my_data, file = "output.h5")
# ... do your work ...
close(h5_data)  # Essential!

2. Use Chunk Sizes Wisely

For better performance with large datasets:

# Optimize for accessing full volumes at each time point
h5_data <- as_h5(my_4d_data,
  file = "output.h5",
  chunk_dims = c(64, 64, 40, 1))

3. Memory vs Speed Trade-off

  • Small frequent access: HDF5 is perfect
  • Whole dataset operations: Consider loading into memory if it fits

Common Use Cases

Preprocessing Large Datasets

# Create example data
large_fmri <- NeuroVec(array(rnorm(50 * 50 * 30 * 100), c(50, 50, 30, 100)),
  NeuroSpace(c(50, 50, 30, 100)))
h5_file <- tempfile(fileext = ".h5")
h5_data <- as_h5(large_fmri, file = h5_file)

# Process one volume at a time (memory efficient)
n_timepoints <- dim(h5_data)[4]
means <- numeric(n_timepoints)

for (t in 1:n_timepoints) {
  volume_t <- h5_data[, , , t]
  means[t] <- mean(volume_t)
}

plot(means, type = "l",
  main = "Mean Brain Activity Over Time",
  xlab = "Time", ylab = "Mean Signal")


close(h5_data)
unlink(h5_file)

Region of Interest Analysis

# Create example data with a specific ROI
brain_data <- array(rnorm(20 * 20 * 10 * 50), c(20, 20, 10, 50))
roi_mask <- array(FALSE, c(20, 20, 10))
roi_mask[8:12, 8:12, 4:6] <- TRUE  # Define ROI

fmri_vec <- NeuroVec(brain_data, NeuroSpace(c(20, 20, 10, 50)))
h5_file <- tempfile(fileext = ".h5")
h5_fmri <- as_h5(fmri_vec, file = h5_file)

# Extract mean time series from ROI
roi_indices <- which(roi_mask, arr.ind = TRUE)
roi_timeseries <- matrix(0, nrow = nrow(roi_indices), ncol = 50)

for (i in 1:nrow(roi_indices)) {
  roi_timeseries[i, ] <- series(h5_fmri,
    i = roi_indices[i, 1],
    j = roi_indices[i, 2],
    k = roi_indices[i, 3])
}

mean_roi <- colMeans(roi_timeseries)
plot(mean_roi, type = "l",
  main = "Average ROI Time Series",
  xlab = "Time", ylab = "Signal")


close(h5_fmri)
unlink(h5_file)

Summary

The fmristore package makes working with large neuroimaging datasets simple:

  1. Convert your data to HDF5 with as_h5()
  2. Access it like regular R arrays
  3. Close when done

No more memory errors, no complicated code - just efficient neuroimaging data analysis!