diff --git a/Project.toml b/Project.toml index cbb3c30..27a5ae5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "UnfoldBIDS" uuid = "b54e767b-1ebd-4480-ac2a-f5f4d8853074" authors = ["Rene Skukies", "Benedikt Ehinger"] -version = "0.3.4" +version = "0.4" [deps] Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -12,11 +12,16 @@ DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568" -PyMNE = "6c5003b2-cbe8-491c-a0d1-70088e6a0fd6" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" Unfold = "181c99d8-e21b-4ff3-b70b-c233eddec679" +[weakdeps] +PyMNE = "6c5003b2-cbe8-491c-a0d1-70088e6a0fd6" + +[extensions] +MNEext = "PyMNE" + [compat] Artifacts = "1" CSV = "0.10" diff --git a/docs/Project.toml b/docs/Project.toml index 341d7c0..8f29cf7 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -10,6 +10,7 @@ Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" +PyMNE = "6c5003b2-cbe8-491c-a0d1-70088e6a0fd6" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Unfold = "181c99d8-e21b-4ff3-b70b-c233eddec679" UnfoldBIDS = "b54e767b-1ebd-4480-ac2a-f5f4d8853074" diff --git a/docs/literate/HowTo/ApplyPreprocessing.jl b/docs/literate/HowTo/ApplyPreprocessing.jl index ba92d30..9157a28 100644 --- a/docs/literate/HowTo/ApplyPreprocessing.jl +++ b/docs/literate/HowTo/ApplyPreprocessing.jl @@ -16,19 +16,21 @@ # You can exchange this function through an arbitrary function (applying MNE processing as needed), as long as it takes the raw MNE data object and returns a pyconverted Julia Array containing the data stream. For example # # ```julia +# using PyMNE +# # function raw_to_filtered_data(raw; channels::AbstractVector{<:Union{String,Integer}}=[], l_freq=0.5, h_freq=45) # # # Load data into memory -# raw.load_data() +# loaded = raw.copy.load_data() # Make a copy to not modify original raw # # # Re-reference to mastoids and add Cz back in -# UnfoldBIDS.PyMNE.add_reference_channels(raw, ref_channels=UnfoldBIDS.pylist(["Cz"]), copy=false) -# raw.set_eeg_reference(ref_channels=UnfoldBIDS.pylist(["RM", "LM"])) +# PyMNE.add_reference_channels(loaded, ref_channels=pylist(["Cz"]), copy=false) +# loaded.set_eeg_reference(ref_channels=pylist(["RM", "LM"])) # # # Filter data -# raw.filter(l_freq, h_freq, picks="eeg") +# loaded.filter(l_freq, h_freq, picks="eeg") # -# return UnfoldBIDS.pyconvert(Array, raw.get_data(picks=UnfoldBIDS.pylist(channels), units="uV")) +# return pyconvert(Array, loaded.get_data(picks=pylist(channels), units="uV")) # end # ``` # diff --git a/docs/literate/HowTo/group_average.jl b/docs/literate/HowTo/group_average.jl index e430cfc..ace0b35 100644 --- a/docs/literate/HowTo/group_average.jl +++ b/docs/literate/HowTo/group_average.jl @@ -1,16 +1,15 @@ # # Calculate group averages -using UnfoldBIDS +using UnfoldBIDS, PyMNE using Unfold using DataFrames using Statistics using CairoMakie, AlgebraOfGraphics using LazyArtifacts -using Main: @artifact_str # this is a workaround for Artifacts used in docs; locally you would `using LazyArtifacts` # ## Analysis # First let's redo the steps from the quickstart tutorial -sample_data_path = artifact"sample_BIDS" +sample_data_path = UnfoldBIDS.erp_core_example(); layout_df = bids_layout(sample_data_path, derivatives=false); data_df = load_bids_eeg_data(layout_df); diff --git a/docs/literate/tutorials/quickstart.jl b/docs/literate/tutorials/quickstart.jl index c77e1b7..250b521 100644 --- a/docs/literate/tutorials/quickstart.jl +++ b/docs/literate/tutorials/quickstart.jl @@ -1,16 +1,15 @@ # # Quickstart -using UnfoldBIDS +using UnfoldBIDS, PyMNE # PyMNE is needed because loading ins now an extension using Unfold using LazyArtifacts -using Main: @artifact_str # this is a workaround for Artifacts used in docs; locally you would `using LazyArtifacts` # ## Loading data # To load use UnfoldBIDS to find the paths to all subject specific data you can uye the bidsLayout function: # -sample_data_path = artifact"sample_BIDS" +sample_data_path = UnfoldBIDS.erp_core_example() layout_df = bids_layout(sample_data_path, derivatives=false) # This will give you a DataFrame containing the paths too the eeg files of all subjects plus their accompanying event files diff --git a/docs/src/index.md b/docs/src/index.md index 56e13f2..cfeecf4 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -17,7 +17,7 @@ If you need more information on BIDS, a quick overview and further reading can b ## Key features & usage - Find paths to all subject data within a BIDS dataset with one function 🔎 -- Apply MNE-preprocessing ⚒️ +- Apply MNE-preprocessing directly before the fit ⚒️ - Apply Unfold.jl style analysis on all subjects in one go ⚡ - Made using DataFrames.jl, ready for second level analysis 🏁 diff --git a/ext/MNEext.jl b/ext/MNEext.jl new file mode 100644 index 0000000..8b7cd8f --- /dev/null +++ b/ext/MNEext.jl @@ -0,0 +1,152 @@ +module MNEext +using UnfoldBIDS +using Unfold +using PyMNE +using DataFrames, DataFramesMeta +using ProgressBars + +""" + _load_bids_eeg_data(layout_df; verbose::Bool=true, kwargs...) + +Internal function to load BIDS EEG data given a bidsLayout DataFrame. + +- `verbose::Bool = true`\\ + Show ProgressBar +- `kwargs...`\\ + kwargs for CSV.read to load events from .tsv file; e.g. to specify delimeter +""" +function _load_bids_eeg_data(layout_df; verbose::Bool=true, kwargs...) + + # Initialize an empty dataframe + eeg_df = DataFrame() + + pbar = ProgressBar(total=size(layout_df, 1)) + + # Loop through each EEG data file + for row in eachrow(layout_df) + file_path = row.file + if verbose + update(pbar) + #@printf("Loading subject %s at:\n %s \n",row.subject, file_path) + end + + # Read in the EEG data as a dataframe using the appropriate reader + if endswith(file_path, ".edf") + eeg_raw = PyMNE.io.read_raw_edf(file_path, verbose="ERROR") + elseif endswith(file_path, ".vhdr") + eeg_raw = PyMNE.io.read_raw_brainvision(file_path, verbose="ERROR") + elseif endswith(file_path, ".fif") + eeg_raw = PyMNE.io.read_raw_fif(file_path, verbose="ERROR") + elseif endswith(file_path, ".set") + eeg_raw = PyMNE.io.read_raw_eeglab(file_path, verbose="ERROR") + end + + tmp_df = DataFrame(subject=row.subject, ses=row.ses, task=row.task, run=row.run, raw=eeg_raw) + + append!(eeg_df, tmp_df) + end + + # try to add events + try + events = UnfoldBIDS.load_events(layout_df; kwargs...) + eeg_df[!, :events] = events.events + catch + @warn "Something went wrong while adding events to DataFrame. Needs manual intervention." + end + + # Return the combined EEG data dataframe + return eeg_df +end + + +""" +_run_unfold(data_df, bf_vec; + remove_time_expanded_Xs=true, + extract_data = raw_to_data, + verbose::Bool=true, + kwargs...) + +Internal function to run Unfold models on DataFrame with MNE Raw objects. Requires PyMNE.jl to be loaded. +""" + +function _run_unfold(data_df::DataFrame, bf_vec; remove_time_expanded_Xs=true, extract_data::Function = raw_to_data, verbose::Bool=true, kwargs...) + + # Init results dataframe + results_df = DataFrame() + + # Check kwargs + fit_keys = (:fit, :contrasts, :eventcolumn, :solver, :show_progress, :eventfields, :show_warnings) + fit_kwargs = (; (k => v for (k, v) in pairs(kwargs) if k ∈ fit_keys)...) + extract_data_kwargs = (; (k => v for (k, v) in pairs(kwargs) if k ∉ fit_keys)...) + + # Init progress bar + pbar = ProgressBar(total=size(data_df, 1)) + + for row in eachrow(data_df) + + if verbose + update(pbar) + #@printf("Loading subject %s \n",row.subject) + end + + tmp_events = row.events + + # Assert if first eventfield in events + # @assert String(eventfields[1]) ∈ names(tmp_events) "Eventfield $(eventfields[1]) not found in events DataFrame. This field is required to define event onsets. Please set the eventfields argument to the collumn that defines your event onsets (in samples)." + + tmp_data = extract_data(row.raw; extract_data_kwargs...) + + # Check for type of model to fit + tmp = last(bf_vec[1]) + + # Fit Overlap Corrected Model if BasisFunction is used + if supertype(typeof(tmp[2])) == Unfold.BasisFunction + m = fit(UnfoldModel, bf_vec, tmp_events, tmp_data; fit_kwargs...) + + # Fit Mass-Univariate Model if time window tuple is used + elseif typeof(tmp[2]) == Tuple{Real, Real} + @assert size(bf_vec, 1) == 1 && bf_vec[1][1] != Any "Currently only one event type is supported for Mass-Univariate models with UnfoldBIDS. Please change your bf_vec accordingly." + + # Get sfreq from raw + sfreq = pyconvert(Real, row.raw.info["sfreq"]) + + # Epoch data + evts = @rsubset(tmp_events, :event .== bf_vec[1][1]) + data_epochs, times = Unfold.epoch(data = tmp_data, tbl = evts, τ = tmp[2], sfreq = sfreq); + + # Fit Mass-Univariate Model + m = fit(UnfoldModel, tmp[1], data_epochs, times; fit_kwargs...) + + end + + if remove_time_expanded_Xs && (m isa UnfoldLinearModel || m isa UnfoldLinearModelContinuousTime) + #m = typeof(m)(m.design, Unfold.DesignMatrix(designmatrix(m).formulas, missing, designmatrix(m).events), m.modelfit) + m.designmatrix = [ + typeof(m.designmatrix[k])( + Unfold.formulas(m)[k], + Unfold.empty_modelmatrix(designmatrix(m)[k]), + Unfold.events(m)[k], + ) for k = 1:length(m.designmatrix) + ] + end + + results = DataFrame(subject = row.subject, ses=row.ses, task=row.task, run=row.run, model = m) + + append!(results_df, results) + + + end + return results_df +end + +""" + raw_to_data(raw; channels::AbstractVector{<:Union{String,Integer}}=[]) + + +Function to get data from MNE raw object. Can choose specific channels; default loads all channels. +""" +function raw_to_data(raw; channels::AbstractVector{<:Union{String,Integer}}=["all"]) + return pyconvert(Array, raw.get_data(picks=pylist(channels), units="uV")) +end + +end # module MNEext \ No newline at end of file diff --git a/src/UnfoldBIDS.jl b/src/UnfoldBIDS.jl index 531dd46..d0fda31 100644 --- a/src/UnfoldBIDS.jl +++ b/src/UnfoldBIDS.jl @@ -5,7 +5,7 @@ using StatsModels, DataFrames, DataFramesMeta, Statistics, Printf using ProgressBars, Continuables using LazyArtifacts # file loading -using PyMNE, CSV +using CSV # unfold using Unfold @@ -30,4 +30,6 @@ export save_results, load_results import StatsModels.FormulaTerm # for exporting export FormulaTerm + + end diff --git a/src/load.jl b/src/load.jl index 6850621..012d692 100644 --- a/src/load.jl +++ b/src/load.jl @@ -154,45 +154,15 @@ Load data found with [`bids_layout`](@ref) into memory. """ function load_bids_eeg_data(layout_df; verbose::Bool=true, kwargs...) - # Initialize an empty dataframe - eeg_df = DataFrame() - - pbar = ProgressBar(total=size(layout_df, 1)) - - # Loop through each EEG data file - for row in eachrow(layout_df) - file_path = row.file - if verbose - update(pbar) - #@printf("Loading subject %s at:\n %s \n",row.subject, file_path) - end - - # Read in the EEG data as a dataframe using the appropriate reader - if endswith(file_path, ".edf") - eeg_raw = PyMNE.io.read_raw_edf(file_path, verbose="ERROR") - elseif endswith(file_path, ".vhdr") - eeg_raw = PyMNE.io.read_raw_brainvision(file_path, verbose="ERROR") - elseif endswith(file_path, ".fif") - eeg_raw = PyMNE.io.read_raw_fif(file_path, verbose="ERROR") - elseif endswith(file_path, ".set") - eeg_raw = PyMNE.io.read_raw_eeglab(file_path, verbose="ERROR") - end - - tmp_df = DataFrame(subject=row.subject, ses=row.ses, task=row.task, run=row.run, raw=eeg_raw) - - append!(eeg_df, tmp_df) - end - - # try to add events - try - events = UnfoldBIDS.load_events(layout_df; kwargs...) - eeg_df[!, :events] = events.events - catch - @warn "Something went wrong while adding events to DataFrame. Needs manual intervention." - end - - # Return the combined EEG data dataframe - return eeg_df + ext_mne = Base.get_extension(@__MODULE__, :MNEext) + if !isnothing(ext_mne) + eeg_df = ext_mne._load_bids_eeg_data(layout_df; verbose=verbose, kwargs...) + else + error("PyMNE is needed to handle MNE Raw objects. Please make sure to load PyMNE.jl explicitly. Use ]add PyMNE.jl and using PyMNE to install/ load it.") + end + + # Return the combined EEG data dataframe + return eeg_df end #----------------------------------------------------------------------------------------------- diff --git a/src/utils.jl b/src/utils.jl index eebd670..a59a2c7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,19 +1,23 @@ """ - run_unfold(dataDF, bfDict; - eventcolumn="event", + run_unfold(data_df, bf_vec; remove_time_expanded_Xs=true, extract_data = raw_to_data, verbose::Bool=true, kwargs...) -Run Unfold analysis on all data in dataDF. +Run Unfold analysis on all data in data_df. Needs to have PyMNE.jl loaded! + +## Arguments +- `data_df::DataFrame`\\ + DataFrame containing BIDS data as returned by `load_bids_data()`. Must contain collumns: :subject, :ses, :task, :run, :raw, :events +- `bf_vec`\\ + Basis function vector as expected by Unfold.jl's `fit()` function.\\ + Can be one of: \\ + - `["eventname" => (formula, basisfunction)]` for overlap corrected models + - `["eventname" => (formula, timewindow)]` for mass-univariate models, where indicates (start, stop) in seconds with `typeof(timewindow) = Tuple{Real, Real}` ## Keywords -- `eventcolumn::String = "event"`\\ - Which collumn Unfold should use during the analysis. -- `eventfields::Array=[:sample]`\\ - (optional, default `[:latency]`) Array of symbols, representing column names in `tbl`, which are passed to basisfunction event-wise. First field of array always defines eventonset in samples. - `remove_time_expanded_Xs::Bool = true`\\ Removes the timeexpanded designmatrix which significantly reduces the memory-consumption. This Xs is rarely needed, but can be recovered (look into the Unfold.load function) - `extract_data::function = raw_to_data`\\ @@ -21,61 +25,17 @@ Run Unfold analysis on all data in dataDF. - `verbose::Bool = true)`\\ Show ProgressBar or not. - `kwargs...`\\ - Will be passed to `extract_data` as function inputs. -""" -function run_unfold(dataDF, bfDict; eventcolumn="event", eventfields=[:sample],remove_time_expanded_Xs=true, extract_data::Function = raw_to_data, verbose::Bool=true, kwargs...) - - resultsDF = DataFrame() - - pbar = ProgressBar(total=size(dataDF, 1)) - - for row in eachrow(dataDF) - - if verbose - update(pbar) - #@printf("Loading subject %s \n",row.subject) - end - - tmpEvents = row.events - - # Assert if first eventfield in events - @assert String(eventfields[1]) ∈ names(tmpEvents) "Eventfield $(eventfields[1]) not found in events DataFrame. This field is required to define event onsets. Please set the eventfields argument to the collumn that defines your event onsets (in samples)." - - tmpData = extract_data(row.raw; kwargs...) - - - # Fit Model - m = fit(UnfoldModel, bfDict, tmpEvents, tmpData; eventcolumn=eventcolumn, eventfields=eventfields) - - - if remove_time_expanded_Xs && (m isa UnfoldLinearModel || m isa UnfoldLinearModelContinuousTime) - #m = typeof(m)(m.design, Unfold.DesignMatrix(designmatrix(m).formulas, missing, designmatrix(m).events), m.modelfit) - m.designmatrix = [ - typeof(m.designmatrix[k])( - Unfold.formulas(m)[k], - Unfold.empty_modelmatrix(designmatrix(m)[k]), - Unfold.events(m)[k], - ) for k = 1:length(m.designmatrix) - ] - end - - results = DataFrame(subject = row.subject, ses=row.ses, task=row.task, run=row.run, model = m) - - append!(resultsDF, results) - - - end - return resultsDF -end - -""" - raw_to_data(raw; channels::AbstractVector{<:Union{String,Integer}}=[]) - - -Function to get data from MNE raw object. Can choose specific channels; default loads all channels. -""" -function raw_to_data(raw; channels::AbstractVector{<:Union{String,Integer}}=["all"]) - return pyconvert(Array, raw.get_data(picks=pylist(channels), units="uV")) + Will be passed to *both* the `fit()` and `extract_data()` calls as function inputs.\\ + For possible kwargs to `fit()` please have a look at the Unfold.jl API: https://unfoldtoolbox.github.io/UnfoldDocs/Unfold.jl/stable/references/functions/ +""" +function run_unfold(args...; kwargs...) + ext_mne = Base.get_extension(@__MODULE__, :MNEext) + if !isnothing(ext_mne) + results_df = ext_mne._run_unfold(args...; kwargs...) + else + error("PyMNE is needed to handle MNE Raw objects. Please make sure to load PyMNE.jl explicitly. Use ]add PyMNE.jl and using PyMNE to install/ load it.") + end + return results_df end """ diff --git a/test/Project.toml b/test/Project.toml index 9a55a9c..62ecc4d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,5 +2,6 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +PyMNE = "6c5003b2-cbe8-491c-a0d1-70088e6a0fd6" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Unfold = "181c99d8-e21b-4ff3-b70b-c233eddec679" diff --git a/test/runtests.jl b/test/runtests.jl index 4bed579..bc91693 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using Unfold, UnfoldBIDS +using Unfold, UnfoldBIDS, PyMNE using LazyArtifacts using Test using DataFrames