Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,17 @@ data_folder.zip/
├── <uv-vis reference folder>/
│ └── reference.txt
└── <echem_folder>/
└── <echem_file>
├── <echem_file_1>
├── <echem_file_2>
└── ...
```

where the `<uv-vis folder>` contains the UV-Vis scans (named as `*_<n>.txt`, which will be used for sorting), the `<uv-vis reference folder>` contains the reference spectrum, and the `<echem_folder>` contains a electrochemical data file that is loadable by `navani`.
where the `<uv-vis folder>` contains the UV-Vis scans (named as `*_<n>.txt`, where `<n>` is used for sorting), the `<uv-vis reference folder>` contains the reference spectrum, and the `<echem_folder>` contains one or more electrochemical data files loadable by [navani](https://github.com/be-smith/navani). Note that if multiple files are present, navani will attempt to stitch them together, so do not upload duplicates (e.g., processed and unprocessed versions of the same file).

#### XRD

There are two XRD plotting modes: against temperature and against electrochemistry. Both modes require a folder containing the XRD data and a folder containing the log data, as shown below; the electrochemistry mode additionally requires a third directory containing the electrochemistry data.

```
data_folder.zip/
├── <xrd folder>/
Expand All @@ -127,18 +131,35 @@ data_folder.zip/
│ └── <N>-scan.dat
├── <log folder>/
│ └── time_series_log.csv
└── <echem folder>/ (optional)
├── <echem_file_1>
├── <echem_file_2>
└── ...

```

where the `<xrd folder>` contains the XRD scans (named as `0000-scan.dat`, `0001-scan.dat`, etc.), and the `<log folder>` contains a time series log file in CSV format, mapping scan number to the time series data, e.g.,
where the `<xrd folder>` contains the XRD scans (named as `0000-scan.dat`, `0001-scan.dat`, etc.), and the `<log folder>` contains a time series log file in CSV format that maps each scan number to the time series data. For example, for the temperature mode:

```csv
scan_number,temperature
scan_number,Temp
0000, 25.0
0001, 30.0
0002, 35.0
0003, 40.0
```

And for the electrochemistry mode:

```csv
scan_number,Temp,start_time,end_time
0000, 25.0, 2025-07-02 19:05:59.614000, 2025-07-02 19:06:56.167000
0001, 30.0, 2025-07-02 19:15:03.582000, 2025-07-02 19:15:59.280000
0002, 35.0, 2025-07-02 19:24:07.586000, 2025-07-02 19:25:04.074000
0003, 40.0, 2025-07-02 19:33:12.198000, 2025-07-02 19:34:08.917000
```

The `<echem folder>` contains one or more electrochemistry files readable by [navani](https://github.com/be-smith/navani). Note that if multiple files are present, navani will attempt to stitch them together, so do not upload duplicates (e.g., processed and unprocessed versions of the same file). For the XRD use case, the echem file is currently expected to have a `Timestamp` column containing date-time information for matching against the log file (e.g., Neware files).

## License

This project is released under the conditions of the MIT license. Please see [LICENSE](https://github.com/datalab-org/datalab-app-plugin-insitu/blob/main/LICENSE) for the full text of the license.
Expand Down
25 changes: 11 additions & 14 deletions src/datalab_app_plugin_insitu/apps/uvvis/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,6 @@ def process_and_store_data(self, file_path: str | Path):
"Scan time is required for processing UV-Vis data. Should include the time between scans in seconds."
)
file_path = Path(file_path)
folders = self.get_available_folders(file_path)
self.data["available_folders"] = folders

if not self.data.get("uvvis_folder_name"):
raise ValueError("UV-Vis folder name is required")
Expand Down Expand Up @@ -158,9 +156,7 @@ def process_and_store_data(self, file_path: str | Path):

return data

def generate_insitu_uvvis_plot(
self, file_path: str | Path | None = None, link_plots: bool = False
):
def generate_insitu_uvvis_plot(self, file_path: Path | None = None, link_plots: bool = False):
"""Generate combined UVVis and electrochemical plots using the operando-style layout.

This method coordinates the creation of various plot components and combines
Expand All @@ -171,10 +167,9 @@ def generate_insitu_uvvis_plot(
rather than looking up in the database for attached files.

"""

if not file_path:
if "file_id" not in self.data:
raise ValueError("No file set in the DataBlock")
return
try:
from pydatalab.file_utils import get_file_info_by_id

Expand All @@ -191,14 +186,16 @@ def generate_insitu_uvvis_plot(
f"Unsupported file extension (must be one of {self.accepted_file_extensions})"
)

data = self.process_and_store_data(file_path)
folders = self.get_available_folders(file_path)
self.data["available_folders"] = folders

required_folders = ["uvvis_folder_name", "echem_folder_name", "uvvis_reference_folder_name"]

if (
self.data.get("uvvis_folder_name") is None
or self.data.get("echem_folder_name") is None
or self.data.get("uvvis_reference_folder_name") is None
):
raise ValueError("UV-Vis and Echem folder names must be set in the DataBlock")
for folder in required_folders:
if not self.data.get(folder):
return

data = self.process_and_store_data(file_path)

plot_data = prepare_uvvis_plot_data(
data["intensity_matrix"],
Expand Down
108 changes: 74 additions & 34 deletions src/datalab_app_plugin_insitu/apps/xrd/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@


class XRDInsituBlock(GenericInSituBlock):
"""This datablock processes in situ XRD data from an input .zip file containing two specific directories:
"""This datablock processes in-situ XRD data from an input .zip file containing two or three specific directories:

- XRD data directory: Contains multiple XRD patterns (.xy, or otherwise) measured at different times,
- Time series directory: Contains echem data (.txt) or temperature data files (.csv).
- XRD data directory: Contains multiple XRD patterns (.xy, or otherwise based on datalab XRDBlock) measured at different times,
- Time series directory: Contains the log data (.csv) with temperature or other time of measurement metadata,
- (Optional) Electrochemical data directory: Contains electrochemical data with voltage vs time data.

"""

Expand All @@ -27,21 +28,44 @@ class XRDInsituBlock(GenericInSituBlock):
available_folders: List[str] = []
xrd_folder_name = None
time_series_folder_name = None
echem_folder_name = None
folder_name = None
plotting_label_dict = {
"x_axis_label": "2Θ (°)",
"time_series_y_axis_label": "Experiment number",
"line_y_axis_label": "Intensity",
"time_series_x_axis_label": "Temperature (°C)",
"label_source": {
"label_template": "File # {file_num}, Exp. # {exp_num}, @ {temperature} °C",
"label_field_map": {
"exp_num": "exp_num",
"temperature": "Temperature",
"file_num": "file_num",
},
},
}

@property
def plotting_label_dict(self):
if self.data["time_series_source"] == "log":
return {
"x_axis_label": "2θ (°)",
"time_series_y_axis_label": "Experiment number",
"line_y_axis_label": "Intensity",
"time_series_x_axis_label": "Temperature (°C)",
"label_source": {
"label_template": "File # {file_num}, Exp. # {exp_num}, @ {temperature} °C",
"label_field_map": {
"exp_num": "exp_num",
"temperature": "Temperature",
"file_num": "file_num",
},
},
}
elif self.data["time_series_source"] == "echem":
return {
"x_axis_label": "2θ (°)",
"time_series_y_axis_label": "Time (s)",
"line_y_axis_label": "Intensity",
"time_series_x_axis_label": "Voltage (V)",
"label_source": {
"label_template": "File # {scan_number}, Exp. # {exp_num}, t = {time} s, V = {voltage} V",
"label_field_map": {
"exp_num": "exp_num",
"time": "time",
"voltage": "voltage",
"scan_number": "scan_number",
},
},
}
else:
raise ValueError(f"Unknown time_series_source: {self.data['time_series_source']}")

defaults = {
"start_exp": 1,
Expand All @@ -58,13 +82,9 @@ def _plot_function(self, file_path=None, link_plots=True):

def process_and_store_data(self, file_path: str | Path):
"""
Process all in situ XRD and electrochemical data and store results.
This method is a wrapper for processing both XRD and electrochemical data.
Process all in situ XRD, log and (optional) electrochemical data and store results.
This method is a wrapper for processing both XRD and electrochemical data and the log data.
"""
file_path = Path(file_path)
folders = self.get_available_folders(file_path)
self.data["available_folders"] = folders

xrd_folder_name = Path(self.data.get("xrd_folder_name"))
if not xrd_folder_name:
raise ValueError("XRD folder name is required")
Expand All @@ -75,15 +95,15 @@ def process_and_store_data(self, file_path: str | Path):

start_exp = int(self.data.get("start_exp", self.defaults["start_exp"]))
exclude_exp = self.data.get("exclude_exp", self.defaults["exclude_exp"])

try:
data = process_local_xrd_data(
file_path=file_path,
xrd_folder_name=xrd_folder_name,
log_folder_name=time_series_folder_name,
start_exp=start_exp,
exclude_exp=exclude_exp,
# Needs to be made more generic
time_series_source=self.data["time_series_source"],
echem_folder_name=self.data.get("echem_folder_name"),
)

num_samples, data_length = data["2D_data"].shape
Expand Down Expand Up @@ -112,7 +132,7 @@ def process_and_store_data(self, file_path: str | Path):
data["2D_data"],
sample_granularity=sample_granularity,
data_granularity=data_granularity,
method="linear",
method="max_pooling",
)

# Spectrai intensities is what the line plot uses - therefore keep every sample but reduce data length
Expand All @@ -121,17 +141,18 @@ def process_and_store_data(self, file_path: str | Path):
data["2D_data"],
sample_granularity=1,
data_granularity=data_granularity,
method="linear",
method="max_pooling",
)

# X values for the heatmap and the line plot
# Linear as these are coordinates not intensities
data["Two theta"] = self.subsample_data(
data["Two theta"],
data_granularity=data_granularity,
sample_granularity=1,
method="linear",
)

# Linear as these are coordinates not intensities
data["file_num_index"] = self.subsample_data(
data["file_num_index"],
sample_granularity=sample_granularity,
Expand All @@ -146,23 +167,25 @@ def process_and_store_data(self, file_path: str | Path):

return data

def generate_insitu_xrd_plot(
self, file_path: str | Path | None = None, link_plots: bool = False
):
"""Generate combined XRD and electrochemical plots using the operando-style layout.
def generate_insitu_xrd_plot(self, file_path: Path | None = None, link_plots: bool = False):
"""Generate combined XRD and electrochemical or Temperature plots using the operando-style layout.

This method coordinates the creation of various plot components and combines
them into a unified visualization.

Parameters:
file_path: Path to the zip file containing XRD and electrochemical data,
rather than looking up in the database for attached files.
link_plots: Boolean to indicate if the plots should be linked using bokeh js scripts.

"""

if self.data.get("time_series_source") not in ("log", "echem"):
raise ValueError(
"time_series_source must be set to either 'log' or 'echem' in the datablock data"
)
if not file_path:
if "file_id" not in self.data:
raise ValueError("No file set in the DataBlock")
return
try:
from pydatalab.file_utils import get_file_info_by_id
except ImportError:
Expand All @@ -178,6 +201,22 @@ def generate_insitu_xrd_plot(
f"Unsupported file extension (must be one of {self.accepted_file_extensions})"
)

folders = self.get_available_folders(file_path)
self.data["available_folders"] = folders

if self.data.get("time_series_source") == "log":
required_folders = ["xrd_folder_name", "time_series_folder_name"]
elif self.data.get("time_series_source") == "echem":
required_folders = ["xrd_folder_name", "time_series_folder_name", "echem_folder_name"]
else:
raise ValueError(
"time_series_source must be set to either 'log' or 'echem' in the datablock data"
)

for folder in required_folders:
if not self.data.get(folder):
return

data = self.process_and_store_data(file_path)

plot_data = prepare_xrd_plot_data(
Expand All @@ -191,6 +230,7 @@ def generate_insitu_xrd_plot(
"sample_granularity", self.defaults["sample_granularity"]
),
index_df=data["index_df"],
time_series_source=self.data["time_series_source"],
)

gp = create_linked_insitu_plots(
Expand Down
Loading
Loading