NeuralEnsemble · sbuergers · Apr 23, 2021 · Apr 23, 2021 · Apr 23, 2021 · Apr 23, 2021
diff --git a/neo/rawio/axonarawio.py b/neo/rawio/axonarawio.py
@@ -9,21 +9,27 @@
  data.bin - raw data file
 
 There are many other data formats from Axona, which we do not consider (yet).
-These are derivative from the raw continuous data (.bin) and could in principle
-be extracted from it (see file format overview for details).
+These are derivatives from the raw continuous data (.bin) and could in
+principle be extracted from it (see file format overview for details).
+
+Every recording contains extracellular electrophysiology (ecephy) data in
+stream 0. When available video tracking (pos) data is in stream 1. Currently
+two-spot tracking mode is assumed and implemented (not four-spot mode).
 
 Author: Steffen Buergers
 
 """
 
-from .baserawio import (BaseRawIO, _signal_channel_dtype, _signal_stream_dtype,
-                        _spike_channel_dtype, _event_channel_dtype,
-                        _common_sig_characteristics)
+from neo.rawio.baserawio import (
+    BaseRawIO, _signal_channel_dtype, _signal_stream_dtype,
+    _spike_channel_dtype, _event_channel_dtype
+)
 import numpy as np
 import os
 import re
-import contextlib
 import datetime
+import contextlib
+import mmap
 
 
 class AxonaRawIO(BaseRawIO):
@@ -35,21 +41,29 @@ class AxonaRawIO(BaseRawIO):
     .set file about the recording setup (see the above manual for details).
 
     Usage:
-        import neo.rawio
+        import neo
+
         r = neo.rawio.AxonaRawIO(
-            filename=os.path.join(dir_name, base_filename)
+            filename=os.path.join(dir_name, set_filename)
         )
         r.parse_header()
-        print(r)
-        raw_chunk = r.get_analogsignal_chunk(block_index=0, seg_index=0,
-                      i_start=0, i_stop=1024,  channel_names=channel_names)
-        float_chunk = reader.rescale_signal_raw_to_float(
-            raw_chunk, dtype='float64',
-            channel_indexes=[0, 3, 6]
+        print('Header information:\n', r)
+
+        raw_chunk = r.get_analogsignal_chunk(
+            block_index=0, seg_index=0, i_start=0, i_stop=5,
+            stream_index=0, channel_indexes=[0, 3, 6]
         )
+        print('Raw acquisition traces:\n', raw_chunk)
+
+        float_chunk = r.rescale_signal_raw_to_float(
+            raw_chunk, dtype=np.float64,
+            channel_indexes=[0, 3, 6],
+            stream_index=0
+        )
+        print('\nRaw acquisition traces in uV:\n', float_chunk)
     """
 
-    extensions = ['bin']  # Never used?
+    extensions = ['bin']
     rawmode = 'one-file'
 
     # In the .bin file, channels are arranged in a strange order.
@@ -81,37 +95,52 @@ def _parse_header(self):
         to raw data (.bin file) and prepare header dictionary in neo format.
         '''
 
-        # Get useful parameters from .set file
+        # Ecephys
         params = ['rawRate']
         params = self.get_set_file_parameters(params)
 
-        # Useful num. bytes per continuous data packet (.bin file)
         self.bytes_packet = 432
         self.bytes_data = 384
         self.bytes_head = 32
         self.bytes_tail = 16
 
-        # All ephys data has this data type
         self.data_dtype = np.int16
 
-        # There is no global header for .bin files
         self.global_header_size = 0
 
-        # Generally useful information
-        self.sr = int(params['rawRate'])
-        self.num_channels = len(self.get_active_tetrode()) * 4
+        self.sr_ecephys = int(params['rawRate'])
+        self.num_channels_ecephys = len(self.get_active_tetrode()) * 4
 
-        # How many 432byte packets does this data contain (<=> num. samples/3)?
         self.num_total_packets = int(os.path.getsize(self.bin_file)
                                      / self.bytes_packet)
-        self.num_total_samples = self.num_total_packets * 3
+        self.num_ecephys_samples = self.num_total_packets * 3
+        self.dur_ecephys = self.num_ecephys_samples / self.sr_ecephys
 
-        # Create np.memmap to .bin file
         self._raw_signals = np.memmap(
             self.bin_file, dtype=self.data_dtype, mode='r',
             offset=self.global_header_size
         )
 
+        # Pos (video tracked animal position)
+        # NOTE: In the file format Manual there are two recording modes
+        # for video tracking: Four-spot mode and two-spot mode. Here we
+        # only consider two-spot mode!
+        self.sr_pos = 100  # ideal SR, empirical SR might differ
+        with open(self.bin_file, 'rb') as f:
+            with contextlib.closing(
+                mmap.mmap(f.fileno(), self.sr_ecephys // 3 // self.sr_pos
+                          * self.bytes_packet, access=mmap.ACCESS_READ)
+            ) as mmap_obj:
+                self.contains_pos_tracking = mmap_obj.find(b'ADU2') > -1
+
+        if self.contains_pos_tracking:
+            self.num_channels_pos = 8
+            self.dur_pos = self.dur_ecephys
+            self.num_pos_samples = int(self.dur_pos * self.sr_pos)
+
+            fbin = open(self.bin_file, 'rb')
+            self.mmpos = mmap.mmap(fbin.fileno(), 0, access=mmap.ACCESS_READ)
+
         # Header dict
         self.header = {}
         self.header['nb_block'] = 1
@@ -134,26 +163,54 @@ def _parse_header(self):
             [tetr for tetr in self.get_active_tetrode() for _ in range(4)]
 
     def _get_signal_streams_header(self):
-        # create signals stream information (we always expect a single stream)
+        if self.contains_pos_tracking:
+            return np.array([('stream 0', '0'), ('stream 1', '1')],
+                            dtype=_signal_stream_dtype)
         return np.array([('stream 0', '0')], dtype=_signal_stream_dtype)
 
     def _segment_t_start(self, block_index, seg_index):
         return 0.
 
     def _segment_t_stop(self, block_index, seg_index):
-        return self.num_total_samples / self.sr
+        return self.num_ecephys_samples / self.sr_ecephys
 
-    def _get_signal_size(self, block_index, seg_index, channel_indexes=None):
-        return self.num_total_samples
+    def _get_signal_size(self, block_index, seg_index, stream_index):
+        if stream_index == 0:
+            return self.num_ecephys_samples
+        elif (stream_index == 1) and (self.contains_pos_tracking):
+            return self.num_pos_samples
+        return None
 
     def _get_signal_t_start(self, block_index, seg_index, stream_index):
         return 0.
 
     def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop,
                                 stream_index, channel_indexes):
         """
+        Return raw (continuous) signals as 2d numpy array for ecephys
+        data (stream 0; time x chan) or video tracking (stream 1; time x 9)
+        """
+        if stream_index == 1:
+            if self.contains_pos_tracking:
+                raw_signals = self._get_pos_analogsignal_chunk(
+                    i_start, i_stop, channel_indexes
+                )
+
+        elif stream_index == 0:
+            raw_signals = self._get_ecephys_analogsignal_chunk(
+                i_start, i_stop, channel_indexes
+            )
+        return raw_signals
+
+    # ------------------ HELPER METHODS --------------------
+    # These are credited largely to Geoff Barrett from the Hussaini lab:
+    # https://github.com/GeoffBarrett/BinConverter
+    # Adapted or modified by Steffen Buergers
+
+    def _get_ecephys_analogsignal_chunk(self, i_start, i_stop,
+                                        channel_indexes):
+        """
         Return raw (continuous) signals as 2d numpy array (time x chan).
-        Note that block_index and seg_index are always 1 (regardless of input).
 
         Raw data is in a single vector np.memmap with the following structure:
 
@@ -168,13 +225,12 @@ def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop,
         sample 3: 32b (head) + 128*2 (all channels 1st and 2nd entry) + ...
         """
 
-        # Set default values
         if i_start is None:
             i_start = 0
         if i_stop is None:
-            i_stop = self.num_total_samples
+            i_stop = self.num_ecephys_samples
         if channel_indexes is None:
-            channel_indexes = [i for i in range(self.num_channels)]
+            channel_indexes = [i for i in range(self.num_channels_ecephys)]
 
         num_samples = (i_stop - i_start)
 
@@ -207,10 +263,43 @@ def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop,
 
         return raw_signals
 
-    # ------------------ HELPER METHODS --------------------
-    # These are credited largely to Geoff Barrett from the Hussaini lab:
-    # https://github.com/GeoffBarrett/BinConverter
-    # Adapted or modified by Steffen Buergers
+    def _get_pos_analogsignal_chunk(self, i_start=0, i_stop=None,
+                                    channel_indexes=None):
+        """
+        Return np.array of video position tracking (Nsamp x 8 columns),
+        with columns being the following:
+
+        timestamp, x1, y1, x2, y2, npix1, npix2, total_pix, unused
+
+        Position samples are saved in the header of packets with the
+        flag 'ADU2'.
+        """
+        if channel_indexes is None:
+            channel_indexes = [i for i in range(self.num_channels_pos)]
+
+        pos = np.array([]).astype(float)
+
+        flags = np.ndarray(
+            (self.num_total_packets,), 'S4',
+            self.mmpos, 0, self.bytes_packet
+        )
+        ADU2_idx = np.where(flags == b'ADU2')
+
+        pos = np.ndarray(
+            (self.num_total_packets,), (np.int16, (1, 8)), self.mmpos, 16,
+            (self.bytes_packet,)
+        ).reshape((-1, 8))[ADU2_idx][:]
+
+        pos = np.hstack(
+            (ADU2_idx[0].reshape((-1, 1)), pos)
+        ).astype(float)
+
+        # The timestamp from the recording is dubious, create our own
+        packets_per_ms = self.sr_ecephys / 3000
+        pos[:, 0] = pos[:, 0] / packets_per_ms
+        pos = np.delete(pos, 1, 1)
+
+        return pos[i_start:i_stop, channel_indexes]
 
     def get_set_file_parameters(self, params):
         """
@@ -253,8 +342,6 @@ def get_active_tetrode(self):
         with open(self.set_file, encoding=self.set_file_encoding) as f:
             for line in f:
 
-                # The pattern to look for is collectMask_X Y,
-                # where X is the tetrode number, and Y is 1 or 0
                 if 'collectMask_' in line:
                     tetrode_str, tetrode_status = line.split(' ')
                     if int(tetrode_status) == 1:
@@ -279,7 +366,8 @@ def read_datetime(self):
                 if line.startswith('trial_date'):
                     date_string = re.findall(r'\d+\s\w+\s\d{4}$', line)[0]
                 if line.startswith('trial_time'):
-                    time_string = line[len('trial_time') + 1::].replace('\n', '')
+                    time_string = line[len('trial_time') + 1::]\
+                        .replace('\n', '')
 
         return datetime.datetime.strptime(date_string + ', ' + time_string,
                                           "%d %b %Y, %H:%M:%S")
@@ -293,7 +381,7 @@ def _get_channel_gain(self):
 
         Formula for .eeg and .X files, presumably also .bin files:
 
-        1000*adc_fullscale_mv / (gain_ch*128)
+        1000 * adc_fullscale_mv / (gain_ch*128)
         """
         gain_list = []
 
@@ -314,14 +402,18 @@ def _get_signal_chan_header(self):
         that recorded data. Each tuple contains the following information:
 
         channel name (1a, 1b, 1c, 1d, 2a, 2b, ...; num=tetrode, letter=elec),
-        channel id (1, 2, 3, 4, 5, ... N),
-        sampling rate,
+        channel id (0, 1, 2, 3, 4, ... N),
+        sampling rate (48000),
         data type (int16),
         unit (uV),
         gain,
         offset,
-        stream id
+        stream id (0)
+
+        When available, video tracking data is appended after the ecephys data.
         """
+
+        # Ecephys data
         active_tetrode_set = self.get_active_tetrode()
         num_active_tetrode = len(active_tetrode_set)
 
@@ -330,19 +422,30 @@ def _get_signal_chan_header(self):
         dtype = self.data_dtype
         units = 'uV'
         gain_list = self._get_channel_gain()
-        offset = 0  # What is the offset?
+        offset = 0
 
         sig_channels = []
         for itetr in range(num_active_tetrode):
 
             for ielec in range(elec_per_tetrode):
 
                 cntr = (itetr * elec_per_tetrode) + ielec
-                ch_name = '{}{}'.format(itetr, letters[ielec])
-                chan_id = str(cntr + 1)
+                ch_name = '{}{}'.format(itetr + 1, letters[ielec])
+                chan_id = str(cntr)
                 gain = gain_list[cntr]
                 stream_id = '0'
-                sig_channels.append((ch_name, chan_id, self.sr, dtype,
+                sig_channels.append((ch_name, chan_id, self.sr_ecephys, dtype,
                                      units, gain, offset, stream_id))
 
+        # Append video tracking data (two-spot mode)
+        if self.contains_pos_tracking:
+
+            pos_chan_names = 't,x1,y1,x2,y2,numpix1,numpix2,unused'.split(',')
+            pos_units = ['', '', '', '', '', 'pix', 'pix', '']
+            for i, (name, unit) in enumerate(zip(pos_chan_names, pos_units)):
+                sig_channels.append(
+                    (name, str(i), self.sr_pos, np.float64,
+                     unit, 1, 0, '1')
+                )
+
         return np.array(sig_channels, dtype=_signal_channel_dtype)