Source code for know.tests.test_slabsIter

from know.base import SlabsIter
from py2store import wrap_kvs, filt_iter, FilesOfZip
import soundfile as sf
import io
from graze import Graze
from typing import Mapping, Callable, Any, Protocol, runtime_checkable
import pytest
import logging
import pandas as pd


@wrap_kvs(obj_of_data=lambda b: sf.read(io.BytesIO(b), dtype='int16')[0])
@wrap_kvs(
    key_of_id=lambda _id: _id[len('sounds/'):],
    id_of_key=lambda key: f'sounds/{key}',
)
@filt_iter(filt=lambda x: x.endswith('.wav'))
class WfStore(FilesOfZip):
    """Waveform access. Keys are .wav filenames and values are numpy arrays of int16 waveform."""
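
def _wf_store_usage_sketch(zip_source):
    # A minimal, hedged usage sketch (not part of the original test). It assumes
    # `zip_source` is a path to, or in-memory bytes of, a zip whose entries live
    # under 'sounds/' and end in '.wav', matching the key mapping and filter
    # applied to WfStore above.
    wfs = WfStore(zip_source)      # FilesOfZip accepts a zip path or zip bytes
    first_key = next(iter(wfs))    # keys are exposed without the 'sounds/' prefix
    wf = wfs[first_key]            # numpy int16 array of the waveform samples
    return first_key, wf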
WfStoreType = Mapping
WfStoreFactory = Callable[[Any], WfStoreType]
@runtime_checkable
class WfStoreFactoryGetter(Protocol):
    def __call__(self, *args, **kwargs) -> WfStoreFactory:
        """Returns a WfStoreFactory, that is, a callable that makes wf stores"""
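
def _wf_store_factory_getter_conformance_sketch():
    # Hedged illustration (not part of the original test): because
    # WfStoreFactoryGetter is a runtime_checkable Protocol whose only member is
    # __call__, isinstance only checks that the object is callable; it does not
    # verify the signature or return type. get_wf_store_cls, defined below, is
    # one such conforming callable.
    return isinstance(get_wf_store_cls, WfStoreFactoryGetter)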
get_wf_store_cls: WfStoreFactoryGetter  # This "declares" the coming function's "type"


def get_wf_store_cls(
    key_prefix_for_sounds: str = 'sounds/',
    audio_file_extension='.wav',
    dtype='int16',
    other_soundfile_kwargs=None,
):
    other_soundfile_kwargs = other_soundfile_kwargs or {}

    @wrap_kvs(
        key_of_id=lambda _id: _id[len(key_prefix_for_sounds):],
        id_of_key=lambda key: f'{key_prefix_for_sounds}{key}',
        obj_of_data=lambda b: sf.read(
            io.BytesIO(b), dtype=dtype, **other_soundfile_kwargs
        )[0],
    )
    @filt_iter(filt=lambda x: x.endswith(audio_file_extension))
    class WfStore(FilesOfZip):
        """Waveform access. Keys are .wav filenames and values are numpy arrays of int16 waveform."""

    return WfStore


def data_for_url(
    url: str,
    get_wf_store_factory: WfStoreFactoryGetter = get_wf_store_cls,
    key_to_annots_csv='plc_0.csv',
):
    g = Graze()[url]
    z = FilesOfZip(g)
    annotations = pd.read_csv(io.BytesIO(z[key_to_annots_csv]), header=0)
    wf_store_factory = get_wf_store_factory()
    wf_store = wf_store_factory(io.BytesIO(g))
    return annotations, wf_store


test_1 = dict(
    url='https://www.dropbox.com/sh/by3gchoa7fvkuol/AABDyntOqfz1jMPKGBWlQ53na?dl=0',
    project_sref_name='filename',
)


@pytest.mark.parametrize('test_params', [test_1])
def test_slabsiter(test_params):
    # get the data from a dropbox url
    annotations, wf_store = data_for_url(
        test_params['url'],
        get_wf_store_factory=get_wf_store_cls,
        key_to_annots_csv='plc_0.csv',
    )

    # Make all the iterators needed. They are aligned in the sense that each "next" yields aligned data
    wf_iter = (
        wf_store[store_key] for store_key in pd.unique(annotations['filename'])
    )

    def iter_per_file(col='channel'):
        for filename in pd.unique(annotations['filename']):
            df_filename = annotations[annotations['filename'] == filename]
            yield list(df_filename[col])

    phase_iter = iter_per_file('phase')
    channel_iter = iter_per_file('channel')
    session_iter = iter_per_file('session')

    # Making a slabs iter object
    def make_a_slabs_iter():
        # Mocking the sensor readers
        audio_sensor_read = wf_iter.__next__
        channel_read = channel_iter.__next__
        session_read = session_iter.__next__
        phase_read = phase_iter.__next__

        return SlabsIter(
            audio=audio_sensor_read,
            channel=channel_read,
            phase=phase_read,
            session=session_read,
            # The next components are computed from the ones above
            check_single_channel=lambda channel: len(set(channel)) == 1,
            n_sessions=lambda session: len(set(session)),
            log_something=lambda phase: logging.info(
                f'N phases found {len(set(phase))}'
            ),
        )

    si = make_a_slabs_iter()
    first = next(si)
    second = next(si)

    # check that the dictionaries we get all contain the correct fields
    expected_keys = {
        k: None
        for k in [
            'audio',
            'channel',
            'phase',
            'session',
            'check_single_channel',
            'n_sessions',
            'log_something',
        ]
    }.keys()
    assert first.keys() == second.keys() == expected_keys

    # check that each slab contains a single channel
    assert first['check_single_channel'] == second['check_single_channel'] == True

    # check that each slab contains 100 sessions
    assert first['n_sessions'] == second['n_sessions'] == 100
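

def _slabs_iter_minimal_sketch():
    # A minimal, hedged sketch (not part of the original test) of the SlabsIter
    # pattern exercised in test_slabsiter above, using plain in-memory iterators
    # instead of a waveform store. It assumes, as that test does, that SlabsIter
    # calls each component on every `next` and injects earlier components'
    # outputs into later ones by parameter name, yielding one dict per step.
    numbers = iter([[1, 2, 3], [4, 5, 6]])
    si = SlabsIter(
        nums=numbers.__next__,              # a mocked "sensor" read
        total=lambda nums: sum(nums),       # derived from the nums component
        is_long=lambda nums: len(nums) > 2, # another derived component
    )
    slab = next(si)  # expected to look like {'nums': [1, 2, 3], 'total': 6, 'is_long': True}
    return slab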