Source code for ms3.expand_dcml

""" This is the same code as in the corpora repo as copied on September 24, 2020
and then adapted.
"""
import sys, re
from collections import defaultdict

import pandas as pd

from .utils import abs2rel_key, changes2list, DCML_REGEX, rel2abs_key, resolve_relative_keys, \
    series_is_minor, split_alternatives, transform
from .transformations import compute_chord_tones, labels2global_tonic, transpose_chord_tones_by_localkey
from .logger import function_logger


################################################################################
# Constants
################################################################################



class SliceMaker:
    """Helper that turns subscript notation into a reusable object.

    Subscripting an instance hands back whatever was written between the
    square brackets. This makes it possible to store slice notation such as
    ``:3`` in a variable or to pass it as a function argument.

    Examples
    --------
    .. code-block:: python

        SM = SliceMaker()
        some_function( slice_this, SM[3:8] )
        select_all = SM[:]
        df.loc[select_all]
    """

    def __getitem__(self, item):
        # The interpreter has already turned ``a:b`` into a ``slice`` object
        # (or ``a, b`` into a tuple) before this method is called.
        return item


# Shared module-level instance, e.g. used as ``SM[:]`` to select everything.
SM = SliceMaker()
@function_logger
def expand_labels(df, column='label', regex=None, rename={}, dropna=False, propagate=True, volta_structure=None,
                  relative_to_global=False, chord_tones=True, absolute=False, all_in_c=False, skip_checks=False, ):
    """
    Split harmony labels complying with the DCML syntax into columns holding their various features
    and allows for additional computations and transformations.

    Uses: :py:func:`compute_chord_tones`, :py:func:`features2type`, :py:func:`labels2global_tonic`,
    :py:func:`propagate_keys`, :py:func:`propagate_pedal`, :py:func:`replace_special`,
    :py:func:`~.utils.split_alternatives`, :py:func:`split_labels`, :py:func:`~.utils.transform`,
    :py:func:`transpose_chord_tones_by_localkey`, :py:func:`values_into_df`

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Dataframe where one column contains DCML chord labels. It is copied, not mutated.
    column : :obj:`str`
        Name of the column that holds the harmony labels.
    regex : :obj:`re.Pattern`
        Compiled regular expression used to split the labels. It needs to have named groups.
        The group names are used as column names unless replaced by ``rename``.
        Defaults to ``DCML_REGEX``.
    rename : :obj:`dict`, optional
        Dictionary to map the regex's group names to deviating column names of your choice.
        The passed dictionary is not modified.
    dropna : :obj:`bool`, optional
        Pass True if you want to drop rows where ``column`` is NaN/<NA>
    propagate: :obj:`bool`, optional
        By default, information about global and local keys and about pedal points is spread throughout
        the DataFrame. Pass False if you only want to split the labels into their features. Chord tones
        are then only computed if the key columns have no gaps on rows that hold a chord.
    volta_structure: :obj:`dict`, optional
        {first_mc -> {volta_number -> [mc1, mc2...]} } dictionary as you can get it from
        ``Score.mscx.volta_structure``. This allows for correct propagation into second and other voltas.
    relative_to_global : :obj:`bool`, optional
        Pass True if you want all labels expressed with respect to the global key.
        This levels and eliminates the features `localkey` and `relativeroot`.
    chord_tones : :obj:`bool`, optional
        Pass True if you want to add four columns that contain information about each label's
        chord, added, root, and bass tones. The pitches are expressed as intervals
        relative to the respective chord's local key or, if ``relative_to_global=True``,
        to the globalkey. The intervals are represented as integers that represent
        stacks of fifths over the tonic, such that 0 = tonic, 1 = dominant, -1 = subdominant,
        2 = supertonic etc.
    absolute : :obj:`bool`, optional
        Pass True if you want to transpose the relative `chord_tones` to the global
        key, which makes them absolute so they can be expressed as actual note names.
        This implies prior conversion of the chord_tones (but not of the labels) to
        the global tonic.
    all_in_c : :obj:`bool`, optional
        Pass True to transpose `chord_tones` to C major/minor. This performs the same
        transposition of chord tones as `relative_to_global` but without transposing
        the labels, too. This option clashes with `absolute=True`.
    skip_checks : :obj:`bool`, optional
        Pass True to skip the check for immediate label repetitions. The flag is also
        forwarded to :py:func:`split_labels`.

    Returns
    -------
    :obj:`pandas.DataFrame`
        Original DataFrame plus additional columns with split features.

    Raises
    ------
    AssertionError
        If both ``absolute`` and ``all_in_c`` are set, or if ``df`` has a MultiIndex.
    """
    assert sum((absolute, all_in_c)) < 2, "Chord tones can be either 'absolute' or 'all_in_c', not both."
    # `nlevels` is always >= 1, so it has to be compared explicitly for the guard to have any effect.
    assert df.index.nlevels == 1, f"""df has a MultiIndex of {df.index.nlevels} levels, implying that it has information from several pieces.
Apply expand_labels() to one piece at a time."""
    df = df.copy()
    if regex is None:
        regex = DCML_REGEX

    ### If the index is not unique, it has to be temporarily replaced
    tmp_index = not df.index.is_unique
    if tmp_index:
        ix = df.index
        df.reset_index(drop=True, inplace=True)

    # Work on a private copy: the mapping is completed here and handed to helpers which add further
    # defaults to it. Without the copy, the caller's dict (or the shared default) would change between calls.
    rename = dict(rename)
    for col in ['numeral', 'form', 'figbass', 'localkey', 'globalkey', 'phraseend']:
        rename.setdefault(col, col)

    if not skip_checks:
        ### Check for too many immediate repetitions
        not_nan = df[column].dropna()
        immediate_repetitions = not_nan == not_nan.shift()
        k = immediate_repetitions.sum()
        if k > 0:
            if k / len(not_nan.index) > 0.1:
                logger.warning(
                    "DataFrame has many direct repetitions of labels.")
            else:
                logger.debug(f"Immediate repetition of labels:\n{not_nan[immediate_repetitions]}")

    ### Do the actual expansion
    df = split_alternatives(df, column=column, logger=logger)
    df = split_labels(df, label_column=column, regex=regex, rename=rename, dropna=dropna, skip_checks=skip_checks,
                      logger=logger)
    df['chord_type'] = transform(df, features2type, [rename[col] for col in ['numeral', 'form', 'figbass']],
                                 logger=logger)
    df = replace_special(df, regex=regex, merge=True, cols=rename, logger=logger)

    key_cols = {col: rename[col] for col in ['localkey', 'globalkey']}
    if propagate:
        # Both propagation steps are best-effort: a failure is logged and the expansion continues.
        # Only ordinary exceptions are caught so that KeyboardInterrupt/SystemExit still get through.
        try:
            df = propagate_keys(df, volta_structure=volta_structure, add_bool=True, **key_cols, logger=logger)
        except Exception as e:
            logger.error(f"propagate_keys() failed with\n{e}", extra={"message_id": (12, )})
        try:
            df = propagate_pedal(df, cols=rename, logger=logger)
        except Exception as e:
            logger.error(f"propagate_pedal() failed with\n{e}", extra={"message_id": (13, )})
    else:
        if chord_tones:
            logger.info("Chord tones cannot be calculated without propagating keys.")
        if relative_to_global:
            logger.info("Cannot transpose labels without propagating keys.")

    not_a_chord = df.chord.isna()
    if chord_tones:
        # Without propagation, chord tones can only be computed if every row holding a chord has both keys.
        key_cols_gapless = {col: (df[col].notna() | not_a_chord).all() for col in key_cols.values()}
        if propagate or all(key_cols_gapless.values()):
            ct = compute_chord_tones(df, expand=True, cols=rename, logger=logger)
            df = values_into_df(df, ct)
            if relative_to_global or absolute or all_in_c:
                df = transpose_chord_tones_by_localkey(df, by_global=absolute)

    if relative_to_global:
        labels2global_tonic(df, inplace=True, cols=rename, logger=logger)

    if tmp_index:
        df.index = ix

    return df
@function_logger
def extract_features_from_labels(S, regex=None):
    """Run ``S.str.extract()`` with ``regex`` and return one column per named capturing group.

    Parameters
    ----------
    S : :obj:`pandas.Series`
        Series of label strings.
    regex : :obj:`re.Pattern` or :obj:`str`, optional
        Pattern with named groups. Defaults to ``DCML_REGEX``. Anything that is not
        already a compiled pattern is compiled with ``re.VERBOSE``.

    Returns
    -------
    :obj:`pandas.DataFrame`
        A copy of the extracted columns, restricted to the pattern's named groups.
    """
    pattern = DCML_REGEX if regex is None else regex
    compiled_pattern_class = re.compile('').__class__
    if pattern.__class__ != compiled_pattern_class:
        pattern = re.compile(pattern, re.VERBOSE)
    group_names = list(pattern.groupindex.keys())
    return S.str.extract(pattern, expand=True)[group_names].copy()
@function_logger
def split_labels(df, label_column='label', regex=None, rename={}, dropna=False, inplace=False, skip_checks=False,
                 **kwargs):
    """
    Split harmony labels complying with the DCML syntax into columns holding their various features.

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Dataframe where one column contains DCML chord labels.
    label_column : :obj:`str`
        Name of the column that holds the harmony labels.
    regex : :obj:`re.Pattern`
        Compiled regular expression used to split the labels. It needs to have named groups.
        The group names are used as column names unless replaced by ``rename``.
        Defaults to ``DCML_REGEX``; a non-compiled pattern is compiled with ``re.VERBOSE``.
    rename : :obj:`dict`
        Dictionary to map the regex's group names to deviating column names.
    dropna : :obj:`bool`, optional
        Pass True if you want to drop rows where ``label_column`` is NaN/<NA>
    inplace : :obj:`bool`, optional
        Pass True if you want to mutate ``df``. In that case nothing is returned.
    skip_checks : :obj:`bool`, optional
        Pass True to skip the warning about labels that do not match the regex.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    :obj:`pandas.DataFrame` or None
        The expanded DataFrame, or None if ``inplace=True``.
    """
    pattern = DCML_REGEX if regex is None else regex
    if pattern.__class__ != re.compile('').__class__:
        pattern = re.compile(pattern, re.VERBOSE)

    frame = df if inplace else df.copy()

    label_missing = frame[label_column].isna()
    if label_missing.any():
        if dropna:
            logger.debug(f"Removing NaN values from label column {label_column}...")
            # NOTE(review): this rebinds the local name, so with inplace=True the caller's
            # object is not reduced -- confirm whether `inplace` is meant to cover this case.
            frame = frame[~label_missing]
        else:
            logger.debug(f"{label_column} contains NaN values.")

    logger.debug(f"Applying RegEx to column {label_column}...")
    extracted = extract_features_from_labels(frame[label_column], regex=pattern, logger=logger)
    if rename:
        extracted.rename(columns=rename, inplace=True)
    # values_into_df() may hand back a new object (e.g. when columns are appended).
    frame = values_into_df(frame, extracted)

    if not skip_checks:
        # A label counts as a syntax error if it is present but no group captured anything.
        unmatched = extracted.isna().all(axis=1) & frame[label_column].notna()
        if unmatched.any():
            logger.warning(
                f"The following labels do not match the regEx:\n{frame.loc[unmatched, :label_column].to_string()}")

    if inplace:
        return None
    return frame
def values_into_df(df, new_values):
    """Merge the columns of ``new_values`` into ``df``.

    Columns that already exist in ``df`` only have their missing values filled from
    ``new_values``; this is written into the passed ``df``. Columns that do not exist yet
    are appended. If at least one existing column was updated, all columns of ``new_values``
    are moved to the end (in the order of ``new_values``) and a copy is returned.

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        DataFrame to be completed.
    new_values : :obj:`pandas.DataFrame`
        DataFrame holding the values to be integrated.

    Returns
    -------
    :obj:`pandas.DataFrame`
    """
    incoming = list(new_values.columns)
    shared, added = [], []
    for name in incoming:
        (shared if name in df.columns else added).append(name)

    if shared:
        # Only gaps are filled; existing values in df take precedence.
        df.loc[:, shared] = df[shared].fillna(new_values[shared])
    if added:
        df = pd.concat([df, new_values[added]], axis=1)
    if not shared:
        return df

    untouched = [name for name in df.columns if name not in incoming]
    return df[untouched + incoming].copy()
@function_logger
def features2type(numeral, form=None, figbass=None):
    """ Turns a combination of the three chord features into a chord type.

    A missing ``numeral`` and the special symbols ``Fr``, ``Ger`` and ``It`` are returned
    unchanged. If ``form`` denotes a seventh chord (``%``, ``M``, ``+M``) but ``figbass``
    is ``6`` or ``64``, an error is logged and None is returned.

    Returns
    -------
    'M':    Major triad
    'm':    Minor triad
    'o':    Diminished triad
    '+':    Augmented triad
    'mm7':  Minor seventh chord
    'Mm7':  Dominant seventh chord
    'MM7':  Major seventh chord
    'mM7':  Minor major seventh chord
    'o7':   Diminished seventh chord
    '%7':   Half-diminished seventh chord
    '+7':   Augmented (minor) seventh chord
    '+M7':  Augmented major seventh chord
    """
    if pd.isnull(numeral) or numeral in ('Fr', 'Ger', 'It'):
        return numeral

    # Treat missing values as empty strings for the comparisons below.
    form = '' if pd.isnull(form) else form
    figbass = '' if pd.isnull(figbass) else figbass

    # Figured bass that is compatible with a triad
    if figbass in ('', '6', '64'):
        if form in ('o', '+'):
            return form
        if form not in ('%', 'M', '+M'):
            return 'm' if numeral.islower() else 'M'
        if figbass != '':
            logger.error(f"{form} is a seventh chord and cannot have figbass '{figbass}'")
            return None
        # form implies a seventh chord without figbass: continue below

    # Seventh chords
    if form in ('o', '%', '+', '+M'):
        return f"{form}7"
    third = 'm' if numeral.islower() else 'M'
    seventh = 'M' if form == 'M' else 'm'
    return third + seventh + '7'
@function_logger
def replace_special(df, regex, merge=False, inplace=False, cols={}, special_map={}):
    """
    | Move special symbols in the `numeral` column to a separate column and replace them by the explicit chords they stand for.
    | In particular, this function replaces the symbols `It`, `Ger`, and `Fr`.

    Uses: :py:func:`merge_changes`

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Dataframe containing DCML chord labels that have been split by split_labels().
    regex : :obj:`re.Pattern`
        Compiled regular expression used to split the labels replacing the special symbols. It needs to have named
        groups. The group names are used as column names unless replaced by `cols`.
    merge : :obj:`bool`, optional
        False: By default, existing values, except `figbass`, are overwritten.
        True: Merge existing with new values (for `changes` and `relativeroot`).
    cols : :obj:`dict`, optional
        The special symbols appear in the column `numeral` and are moved to the column `special`.
        In case the column names for ``['numeral','form', 'figbass', 'changes', 'relativeroot', 'special']`` deviate,
        pass a dict, such as

        .. code-block:: python

            {'numeral':         'numeral_col_name',
             'form':            'form_col_name',
             'figbass':         'figbass_col_name',
             'changes':         'changes_col_name',
             'relativeroot':    'relativeroot_col_name',
             'special':         'special_col_name'}

        Missing keys are added to the passed dictionary with the default column names.
    special_map : :obj:`dict`, optional
        In case you want to add or alter special symbols to be replaced, pass a replacement map, e.g.
        {'N': 'bII6'}. The column 'figbass' is only altered if it's None to allow for inversions of special chords.
    inplace : :obj:`bool`, optional
        Pass True if you want to mutate ``df``. In that case nothing is returned.

    Returns
    -------
    :obj:`pandas.DataFrame` or None
        The altered DataFrame, or None if ``inplace=True``.
    """
    if not inplace:
        df = df.copy()

    ### If the index is not unique, it has to be temporarily replaced
    tmp_index = not df.index.is_unique
    if tmp_index:
        ix = df.index
        df.reset_index(drop=True, inplace=True)

    # Default replacements; entries in special_map add to or override them.
    special2label = {
        'It': 'viio6(b3)/V',
        'Ger': 'viio65(b3)/V',
        'Fr': 'V43(b5)/V',
    }
    special2label.update(special_map)

    features = ['numeral', 'form', 'figbass', 'changes', 'relativeroot']
    # NOTE: this fills defaults into the dict object that was passed as `cols` (or into the shared default).
    for col in features + ['special']:
        if not col in cols:
            cols[col] = col
    # All values of `cols` are used here, i.e. also those of additional keys the caller may have included.
    feature_cols = list(cols.values())
    missing = [cols[f] for f in features if not cols[f] in df.columns]
    assert len(
        missing) == 0, f"These columns are missing from the DataFrame: {missing}. Either use split_labels() first or give correct `cols` parameter."
    select_all_special = df[df[cols['numeral']].isin(special2label.keys())].index

    logger.debug(f"Moving special symbols from {cols['numeral']} to {cols['special']}...")
    if not cols['special'] in df.columns:
        # The new column is placed directly before the numeral column.
        df.insert(df.columns.get_loc(cols['numeral']), cols['special'], pd.NA)
    df.loc[select_all_special, cols['special']] = df.loc[select_all_special, cols['numeral']]

    def repl_spec(frame, special, instead):
        """Check if the selected parts are empty and replace ``special`` by ``instead``."""
        new_vals = re.match(regex, instead)
        if new_vals is None:
            logger.warning(f"{instead} is not a valid label which could replace {special}. Skipped.")
            return frame
        else:
            new_vals = new_vals.groupdict()
        for f in features:
            # Only features that the replacement label actually specifies are written.
            if new_vals[f] is not None:
                replace_this = SM[:]  # by default, replace entire column
                if f == 'figbass':  # only empty figbass is replaced, with the exception of `Ger6` and `Fr6`
                    if special in ['Fr', 'Ger']:  # For these symbols, a wrong `figbass` == 6 is accepted and replaced
                        replace_this = (frame[cols['figbass']] == '6') | frame[cols['figbass']].isna()
                    else:
                        replace_this = frame[cols['figbass']].isna()
                elif f != 'numeral':  # numerals always replaced completely
                    not_empty = frame[cols[f]].notna()
                    if not_empty.any():
                        if f in ['changes', 'relativeroot'] and merge:
                            if f == 'changes':
                                frame.loc[not_empty, cols[f]] = frame.loc[not_empty, cols[f]].apply(merge_changes,
                                                                                                    args=(new_vals[f],))
                            elif f == 'relativeroot':
                                frame.loc[not_empty, cols[f]] = frame.loc[not_empty, cols[f]].apply(
                                    lambda x: f"{new_vals[f]}/{x}")
                            logger.debug(
                                f"While replacing {special}, the existing '{f}'-values have been merged with '{new_vals[f]}', resulting in :\n{frame.loc[not_empty, cols[f]]}")
                            # Merged cells are done; only the empty ones still receive the plain new value.
                            replace_this = ~not_empty
                        else:
                            logger.warning(
                                f"While replacing {special}, the following existing '{f}'-values have been overwritten with {new_vals[f]}:\n{frame.loc[not_empty, cols[f]]}")
                frame.loc[replace_this, cols[f]] = new_vals[f]
        return frame

    # Each special symbol is handled on a copy of its rows, which is then written back.
    for special, instead in special2label.items():
        select_special = df[cols['special']] == special
        df.loc[select_special, feature_cols] = repl_spec(df.loc[select_special, feature_cols].copy(), instead=instead,
                                                         special=special)

    # If no special symbol occurred, the (empty) column is removed again.
    if df[cols['special']].isna().all():
        df.drop(columns=cols['special'], inplace=True)

    if tmp_index:
        df.index = ix

    if not inplace:
        return df
def merge_changes(left, right, *args):
    """
    Merge two (or more) `changes` into one, e.g. `b3` and `+#7` to `+#7b3`.

    Every argument is split with :py:func:`changes2list` (unsorted). The collected items are
    ordered by the integer value of their element at position 3, highest first, and their
    elements at position 0 are joined into one string.

    Uses: :py:func:`changes2list`
    """
    collected = []
    for changes in (left, right, *args):
        collected.extend(changes2list(changes, sort=False))
    # presumably position 3 holds the chord interval and position 0 the full change -- see changes2list()
    collected.sort(key=lambda change: int(change[3]), reverse=True)
    return ''.join(change[0] for change in collected)
@function_logger
def propagate_keys(df, volta_structure=None, globalkey='globalkey', localkey='localkey', add_bool=True):
    """
    | Propagate information about global keys and local keys throughout the dataframe.
    | Pass split harmonies for one piece at a time. For concatenated pieces, use apply().

    Uses: :py:func:`series_is_minor`, :py:func:`resolve_relative_keys`, :py:func:`transform`

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Dataframe containing DCML chord labels that have been split by split_labels().
        It is copied, not mutated.
    volta_structure: :obj:`dict`, optional
        {first_mc -> {volta_number -> [mc1, mc2...]} } dictionary as you can get it from
        ``Score.mscx.volta_structure``. This allows for correct propagation into second and other voltas.
        Requires ``df`` to have a column ``mc``; otherwise it is ignored.
    globalkey, localkey : :obj:`str`, optional
        In case you renamed the columns, pass column names.
    add_bool : :obj:`bool`, optional
        Pass True if you want to add two boolean columns which are true if the respective key is
        a minor key.

    Returns
    -------
    :obj:`pandas.DataFrame`
        Copy of ``df`` with filled key columns and, if ``add_bool``, the columns
        ``{globalkey}_is_minor`` and ``{localkey}_is_minor``.

    Raises
    ------
    AssertionError
        If the global key column contains no value at all.
    NotImplementedError
        If the global key column contains more than one distinct value.
    """
    df = df.copy()
    nunique = df[globalkey].nunique()
    assert nunique > 0, "No global key specified."
    if nunique > 1:
        raise NotImplementedError("Several global keys not accepted at the moment.")

    logger.debug('Extending global key to all harmonies')
    global_key = df[globalkey].iloc[0]
    if pd.isnull(global_key):
        global_key = df[globalkey].dropna().iloc[0]
        logger.warning(
            f"Global key is not specified in the first label. Using '{global_key}' from index {df[df[globalkey] == global_key].index[0]}")
    df.loc[:, globalkey] = global_key
    global_minor = series_is_minor(df[globalkey])

    logger.debug('Extending local keys to all harmonies')
    if pd.isnull(df[localkey].iloc[0]):
        # Without an explicit local key, the piece starts in the tonic of the global key.
        one = 'i' if global_minor.iloc[0] else 'I'
        df.iloc[0, df.columns.get_loc(localkey)] = one

    use_voltas = volta_structure is not None and volta_structure != {}
    if use_voltas and 'mc' not in df.columns:
        logger.info("Dataframe needs to have a 'mc' column. Ignoring volta_structure.")
        use_voltas = False

    if use_voltas:
        # Collect the measure counts of every volta number across all volta groups.
        volta_mcs = defaultdict(list)
        for volta_dict in volta_structure.values():
            for volta_no, mcs in volta_dict.items():
                volta_mcs[volta_no].extend(mcs)
        # For each volta number: the measure counts that belong to any *other* volta number.
        volta_exclusion = {volta_no: [mc for vn, mcs in volta_mcs.items() for mc in mcs if vn != volta_no]
                           for volta_no in volta_mcs.keys()}
        # Forward-fill once per volta number (highest first), each time leaving out the
        # measures of the other voltas so that their keys are not carried over.
        for volta_no in sorted(volta_exclusion.keys(), reverse=True):
            selector = ~df.mc.isin(volta_exclusion[volta_no])
            df.loc[selector, localkey] = df.loc[selector, localkey].ffill()
    else:
        # Explicit assignment instead of a chained in-place fillna(): the latter is deprecated
        # and has no effect on `df` when pandas' Copy-on-Write mode is active.
        df[localkey] = df[localkey].ffill()

    if add_bool:
        gm = f"{globalkey}_is_minor"
        lm = f"{localkey}_is_minor"
        df[gm] = global_minor
        # Local keys written as relative keys (containing '/') are resolved before testing the mode.
        if df[localkey].str.contains('/').any():
            lk = transform(df, resolve_relative_keys, [localkey, gm], logger=logger)
        else:
            lk = df[localkey]
        local_minor = series_is_minor(lk)
        df[lm] = local_minor
    return df
@function_logger
def propagate_pedal(df, relative=True, drop_pedalend=True, cols={}):
    """
    Propagate the pedal note for all chords within square brackets.
    By default, the note is expressed in relation to each label's localkey.

    Uses: :py:func:`rel2abs_key`, :py:func:`abs2rel_key`

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Dataframe containing DCML chord labels that have been split by split_labels()
        and where the keys have been propagated using propagate_keys(). Needs a column ``mc``.
        It is copied, not mutated.
    relative : :obj:`bool`, optional
        Pass False if you want the pedal note to stay the same even if the localkey changes.
    drop_pedalend : :obj:`bool`, optional
        Pass False if you don't want the column with the ending brackets to be dropped.
    cols : :obj:`dict`, optional
        In case the column names for ``['pedal','pedalend', 'globalkey', 'localkey']`` deviate, pass a dict, such as

        .. code-block:: python

            {'pedal':       'pedal_col_name',
             'pedalend':    'pedalend_col_name',
             'globalkey':   'globalkey_col_name',
             'localkey':    'localkey_col_name'}

        Missing keys are added to the passed dictionary with the default column names.

    Returns
    -------
    :obj:`pandas.DataFrame`
        Copy of ``df`` where the pedal note is filled in for every row of an organ point.

    Raises
    ------
    AssertionError
        If the numbers of beginning and ending organ points differ, if ``relative`` is True and
        local keys are missing, or if several global keys occur within one organ point.
    """
    df = df.copy()
    ### If the index is not unique, it has to be temporarily replaced
    tmp_index = not df.index.is_unique
    if tmp_index:
        ix = df.index
        df.reset_index(drop=True, inplace=True)

    features = ['pedal', 'pedalend', 'globalkey', 'localkey']
    # NOTE: defaults are filled into the dict object that was passed as `cols`.
    for col in features:
        if not col in cols:
            cols[col] = col
    pedal, pedalend = cols['pedal'], cols['pedalend']

    logger.debug('Extending pedal notes to concerned harmonies')
    beginnings = df.loc[df[pedal].notna(), ['mc', pedal]]
    endings = df.loc[df[pedalend].notna(), ['mc', pedalend]]
    n_b, n_e = len(beginnings), len(endings)

    def make_comparison():
        """Put beginnings and endings side by side; only used for the assertion message."""
        return pd.concat([beginnings.reset_index(drop=True), endings.reset_index(drop=True)], axis=1).astype({'mc': 'Int64'})

    assert n_b == n_e, f"{n_b} organ points started, {n_e} ended:\n{make_comparison()}"

    if relative:
        assert df[cols['localkey']].notna().all(), "Local keys must first be propagated using propagate_keys(), no NaNs allowed."

    # Beginnings and endings are paired in order of appearance.
    for (fro, ped), to in zip(beginnings[pedal].items(), endings[pedalend].index):
        try:
            section = df.loc[fro:to].index
        except Exception:
            logger.error(
                f"Slicing of the DataFrame did not work from {fro} to {to}. Index looks like this:\n{df.head().index}")
            # Without a valid section there is nothing to fill; going on would reuse the
            # previous organ point's rows (or fail on an undefined name).
            continue
        localkeys = df.loc[section, cols['localkey']]
        if relative and localkeys.nunique() > 1:
            first_localkey = localkeys.iloc[0]
            globalkeys = df.loc[section, cols['globalkey']].unique()
            assert len(globalkeys) == 1, "Several globalkeys appearing within the same organ point."
            global_minor = globalkeys[0].islower()
            # if the localkey changes during the pedal point, the reference changes and the Roman numeral indicating
            # the pedal note needs to be adapted
            key2pedal = {
                key: ped if key == first_localkey else abs2rel_key(rel2abs_key(ped, first_localkey, global_minor, logger=logger), key, global_minor)
                for key in localkeys.unique()}
            logger.debug(
                f"Pedal note {ped} has been transposed relative to other local keys within a global {'minor' if global_minor else 'major'} context: {key2pedal}")
            pedals = pd.Series([key2pedal[key] for key in localkeys], index=section)
        else:
            pedals = pd.Series(ped, index=section)
        df.loc[section, pedal] = pedals

    if drop_pedalend:
        df = df.drop(columns=pedalend)

    if tmp_index:
        df.index = ix

    return df
######################################################################################################################## # MOMENTARILY NOT IN USE: ########################################################################################################################