# Source code for VMO_Score.segmentation

"""Module implementing the segmentation of an audio file using the package
VMO."""

import numpy as np
import librosa
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import vmo
import vmo.analysis as van
import utils
import collections
import reprlib


# Interval described by its two bounds, exposed as the `min` and `max` fields.
Region = collections.namedtuple('Region', ['min', 'max'])


class Section(collections.namedtuple('Section', 'label region')):
    """Class representing a section of the segmentation.

    A section is a ``(label, region)`` pair. Sections are ordered by their
    ``label`` only; equality remains the inherited tuple equality.
    """

    def __cmp__(self, other):
        """Three-way comparison of two sections by label.

        Kept for callers that invoke it explicitly. It is written without
        the ``cmp`` builtin, which does not exist on Python 3.

        Args:
            other (Section): The section to compare with

        Returns:
            int: -1, 0 or 1 when this label is respectively lower than,
            equal to or greater than the label of ``other``
        """
        # int() is required: subtracting NumPy booleans raises TypeError.
        return (int(self.label > other.label)
                - int(self.label < other.label))

    # Python 3 never consults __cmp__, so the ordering by label is also
    # expressed through the rich comparison methods.
    def __lt__(self, other):
        return self.label < other.label

    def __le__(self, other):
        return self.label <= other.label

    def __gt__(self, other):
        return self.label > other.label

    def __ge__(self, other):
        return self.label >= other.label
class Segmentation(object):
    """Class representing a segmentation of an audio recording.

    The segmentation is computed once, at construction time, by
    :meth:`generate`.
    """

    def __init__(self, audio_path):
        """Segment the audio file found at ``audio_path``.

        Args:
            audio_path (str): A string representing the path of the audio
                file
        """
        self._oracle = None
        self._segmentation = []
        self.generate(audio_path)

    @property
    def oracle(self):
        """VMO Oracle generated with the segmentation.

        Returns:
            VMO:Oracle
        """
        return self._oracle

    @property
    def segmentation(self):
        """List of sections generated by the segmentation.

        Returns:
            [Section]
        """
        return self._segmentation

    def __iter__(self):
        return iter(self.segmentation)

    def __repr__(self):
        # reprlib.repr() takes a single object; hand it the whole list
        # instead of unpacking the sections as positional arguments.
        sections = reprlib.repr(list(self))
        return 'Segmentation({})'.format(sections)

    def __str__(self):
        return str(list(self))

    def generate(self, audio_path, sr=44100, hop_length=512):
        """Segmentation of an audio recording using the Constant Q Transform
        (CQT)

        Stores the resulting oracle and list of sections on the instance;
        nothing is returned.

        Args:
            audio_path (str): A string representing the path of the audio
                file
            sr (int): Sampling rate (default: 44100)
            hop_length (int): Number of samples between successive CQT
                columns (default: 512)
        """
        # Extract feature
        y, sr = librosa.load(audio_path, sr=sr)
        cqt = librosa.core.cqt(y, sr=sr, hop_length=hop_length)
        # Synthetic beat grid: one boundary every 30 indices.
        # NOTE(review): the grid spans len(y) (audio samples) although it
        # is used to aggregate CQT columns -- presumably librosa clips the
        # out-of-range indices; confirm against the librosa version in use.
        beat = range(1, len(y), 30)
        cqt_sync = librosa.feature.sync(cqt, beat, aggregate=np.median)
        cqt_stack = librosa.feature.stack_memory(cqt_sync, n_steps=3)

        # Find ideal threshold
        r = (0, 1.1, 0.1)
        threshold = vmo.find_threshold(cqt_stack.T, r=r,
                                       dim=cqt_stack.shape[0])
        ideal_t = threshold[0][1]

        # Build Oracle
        cqt_vmo = vmo.build_oracle(cqt_stack.T, flag='a',
                                   threshold=ideal_t,
                                   dim=cqt_stack.shape[0])

        # Symbol spectral
        method = 'symbol_spectral'
        est_boundaries, est_labels = van.segmentation(cqt_vmo,
                                                      method=method,
                                                      connectivity='lrs')
        est_intervals = zip(est_boundaries[:-1], est_boundaries[1:])
        # Only the inverse index is needed: it gives each section the
        # position of its label among the sorted unique labels.
        _, invind = np.unique(est_labels, return_inverse=True)

        # Setting attributes
        self._oracle = cqt_vmo
        self._segmentation = [Section(l, Region(i1, i2))
                              for l, (i1, i2) in zip(invind, est_intervals)]

    def output_png(self, filename):
        """Generate a PNG file of the segmentation

        Nothing is written when the segmentation is empty.

        Args:
            filename (str): A string representing the path of the generated
                file
        """
        if not self.segmentation:
            return

        # Colour scale bounds are taken from the labels themselves, so the
        # result does not depend on how Section objects compare.
        labels = [section.label for section in self.segmentation]
        norm = mpl.colors.Normalize(vmin=min(labels), vmax=max(labels))
        cmap = cm.get_cmap('Accent')
        m = cm.ScalarMappable(norm=norm, cmap=cmap)

        plt.figure(figsize=(12, 2))
        plt.title('Segmentation')
        # One coloured band per section, coloured by its label.
        for label, region in self.segmentation:
            plt.fill_between([region.min, region.max], 0, 1,
                             color=m.to_rgba(label))
        plt.xlim(0, self.segmentation[-1].region.max)
        plt.yticks([])
        plt.xticks([])
        plt.savefig(filename, bbox_inches='tight')
        plt.close("all")

    def save_oracle(self, filename):
        """Save the oracle to disk

        Nothing is written when no oracle has been generated.

        Args:
            filename (str): Filename to write the oracle
        """
        if self.oracle is not None:
            utils.save_shelf(filename, 'oracle', self.oracle)