"""Module implementing the segmentation of an audio file using the package
VMO."""
import numpy as np
import librosa
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import vmo
import vmo.analysis as van
import utils
import collections
import reprlib
# Extent of one section: ``min`` is its start boundary and ``max`` its end
# boundary, taken from consecutive boundaries returned by the segmentation.
# NOTE(review): units are whatever ``van.segmentation`` reports (presumably
# frame indices of the synchronised CQT) -- confirm.
Region = collections.namedtuple('Region', 'min max')
class Section(collections.namedtuple('Section', 'label region')):
    """Class representing a section of the segmentation.

    A section is a ``(label, region)`` pair.  Ordering comparisons (``<``,
    ``<=``, ``>``, ``>=``) and therefore ``min``/``max``/``sorted`` look at
    ``label`` only.  Equality and hashing are those of the underlying tuple,
    so two sections are equal only when both label and region match.
    """

    # Python 3 never calls ``__cmp__`` and has no ``cmp`` builtin, so the
    # label ordering is spelled out with rich comparison methods.

    def __lt__(self, other):
        if not isinstance(other, Section):
            return NotImplemented
        return self.label < other.label

    def __le__(self, other):
        if not isinstance(other, Section):
            return NotImplemented
        return self.label <= other.label

    def __gt__(self, other):
        if not isinstance(other, Section):
            return NotImplemented
        return self.label > other.label

    def __ge__(self, other):
        if not isinstance(other, Section):
            return NotImplemented
        return self.label >= other.label

    def __cmp__(self, other):
        """Three-way comparison of the labels.

        Args:
            other (Section): Section to compare against

        Returns:
            int: -1, 0 or 1 when this label is respectively lower than,
            equal to or greater than the other label
        """
        # Same result as the Python 2 ``cmp`` builtin, without needing it.
        return (self.label > other.label) - (self.label < other.label)
class Segmentation(object):
    """Class representing a segmentation.

    The segmentation is computed as soon as the object is created and is
    exposed as a list of ``Section`` objects together with the VMO oracle
    that produced it.  Iterating over the object yields its sections.
    """

    def __init__(self, audio_path):
        """Segment the audio file found at ``audio_path``.

        Args:
            audio_path (str): A string representing the path of the audio file
        """
        self._oracle = None
        self._segmentation = []
        self.generate(audio_path)

    @property
    def oracle(self):
        """VMO Oracle generated with the segmentation.

        Returns:
            VMO:Oracle
        """
        return self._oracle

    @property
    def segmentation(self):
        """List of sections generated by the segmentation.

        Returns:
            [Section]
        """
        return self._segmentation

    def __iter__(self):
        return iter(self.segmentation)

    def __repr__(self):
        # reprlib.repr takes a single object; hand it the whole list so long
        # segmentations are abbreviated instead of raising TypeError.
        sections = reprlib.repr(list(self))
        return 'Segmentation({})'.format(sections)

    def __str__(self):
        return str(list(self))

    def generate(self, audio_path, sr=44100, hop_length=512):
        """Segmentation of an audio recording using the Constant Q Transform
        (CQT)

        The result replaces the current ``oracle`` and ``segmentation``.

        Args:
            audio_path (str): A string representing the path of the audio file
            sr (int): Sampling rate (default: 44100)
            hop_length (int): Number of samples between successive CQT columns
                (default: 512)
        """
        # Extract feature
        y, sr = librosa.load(audio_path, sr=sr)
        cqt = librosa.core.cqt(y, sr=sr, hop_length=hop_length)
        # Synthetic beat grid with one boundary every 30 positions.
        # NOTE(review): an earlier comment said "33 frames" and the grid runs
        # up to len(y) rather than the number of CQT columns -- confirm the
        # intended spacing and extent.
        beat = range(1, len(y), 30)
        cqt_sync = librosa.feature.sync(cqt, beat, aggregate=np.median)
        cqt_stack = librosa.feature.stack_memory(cqt_sync, n_steps=3)
        features = cqt_stack.T
        dim = cqt_stack.shape[0]

        # Find ideal threshold
        r = (0, 1.1, 0.1)
        threshold = vmo.find_threshold(features, r=r, dim=dim)
        ideal_t = threshold[0][1]

        # Build Oracle
        cqt_vmo = vmo.build_oracle(features, flag='a', threshold=ideal_t,
                                   dim=dim)

        # Symbol spectral
        method = 'symbol_spectral'
        est_boundaries, est_labels = van.segmentation(cqt_vmo, method=method,
                                                      connectivity='lrs')
        # Consecutive boundaries delimit one section each.
        est_intervals = zip(est_boundaries[:-1], est_boundaries[1:])
        # Map the estimated labels onto consecutive integers 0..k-1.
        _, label_ids = np.unique(est_labels, return_inverse=True)

        # Setting attributes
        self._oracle = cqt_vmo
        self._segmentation = [
            Section(label, Region(start, end))
            for label, (start, end) in zip(label_ids, est_intervals)
        ]

    def output_png(self, filename):
        """Generate a PNG file of the segmentation

        Nothing is written when the segmentation is empty.

        Args:
            filename (str): A string representing the path of the generated file
        """
        if not self.segmentation:
            return

        # One colour per label, spread over the 'Accent' colormap.
        labels = [section.label for section in self.segmentation]
        norm = mpl.colors.Normalize(vmin=min(labels), vmax=max(labels))
        mappable = cm.ScalarMappable(norm=norm, cmap=plt.get_cmap('Accent'))

        figure = plt.figure(figsize=(12, 2))
        try:
            plt.title('Segmentation')
            # Echo the sections to stdout.
            print(self.segmentation)
            for label, region in self.segmentation:
                plt.fill_between([region.min, region.max], 0, 1,
                                 color=mappable.to_rgba(label))
            plt.xlim(0, self.segmentation[-1].region.max)
            plt.yticks([])
            plt.xticks([])
            plt.savefig(filename, bbox_inches='tight')
        finally:
            # Release only the figure created here, even if saving failed,
            # and leave any figure owned by the caller untouched.
            plt.close(figure)

    def save_oracle(self, filename):
        """Save the oracle to disk

        Nothing is written when no oracle has been generated.

        Args:
            filename (str): Filename to write the oracle
        """
        if self.oracle is not None:
            utils.save_shelf(filename, 'oracle', self.oracle)