# Source code for auditok.signal

"""
Module for basic audio signal processing and array operations.

.. autosummary::
    :toctree: generated/

    to_array
    extract_single_channel
    compute_average_channel
    compute_average_channel_stereo
    separate_channels
    calculate_energy_single_channel
    calculate_energy_multichannel
"""
from array import array as array_
import audioop
import math

# Maps a sample width (size in bytes of one audio sample) to the matching
# `array.array` type code: "b" (signed char), "h" (signed short),
# "i" (signed int).
FORMAT = {1: "b", 2: "h", 4: "i"}
# Lower bound applied to the RMS value before its logarithm is taken, so that
# an RMS of 0 (silence) does not make `math.log10` raise a domain error.
_EPSILON = 1e-10


def to_array(data, sample_width, channels):
    """Convert raw audio data into a list of per-channel sample arrays.

    The result is always a list of `array.array` objects, one per channel,
    even when the audio data is mono (the list then holds a single array).

    Parameters
    ----------
    data : bytes
        raw audio data.
    sample_width : int
        size in bytes of one audio sample (one channel considered). Must be
        one of the keys of `FORMAT`.
    channels : int
        number of channels of audio data.

    Returns
    -------
    samples_arrays : list
        list of arrays of audio samples, one array per channel.

    Raises
    ------
    KeyError
        if `sample_width` is not a key of `FORMAT`.
    """
    typecode = FORMAT[sample_width]
    if channels != 1:
        # Interleaved multi-channel data has to be split channel by channel.
        return separate_channels(data, typecode, channels)
    # Mono data: the whole buffer is the one and only channel.
    return [array_(typecode, data)]
def extract_single_channel(data, fmt, channels, selected):
    """Extract the samples of one channel from interleaved audio data.

    Parameters
    ----------
    data : bytes
        multi-channel (interleaved) audio data.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples.
    channels : int
        number of channels of audio data.
    selected : int
        index of the channel to extract.

    Returns
    -------
    channel : array.array
        samples of the selected channel.
    """
    # Samples are interleaved, so the selected channel's samples sit at
    # positions selected, selected + channels, selected + 2 * channels, ...
    return array_(fmt, data)[selected::channels]
def compute_average_channel(data, fmt, channels):
    """Compute and return the average channel of multi-channel audio data.

    If the number of channels is 2, use
    :func:`compute_average_channel_stereo` (much faster). This function uses
    the standard `array` module to convert `bytes` data into an array of
    numeric values.

    Parameters
    ----------
    data : bytes
        multi-channel audio data to mix down.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample
        width is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    mono_audio : array.array
        mixed down audio samples, using the same type code as `fmt`.
    """
    interleaved = array_(fmt, data)
    # A stride of `channels` starting at each channel offset de-interleaves
    # the samples into one array per channel.
    per_channel = [interleaved[offset::channels] for offset in range(channels)]
    mixed = array_(fmt)
    # Each `frame` holds the samples of all channels at one point in time.
    for frame in zip(*per_channel):
        # Built-in `round` resolves exact halves to the nearest even integer.
        mixed.append(round(sum(frame) / channels))
    return mixed
def compute_average_channel_stereo(data, sample_width):
    """Compute and return the average channel of stereo audio data.

    This function should be used when the number of channels is exactly 2
    because in that case the standard `audioop` module can be used, which is
    *much* faster than calling :func:`compute_average_channel`.

    Parameters
    ----------
    data : bytes
        2-channel audio data to mix down.
    sample_width : int
        size in bytes of one audio sample (one channel considered). Must be
        one of the keys of `FORMAT`.

    Returns
    -------
    mono_audio : array.array
        mixed down audio samples.
    """
    # Look up the type code first so that an unsupported sample width is
    # reported before any audio processing takes place.
    typecode = FORMAT[sample_width]
    # Weight the left and right channels equally.
    mono_bytes = audioop.tomono(data, sample_width, 0.5, 0.5)
    return array_(typecode, mono_bytes)
def separate_channels(data, fmt, channels):
    """Split interleaved audio data into one `array.array` per channel.

    Parameters
    ----------
    data : bytes
        multi-channel (interleaved) audio data to split.
    fmt : str
        format (single character) to pass to `array.array` to convert `data`
        into an array of samples. This should be "b" if audio data's sample
        width is 1, "h" if it's 2 and "i" if it's 4.
    channels : int
        number of channels of audio data.

    Returns
    -------
    channels_arr : list
        list of audio channels, each as a standard `array.array`.
    """
    interleaved = array_(fmt, data)
    separated = []
    for offset in range(channels):
        # Every `channels`-th sample, starting at `offset`, belongs to the
        # channel with index `offset`; slicing yields a new array.
        separated.append(interleaved[offset::channels])
    return separated
def calculate_energy_single_channel(data, sample_width):
    r"""Calculate the energy of mono audio data.

    Energy is computed as:

    .. math::

        energy = 20 \log_{10}\left(\sqrt{\frac{1}{N}\sum_{i=1}^{N}{a_i}^2}\right)

    where `a_i` is the i-th audio sample and `N` is the number of audio
    samples in data.

    The root mean square is clamped to a small positive lower bound
    (`_EPSILON`) before the logarithm is taken, so all-zero or empty data
    yields a finite value instead of raising a math domain error.

    Parameters
    ----------
    data : bytes
        single-channel audio data.
    sample_width : int
        size in bytes of one audio sample.

    Returns
    -------
    energy : float
        energy of audio signal.
    """
    # NOTE: the docstring is a raw string because the LaTeX formula contains
    # backslashes that are not valid Python escape sequences.
    rms = audioop.rms(data, sample_width)
    # log10(0) is undefined, so never let the argument drop below _EPSILON.
    return 20 * math.log10(max(rms, _EPSILON))
def calculate_energy_multichannel(x, sample_width, aggregation_fn=max):
    """Calculate the energy of multi-channel audio data.

    Energy is calculated channel-wise. An aggregation function is applied to
    the resulting energies (default: `max`). Also see
    :func:`calculate_energy_single_channel`.

    Parameters
    ----------
    x : iterable
        audio channels; each item is passed as single-channel audio data to
        :func:`calculate_energy_single_channel`.
    sample_width : int
        size in bytes of one audio sample (one channel considered).
    aggregation_fn : callable, default: max
        aggregation function to apply to the resulting per-channel energies.
        It receives a lazy iterable of energies, one per channel.

    Returns
    -------
    energy : float
        aggregated energy of multi-channel audio signal.
    """
    # Energies are produced lazily, one channel at a time, as the
    # aggregation function consumes them.
    per_channel_energy = (
        calculate_energy_single_channel(channel, sample_width)
        for channel in x
    )
    return aggregation_fn(per_channel_energy)