Source code for emd.spectra

#!/usr/bin/python

# vim: set expandtab ts=4 sw=4:

"""
Routines relating to frequency transforms and power-spectra.

Frequency Transform Routines:
  frequency_transform
  quadrature_transform
  phase_from_complex_signal
  freq_from_phase
  phase_from_freq
  phase_angle

Power Spectra:
  holospectrum
  hilberthuang
  hilberthuang_1d

Power Spectra Helpers:
  define_hist_bins
  define_hist_bins_from_data

"""

import logging

import numpy as np

from . import cycles, imftools
from ._sift_core import interp_envelope
from .support import ensure_2d, ensure_equal_dims, ensure_vector

# Housekeeping for logging
logger = logging.getLogger(__name__)

# Sential value for observations outside histogram range
DROP_SENTINAL = np.iinfo(np.int32).min

##


[docs] def frequency_transform(imf, sample_rate, method, smooth_freq=3, smooth_phase=5): """Compute instantaneous phase, frequency and amplitude from a set of IMFs. Several approaches are implemented from [1]_ and [2]_. Parameters ---------- imf : ndarray Input array of IMFs. sample_rate : float Sampling frequency of the signal in Hz method : {'hilbert','quad','direct_quad','nht'} The method for computing the frequency stats smooth_phase : int Length of window when smoothing the unwrapped phase (Default value = 31) Returns ------- IP : ndarray Array of instantaneous phase estimates IF : ndarray Array of instantaneous frequency estimates IA : ndarray Array of instantaneous amplitude estimates References ---------- .. [1] Huang, N. E., Shen, Z., Long, S. R., Wu, M. C., Shih, H. H., Zheng, Q., … Liu, H. H. (1998). The empirical mode decomposition and the Hilbert spectrum for nonlinear and non-stationary time series analysis. Proceedings of the Royal Society of London. Series A: Mathematical, Physical and Engineering Sciences, 454(1971), 903–995. https://doi.org/10.1098/rspa.1998.0193 .. [2] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 """ from scipy.signal import hilbert logger.info('STARTED: compute frequency stats') imf = ensure_2d([imf], ['imf'], 'frequency_transform') logger.debug('computing on {0} samples over {1} imfs at sample rate {2}'.format(imf.shape[0], imf.shape[1], sample_rate)) # Each case here should compute the analytic form of the imfs and the # instantaneous amplitude. if method == 'hilbert': logger.info('Using Hilbert transform') analytic_signal = hilbert(imf, axis=0) # Estimate instantaneous amplitudes directly from analytic signal iamp = np.abs(analytic_signal) elif method == 'nht': logger.info('Using Amplitude-Normalised Hilbert transform') n_imf = imftools.amplitude_normalise(imf) analytic_signal = hilbert(n_imf, axis=0) orig_dim = imf.ndim if imf.ndim == 2: imf = imf[:, :, None] # Estimate inst amplitudes with spline interpolation iamp = np.zeros_like(imf) for ii in range(imf.shape[1]): for jj in range(imf.shape[2]): iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj], mode='upper') if orig_dim == 2: iamp = iamp[:, :, 0] elif method == 'ctrl': logger.info('Using Control Points - CURRENTLY BROKEN') orig_dim = imf.ndim if imf.ndim == 2: imf = imf[:, :, None] # Get phase from control points iphase = np.zeros_like(imf) for ii in range(imf.shape[1]): for jj in range(imf.shape[2]): good_cycles = cycles.get_cycle_inds_from_waveform(imf[:, ii, jj], cycle_start='asc') ctrl = cycles.get_control_points(imf[:, ii, jj], good_cycles) iphase[:, ii, jj] = phase_from_control_points(ctrl, good_cycles) iphase[:, ii, jj] = np.unwrap(iphase[:, ii, jj]) # Estimate inst amplitudes with spline interpolation iamp = np.zeros_like(imf) for ii in range(imf.shape[1]): for jj in range(imf.shape[2]): iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj], mode='upper') if orig_dim == 2: iamp = iamp[:, :, 0] iphase = iphase[:, :, 0] elif method == 'quad': logger.info('Using Quadrature transform') analytic_signal = quadrature_transform(imf) orig_dim = imf.ndim if imf.ndim == 2: imf = imf[:, :, None] # Estimate inst amplitudes with spline interpolation iamp = np.zeros_like(imf) for ii in range(imf.shape[1]): for jj in range(imf.shape[2]): iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj], mode='upper') if orig_dim == 2: iamp = iamp[:, :, 0] elif method == 'direct_quad': logger.info('Using Direct-Quadrature transform') raise ValueError('direct_quad method is broken!') n_imf = imftools.amplitude_normalise(imf.copy()) iphase = np.unwrap(phase_angle(n_imf)) iamp = np.zeros_like(imf) for ii in range(imf.shape[1]): iamp[:, ii] = interp_envelope(imf[:, ii, None], mode='combined') else: logger.error("Method '{0}' not recognised".format(method)) raise ValueError("Method '{0}' not recognised\nPlease use one of 'hilbert','nht' or 'quad'".format(method)) if method != 'ctrl': # Compute unwrapped phase for frequency estimation iphase = phase_from_complex_signal(analytic_signal, smoothing=smooth_phase, ret_phase='unwrapped') # Compute inst. freq from phase ifreq = freq_from_phase(iphase, sample_rate, savgol_width=smooth_freq) # Return wrapped phase iphase = imftools.wrap_phase(iphase) logger.info('COMPLETED: compute frequency stats. Returning {0} imfs'.format(iphase.shape[1])) return iphase, ifreq, iamp
#%% ----------------------------------------------------- # Frequency stat utils def quadrature_transform(X, fix_zerocrossings=False): """Compute the quadrature transform on a set of time-series. This algorithm is defined in equation 34 of [1]_. The return is a complex array with the input data as the real part and the quadrature transform as the imaginary part. Parameters ---------- X : ndarray Array containing time-series to transform Returns ------- quad_signal : ndarray Complex valued array containing the quadrature transformed signal References ---------- .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 """ nX = imftools.amplitude_normalise(X.copy(), clip=False) nX = nX / (np.abs(nX).max() + 1e-8) # Clip any remaining points outside -1 < x < 1 # Avoid occasional 'invalid value encountered' RuntimeWarning in sqrt using # where argument in ufunc tmp = 1 - nX**2 good_vals = (tmp != 0) & (np.isnan(tmp) == False) # noqa: E712 imagX = np.sqrt(tmp, out=tmp, where=good_vals) # Add warning here.... if np.all(np.isreal(imagX)) == False: # noqa: E712 imagX = imagX.real mask = ((np.diff(nX, axis=0) > 0) * -2) + 1 mask[mask == 0] = -1 mask = np.r_[mask, mask[-1, None, :]] q = imagX * mask if fix_zerocrossings: q = _fix_quadrature_zero_crossings(q) return nX + 1j * q def _fix_quadrature_zero_crossings(quad): """Numerical 'fix' for instability around zero in quadrature signals. EXPERIMENTAL WORK-IN-PROGRESS FUNCTION!! Use with caution. Replaces sample closest to zero with the average of the four surrounding points. This is needed as the direct quadrature method involves squaring the raw signal - this is normally fine but explodes when close to zero. """ quad_fix = quad.copy() for ii in range(quad.shape[1]): # Find all zero crossings zc = np.where(np.diff(np.sign(quad[:, ii]), axis=0) != 0)[0] # Drop crossings we don't have surrounding samples for zc = zc[(zc >= 1) & (zc < quad.shape[0]-3)] # Delay-embedding array around zero-crossing zz = np.vstack((quad[zc-1, ii], quad[zc, ii], quad[zc+1, ii], quad[zc+2, ii], quad[zc+3, ii])).T # Take a copy for output and replace 'fixed' zero-crossing point. quad_fix[zc+1, ii] = zz[:, np.array((0, 4))].mean(axis=1) quad_fix[zc, ii] = np.average(zz[:, np.array((0, 4))], weights=[3/4, 1/4], axis=1) quad_fix[zc+2, ii] = np.average(zz[:, np.array((0, 4))], weights=[1/4, 3/4], axis=1) from scipy.ndimage import median_filter quad_fix = median_filter(quad_fix, (7, 1)) return quad_fix
[docs] def phase_from_complex_signal(complex_signal, smoothing=None, ret_phase='wrapped', phase_jump='ascending'): """Compute the instantaneous phase from a complex signal. The complex input may be obtained from either the Hilbert Transform or by Direct Quadrature. Parameters ---------- complex_signal : complex ndarray Complex valued input array smoothing : int Integer window length used in phase smoothing (Default value = None) ret_phase : {'wrapped','unwrapped'} Flag indicating whether to return the wrapped or unwrapped phase (Default value = 'wrapped') phase_jump : {'ascending','peak','descending','trough'} Flag indicating where in the cycle the phase jump should be (Default value = 'ascending') Returns ------- IP : ndarray Array of instantaneous phase values """ # Compute unwrapped phase iphase = np.unwrap(np.angle(complex_signal), axis=0) orig_dim = iphase.ndim if iphase.ndim == 2: iphase = iphase[:, :, None] # Apply smoothing if requested from scipy.signal import medfilt if smoothing is not None: for ii in range(iphase.shape[1]): for jj in range(iphase.shape[2]): iphase[:, ii, jj] = medfilt(iphase[:, ii, jj], smoothing) if orig_dim == 2: iphase = iphase[:, :, 0] # Set phase jump point to requested part of cycle if phase_jump == 'ascending': iphase = iphase + np.pi / 2 elif phase_jump == 'peak': pass # do nothing elif phase_jump == 'descending': iphase = iphase - np.pi / 2 elif phase_jump == 'trough': iphase = iphase + np.pi if ret_phase == 'wrapped': return imftools.wrap_phase(iphase) elif ret_phase == 'unwrapped': return iphase
[docs] def freq_from_phase(iphase, sample_rate, savgol_width=3): """Compute the instantaneous frequency from the instantaneous phase. A savitsky-golay filter is used to compute the derivative of the phase and can be smoothed by specifying a longer savgol_width (minimum value=3). Parameters ---------- iphase : ndarray Input array containing the unwrapped instantaneous phase time-course sample_rate : float The sampling frequency of the data savgol_width : int >= 3 The window length of the Savitsky-Golay filter window Returns ------- IF : ndarray Array containing the instantaneous frequencies """ from scipy.signal import savgol_filter # Differential of instantaneous phase iphase = savgol_filter(iphase, savgol_width, 1, deriv=1, axis=0) # Convert to freq ifrequency = iphase / (2.0 * np.pi) * sample_rate return ifrequency
[docs] def phase_from_freq(ifrequency, sample_rate, phase_start=-np.pi): """Compute the instantaneous phase of a signal from its instantaneous frequency. Parameters ---------- ifrequency : ndarray Input array containing the instantaneous frequencies of a signal sample_rate : float The sampling frequency of the data phase_start : float Start value of the phase output (Default value = -np.pi) Returns ------- IP : ndarray The instantaneous phase of the signal """ iphase_diff = (ifrequency / sample_rate) * (2 * np.pi) iphase = phase_start + np.cumsum(iphase_diff, axis=0) return iphase
def phase_from_control_points(ctrl, cycles): """Compute instantaneous phase from control points.""" from scipy import interpolate as interp cycles = ensure_vector([cycles], ['cycles'], 'phase_from_control_points') ip = np.zeros_like(cycles, dtype=float) phase_y = np.array([0, np.pi / 2, np.pi, 3 * np.pi / 2, 2 * np.pi]) for jj in range(1, cycles.max() + 1): if np.any(np.isnan(ctrl[jj-1, :])): continue f = interp.interp1d(ctrl[jj-1, :], phase_y, kind='linear') ph = f(np.arange(0, ctrl[jj-1, -1] + 1)) ip[cycles == jj] = ph return ip def direct_quadrature(fm): """Compute the quadrature transform on a set of time-series. This algorithm is defined in equation 35 of [1]. Section 3.2 of 'on instantaneous frequency' THIS IS IN DEVELOPMENT Parameters ---------- fm : ndarray Input signal containing a frequency-modulated signal References ---------- .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 """ ph = phase_angle(fm) # We'll have occasional nans where fm==1 or -1 inds = np.argwhere(np.isnan(ph)) vals = (ph[inds[:, 0] - 1, :] + ph[inds[:, 0] + 1, :]) / 2 ph[inds[:, 0]] = vals return ph def phase_angle(fm): """Compute the phase angle of a set of time-series. This algorithm is defined in equation 35 of [1]_. THIS IS IN DEVELOPMENT Parameters ---------- X : ndarray Array containing time-series to transform Returns ------- quad_signal : ndarray Complex valued array containing the quadrature transformed signal References ---------- .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 """ return np.arctan(fm / np.lib.scimath.sqrt(1 - np.power(fm, 2))) #%% ----------------------------------------------------- # Time-frequency spectra
[docs] def hilberthuang(IF, IA, edges=None, sum_time=True, sum_imfs=True, mode='power', sample_rate=1, scaling=None, return_sparse=False, return_Gb_limit=10): """Compute a Hilbert-Huang transform (HHT). The Hilbert-Huang transform takes the instataneous frequency and instantaneous amplitude of a time-series and represents the energy of a signal across time and frequency [1]_. The full Hilbert-Huang array is 3-dimensional [nfrequencies x ntimes x nimfs]. By default, the returned holospectrum is summed across time and IMFs, returning only the frequency dimension.- this behaviour can be tuned with the sum_time and sum_imfs arguments. Setting return_sparse to True is strongly recommended returning very large arrays. Parameters ---------- IF : ndarray 2D first level instantaneous frequencies IA : ndarray 2D first level instantaneous amplitudes edges : {ndarray, tuple or None} Definition of the frequency bins used in the spectrum. This may be: * array_like vector of bin edge values (as defined by emd.spectra.define_hist_bins) * a tuple of values that can be passed to emd.spectra.define_hist_bins (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz) * None in which case a sensible set of bins will be defined from the input data (this is the default option) sum_time : boolean Flag indicating whether to sum across time dimension sum_imfs : boolean Flag indicating whether to sum across IMF dimension mode : {'power','amplitude'} Flag indicating whether to sum the power or amplitudes (Default value = 'power') scaling : {'density', 'spectrum', None} Switch specifying the normalisation or scaling applied to the spectrum. sample_rate : float Sampling rate of the data used in 'density' scaling return_sparse : bool Flag indicating whether to return the full or sparse form(Default value = False) return_Gb_limit : {float, None} Maximum array size in Gb that will be returned if a non-sparse/dense array is being returned (default = 10). If the function return would exceed this size, the function will raise an error. If set to None, then no limit is imposed. Sparse arrays are always returned. Returns ------- f : ndarray Vector of histogram bin centers for each frequency hht : ndarray 2D array containing the Hilbert-Huang Transform Notes ----- Run a HHT with an automatically generated set of histogram bins: >>> f, hht = emd.spectra.hilberthuang(IF, IA, sample_rate=512) Run a HHT and return the spectrum for each IMF separately >>> f, hht = emd.spectra.hilberthuang(IF, IA,, sample_rate=512, sum_imfs=False) Run a HHT with 49 bins, linearly spaced between 1 and 50Hz >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512) Run a HHT with 49 bins, logarithmically spaced between 0.001 and 50Hz >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49, 'log'), sample_rate=512) Run a HHT with an externally generated set of histogram bin edges >>> my_edges = np.array([0.5, 2, 5, 11, 22]) >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=my_edges sample_rate=512) Run a HHT and return the full time dimension - the HHT is summed over time by default >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512, sum_time=False) Run a HHT and return a memory efficient sparse array - this is strongly recommended for very large HHTs >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512, >>> sum_time=False, return_sparse=True) If return_sparse is set to True the returned array is a sparse matrix in COOrdinate form using sparse package (sparse.COO). This is much more memory efficient than the full form but may not behave as expected in all functions expecting full arrays. References ---------- .. [1] Huang, N. E., Shen, Z., Long, S. R., Wu, M. C., Shih, H. H., Zheng, Q., … Liu, H. H. (1998). The empirical mode decomposition and the Hilbert spectrum for nonlinear and non-stationary time series analysis. Proceedings of the Royal Society of London. Series A: Mathematical, Physical and Engineering Sciences, 454(1971), 903–995. https://doi.org/10.1098/rspa.1998.0193 """ # Housekeeeping IF, IA = ensure_2d([IF, IA], ['IF', 'IA'], 'hilberthuang') ensure_equal_dims((IF, IA), ('IF', 'IA'), 'hilberthuang') logger.info('STARTED: compute Hilbert-Huang Transform') logger.debug('computing on {0} samples over {1} IMFs '.format(IF.shape[0], IF.shape[1])) edges, bins = _histogram_bin_relay(edges, IF.flatten()) logger.debug('Freq bins: {0} to {1} in {2} steps'.format(edges[0], edges[-1], len(edges))) # Begin computation spec = _base_spectra(IF, IA, edges) sum_dims = np.where([0, sum_time, sum_imfs])[0] spec = _post_process_spectra(spec, sum_dims=sum_dims, mode=mode, time_dim=1, sample_rate=sample_rate, scaling=scaling, return_sparse=False, return_Gb_limit=return_Gb_limit) logger.info('COMPLETED: Hilbert-Huang Transform - output size {0}'.format(spec.shape)) return bins, spec
[docs] def holospectrum(IF, IF2, IA2, edges=None, edges2=None, sum_time=True, sum_first_imfs=True, sum_second_imfs=True, mode='power', sample_rate=1, scaling=None, return_sparse=False, return_Gb_limit=10): """Compute a Holospectrum. Holospectra are computed from the first and second layer frequecy statistics of a dataset. The Holospectrum represents the energy of a signal across time, carrier frequency and amplitude-modulation frequency [1]_. The full Holospctrum is a 5-dimensional array: [nfrequencise x namplitude_frequencies x time x first_imfs x second_imfs] By default, the returned holospectrum is summed across time and IMFs, returning only the first two dimensions - this behaviour can be tuned with the sum_time, sum_first_imfs and sum_second_imfs arguments. WARNING: returning the full Holospectrum can create some enormous arrays! Setting return_sparse=True is VERY strongly recommended if you want to work with the raw time and IMF dimensions. Parameters ---------- IF : ndarray 2D first level instantaneous frequencies IF2 : ndarray 3D second level instantaneous frequencies IA2 : ndarray 3D second level instantaneous amplitudes edges : {ndarray, tuple or None} Definition of the frequency bins used for carrier frequencies in the spectrum. This may be: * array_like vector of bin edge values (as defined by emd.spectra.define_hist_bins) * a tuple of values that can be passed to emd.spectra.define_hist_bins (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz) * None in which case a sensible set of bins will be defined from the input data (this is the default option) edges2 : {ndarray, tuple or None} Definition of the frequency bins used for amplitude modulation frequencies in the spectrum. The options are the same as for `edges`. sum_time : boolean Flag indicating whether to sum across time dimension sum_first_imfs : boolean Flag indicating whether to sum across first-layer IMF dimension sum_second_imfs : boolean Flag indicating whether to sum across the second-layer IMF dimension mode : {'power','amplitude'} Flag indicating whether to sum the power or amplitude (Default value = 'power') scaling : {'density', 'spectrum', None} Switch specifying the normalisation or scaling applied to the spectrum. sample_rate : float Sampling rate of the data used in 'density' scaling return_sparse : boolean Flag indicating whether to return a sparse or dense (normal numpy) array. return_Gb_limit : {float, None} Maximum array size in Gb that will be returned if a non-sparse/dense array is being returned (default = 10). If the function return would exceed this size, the function will raise an error. If set to None, then no limit is imposed. Sparse arrays are always returned. Returns ------- f_carrier : ndarray Vector of histogram bin centers for each carrier (first-level) frequency f_am : ndarray Vector of histogram bin centers for each amplitude modulation (second-level) frequency holo : ndarray Holospectrum of input data. Notes ----- Run a Holospectrum with an automatically generated set of histogram bins: >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IA, sample_rate=512) Run a Holospectrum and return the spectrum for each first and second level IMF separately >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IA, sample_rate=512, >>> sum_first_imfs=False, sum_second_imfs=False) Run a Holospectrum with 49 carrier frequency bins linearly spaced between 1 and 50Hz and 32 amplitude modulation frequency bins logarithmicly spaced between 0.1 and 20Hz >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IA, sample_rate=512, >>> edges=(1, 50, 49), >>> edges2=(0.1, 20, 32, 'log')) Run a Holospectrum without summing over the time dimensions and return the result in a memory efficient sparse array - this is strongly recommended for very large HHTs >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IA, sample_rate=512, >>> edges=(1, 50, 49), >>> edges2=(0.1, 20, 32, 'log', >>> sum_time=False, return_sparse=True) If return_sparse is set to True the returned array is a sparse matrix in COOrdinate form using sparse package (sparse.COO). This is much more memory efficient than the full form but may not behave as expected in all functions expecting full arrays. References ---------- .. [1] Huang, N. E., Hu, K., Yang, A. C. C., Chang, H.-C., Jia, D., Liang, W.-K., … Wu, Z. (2016). On Holo-Hilbert spectral analysis: a full informational spectral representation for nonlinear and non-stationary data. Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences, 374(2065), 20150206. https://doi.org/10.1098/rsta.2015.0206 """ # Housekeeping logger.info('STARTED: compute Holospectrum') out = ensure_2d((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum') IF, IF2, IA2 = out ensure_equal_dims((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum', dim=0) ensure_equal_dims((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum', dim=1) msg = 'computing on {0} samples over {1} first-level IMFs and {2} second level IMFs' logger.debug(msg.format(IF2.shape[0], IF2.shape[1], IF2.shape[2])) edges, bins = _histogram_bin_relay(edges, IF.flatten()) logger.debug('First level freq bins: {0} to {1} in {2} steps'.format(edges[0], edges[-1], len(edges))) edges2, bins2 = _histogram_bin_relay(edges2, IF2.flatten()) logger.debug('Second level freq bins: {0} to {1} in {2} steps'.format(edges2[0], edges2[-1], len(edges2))) # Begin computation holo = _higher_order_spectra(IF, IF2, IA2, edges, edges2) sum_dims = np.where([0, 0, sum_time, sum_first_imfs, sum_second_imfs])[0] holo = _post_process_spectra(holo, sum_dims=sum_dims, mode='power', time_dim=2, sample_rate=sample_rate, scaling=scaling, return_sparse=False, return_Gb_limit=return_Gb_limit) logger.info('COMPLETED: Holospectrum - output size {0}'.format(holo.shape)) return bins, bins2, holo
[docs] def hilbertmarginal(IF, IA, order=2, freq_edges=None, amp_edges=None, sum_time=True, sum_imfs=True, sample_rate=1, scaling=None, return_sparse=False, return_Gb_limit=10): """Compute a generalised Hilbert marginal spectrum. This is an experimental function which probably implements the method introduced in Huang et al (2008) _[1]. This creates a 2D amplitude-frequency representation of the signal. Parameters ---------- IF : ndarray 2D first level instantaneous frequencies IA : ndarray 2D first level instantaneous amplitudes order : int Power to which amplitude is raised before spectrum computation. freq_edges: {ndarray, tuple or None} Definition of the frequency bins used in the spectrum. This may be: * array_like vector of bin edge values (as defined by emd.spectra.define_hist_bins) * a tuple of values that can be passed to emd.spectra.define_hist_bins (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz) * None in which case a sensible set of bins will be defined from the input data (this is the default option) amp_edges : {ndarray, tuple or None} Definition of amplitude bins used in spectrum. Format options are the same as for `freq_edges`. sum_time : boolean Flag indicating whether to sum across time dimension sum_imfs : boolean Flag indicating whether to sum across IMF dimension sample_rate : float Sampling rate of the data used in 'density' scaling scaling : {'density', 'spectrum', None} Switch specifying the normalisation or scaling applied to the spectrum. return_sparse : bool Flag indicating whether to return the full or sparse form(Default value = True) return_Gb_limit : {float, None} Maximum array size in Gb that will be returned if a non-sparse/dense array is being returned (default = 10). If the function return would exceed this size, the function will raise an error. If set to None, then no limit is imposed. Sparse arrays are always returned. Returns ------- a : ndarray Vector of histogram bin centers for each amplitude f : ndarray Vector of histogram bin centers for each frequency hht : ndarray 2D array containing the Hilbert-Huang Transform References ---------- .. [1] Huang, Y. X., Schmitt, F. G., Lu, Z. M., & Liu, Y. L. (2008). An amplitude-frequency study of turbulent scaling intermittency using Empirical Mode Decomposition and Hilbert Spectral Analysis. In EPL (Europhysics Letters) (Vol. 84, Issue 4, p. 40010). IOP Publishing. https://doi.org/10.1209/0295-5075/84/40010 """ logger.info('STARTED: compute Hilbert-Marginal spectrum') IF, IA = ensure_2d([IF, IA], ['IF', 'IA'], 'hilberthuang') ensure_equal_dims((IF, IA), ('IF', 'IA'), 'hilberthuang') freq_edges, freq_bins = _histogram_bin_relay(freq_edges, IF.flatten()) logger.debug('Freq bins: {0} to {1} in {2} steps'.format(freq_edges[0], freq_edges[-1], len(freq_edges))) amp_edges, amp_bins = _histogram_bin_relay(amp_edges, IA.flatten()) logger.debug('Amp bins: {0} to {1} in {2} steps'.format(amp_edges[0], amp_edges[-1], len(amp_edges))) # Compute HOS - distribution of amplitude across amplitude, frequency, # time and IMF hima = _higher_order_spectra(IA, IF[:, :, None], IA[:, :, None], amp_edges, freq_edges) # hima is a spase array of dimensions # [len(edges), len(edges2), num_samples, num_imfs, 1] # Get amplitude values in broadcastable shape A_values = np.reshape(IA, (1, 1, IA.shape[0], IA.shape[1], 1))**order # Scale hima by amplitude values and resolution dA = np.diff(amp_bins)[0] hima = hima * A_values * dA # Create PDF histogram sum_dims = np.where([0, 0, sum_time, sum_imfs, 1])[0] hima = hima.sum(axis=sum_dims) hima = hima / IF.size # Post-process - summing as already been done hima = _post_process_spectra(hima, mode=None, sample_rate=sample_rate, scaling=scaling, return_sparse=return_sparse, return_Gb_limit=return_Gb_limit) logger.info('COMPLETED: Hilbert-Marginal Spectrum - output size {0}'.format(hima.shape)) return amp_bins, freq_bins, hima
def _post_process_spectra(spec, sum_dims=None, mode='power', scaling=None, time_dim=1, sample_rate=1, return_sparse=False, return_Gb_limit=10): """Apply standard processes to input spectrum. This function implements a set of processing options common to all hilbert-huang based spectra. Parameters ---------- spec : ndarray 2 or 3d input spectrum, usually a sparse array sum_dims : int or list of int Flag indicating whether to sum across time dimension mode : {'power', 'amplitude'} Switch specifying whether the distribution should return amplitude or power (amplitude squared) values. scaling : {'density', 'spectrum', None} Switch specifying the normalisation or scaling applied to the spectrum. time_dim : int Axis index of the dimension across time. This is used when applying some normalisations or scalings. return_sparse : boolean Flag indicating whether to return a sparse or dense (normal numpy) array. return_Gb_limit : {float, None} Maximum array size in Gb that will be returned if a non-sparse/dense array is being returned (default = 10). If the function return would exceed this size, the function will raise an error. If set to None, then no limit is imposed. Sparse arrays are always returned. Returns ------- ndarray processed power spectrum See Also -------- hilberthuang, holospectrum, hilbertmarginal """ # No housekeeping here - assume that inputs have been sanitised by higher level functions. if mode == 'power': logger.debug('Squaring amplitude to compute power') spec = spec**2 if scaling == 'density': logger.debug("Applying scaling: 'density'.") spec = spec / (sample_rate * spec.shape[time_dim]) elif scaling == 'spectrum': logger.debug("Applying scaling: 'spectrum'.") spec = spec / spec.shape[time_dim] elif scaling is None: pass else: logger.error('Unknown scaling: {0}'.format(scaling)) raise ValueError('Unknown scaling: {0}'.format(scaling)) if (sum_dims is not None) and (len(sum_dims) > 0): orig_dim = spec.shape spec = spec.sum(axis=sum_dims) msg = "Summing across dimensions {0}. Input dims ({1}) -> output dims ({2})" logger.debug(msg.format(sum_dims, orig_dim, spec.shape)) if (return_sparse is False) and (return_Gb_limit is not None): byte_size = spec.size * 8 # sparse arrays don't have itemsize attr so assuming 8 for now if (byte_size / (1024**3)) > return_Gb_limit: msg = "Converting the output to dense format will create a very large array\n" msg += "This spectrum is about to return a {0}Gb numpy array - the limit is set to {1}Gb.\n" msg += "please either set 'return_sparse' to True to get a memory-efficient sparse array, or \n" msg += "change 'return_Gb_limit' if you really want the dense array..." logger.warning(msg.format(byte_size / (1024**3), return_Gb_limit)) raise RuntimeError(msg.format(byte_size / (1024**3), return_Gb_limit)) spec = spec.todense() msg = 'Converting output to dense array - size {0}Gb' logger.debug(msg.format(byte_size / (1024**3), return_Gb_limit)) else: msg = 'Returning a sparse array - size {0}Gb' logger.debug(msg.format(byte_size / (1024**3), return_Gb_limit)) return spec def _base_spectra(X, Z, x_edges): """Compute a 2-dimensional Hilbert-Huang distribution. This is a helper function for constructing a sparse array representation of a two dimensional distribution of power. This function would not normally be called by the user. Parameters ---------- X : ndarray 2d array of values defining the first dimension, usually [samples x imfs] Z : ndarray 2d array of amplitude or power values matching the size of input X x_edges : ndarray Vector array containing bin edges for input X Returns ------- sparse_array Sparse array representation of two dimensional distribution. See Also -------- hilberthuang """ # No housekeeping here - assume that inputs have been sanitised by higher level functions. # Find bin indices for first dimension x_inds = _digitize(X, x_edges) # Find bin indices for time dimension - cast to match input shape t_inds = np.broadcast_to(np.arange(x_inds.shape[0])[:, np.newaxis], x_inds.shape) # Find bin indices for IMF dimension - cast to match input shape i_inds = np.broadcast_to(np.arange(X.shape[1])[np.newaxis, :], x_inds.shape) # Create vectorised COO coordinate array coords = np.c_[x_inds.flatten(), t_inds.flatten(), i_inds.flatten()].T # Vectorise amplitude values to match coordinates Z = Z.flatten() # Drop observations which lie outside specified bin edges drops = np.any(coords == DROP_SENTINAL, axis=0) coords = np.delete(coords, drops, axis=1) Z = np.delete(Z, drops) # Compute final shape final_shape = (x_edges.shape[0]-1, x_inds.shape[0], x_inds.shape[1]) # Create sparse spectrum from sparse import COO s = COO(coords, Z, shape=final_shape) return s def _higher_order_spectra(X, Y, Z, x_edges, y_edges): """Compute a 3-dimensional Hilbert-Huang distribution. This is a helper function for constructing a sparse array representation of a three dimensional distribution of power. This would not normally be called by the user. Parameters ---------- X : ndarray 2d array of values defining the first dimension, usually [samples x imfs] Y : ndarray 3d array of values defining the second dimension, usually [samples x imfs x imfs] Z : ndarray 3d array of amplitude or power values matching the size of input Y x_edges : ndarray Vector array containing bin edges for input X y_edges : ndarray Vector array containing bin edges for input Y Returns ------- sparse_array Sparse array representation of three dimensional distribution. See Also -------- holospectrum, hilbertmarginal """ # No housekeeping here - assume that inputs have been sanitised by higher level functions. # Find bin indices for user specified dimensions x_inds = _digitize(X, x_edges) y_inds = _digitize(Y, y_edges) x_inds = np.broadcast_to(x_inds[:, :, np.newaxis], y_inds.shape) # Find bin indices for time dimension - cast to match input shape t_inds = np.broadcast_to(np.arange(x_inds.shape[0])[:, np.newaxis, np.newaxis], y_inds.shape) # Find bin indices for first IMF dimension - cast to match input shape i_inds = np.broadcast_to(np.arange(X.shape[1])[np.newaxis, :, np.newaxis], y_inds.shape) # Find bin indices for second IMF dimension - cast to match input shape j_inds = np.broadcast_to(np.arange(Y.shape[2])[np.newaxis, np.newaxis, :], y_inds.shape) # Create vectorised COO coordinate array coords = np.c_[x_inds.flatten(), y_inds.flatten(), t_inds.flatten(), i_inds.flatten(), j_inds.flatten()].T # Vectorise amplitude values to match coordinates Z = Z.flatten() # Drop observations which lie outside specified bin edges drops = np.any(coords == DROP_SENTINAL, axis=0) Z = np.delete(Z, drops) coords = np.delete(coords, drops, axis=1) # Compute final shape final_shape = (x_edges.shape[0]-1, y_edges.shape[0]-1, x_inds.shape[0], x_inds.shape[1], y_inds.shape[2]) # Create sparse spectrum from sparse import COO s = COO(coords, Z, shape=final_shape) return s def _digitize(vals, edges): """Return index of values into a set of defined bins. Parameters ---------- vals : array_like Array of values to be binned edges : array_like Array containing the edges of bins. N edges define N-1 bins. Edges are inclusive on both the left and right. Returns ------- ndarray Array containing index of each data point in vals into bins defined by edges Notes ----- This function is a wrapper for np.digitize but has important differences. 1. This function returns a sentinel value for observations outside the range of the specified bin edges 2. This bin edges in this function are inclusive on the lower end and exclusive on the top. """ drops = np.logical_or(vals < edges[0], vals >= edges[-1]) inds = np.digitize(vals, edges) - 1 inds[drops] = DROP_SENTINAL return inds #%% ----------------------------------------------------- # Utilities
[docs] def define_hist_bins(data_min, data_max, nbins, scale='linear'): """Define the bin edges and centre values for use in a histogram. Parameters ---------- data_min : float Value for minimum edge data_max : float Value for maximum edge nbins : int Number of bins to create scale : {'linear','log'} Flag indicating whether to use a linear or log spacing between bins (Default value = 'linear') Returns ------- edges : ndarray 1D array of bin edges centres : ndarray 1D array of bin centres Notes ----- An example creating histogram bins between 1 Hz and 5 Hz with four linearly spaced bins. >>> edges,centres = emd.spectra.define_hist_bins(1, 5, 4) >>> print(edges) [1. 2. 3. 4. 5.] >>> print(centres) [1.5 2.5 3.5 4.5] """ if scale == 'log': p = np.log([data_min, data_max]) edges = np.linspace(p[0], p[1], nbins + 1) edges = np.exp(edges) elif scale == 'linear': edges = np.linspace(data_min, data_max, nbins + 1) else: raise ValueError('scale \'{0}\' not recognised. please use \'log\' or \'linear\'.') # Get centre frequecy for the bins centres = np.array([(edges[ii] + edges[ii + 1]) / 2 for ii in range(len(edges) - 1)]) return edges, centres
[docs] def define_hist_bins_from_data(X, nbins=None, mode='sqrt', scale='linear', tol=1e-3, max_bins=2048): """Find the bin edges and centre frequencies for use in a histogram. If nbins is defined, mode is ignored Parameters ---------- X : ndarray Dataset whose summary stats will define the histogram nbins : int Number of bins to create, if undefined this is derived from the data (Default value = None) mode : {'sqrt'} Method for deriving number of bins if nbins is undefined (Default value = 'sqrt') scale : {'linear','log'} (Default value = 'linear') Returns ------- edges : ndarray 1D array of bin edges centres : ndarray 1D array of bin centres """ data_min = X.min() - tol data_max = X.max() + tol if nbins is None: if mode == 'sqrt': nbins = np.sqrt(X.shape[0]).astype(int) else: raise ValueError('mode {0} not recognised, please use \'sqrt\'') # Don't exceed max_bin number nbins = nbins if nbins < max_bins else max_bins return define_hist_bins(data_min, data_max, nbins, scale=scale)
def _histogram_bin_relay(params, data=None): """Relay function which does-the-right-thing with histogram bin inputs. Parameters ---------- params : None or tuple(start, stop, nsteps) or np.ndarray Parameters given by user, if: None - bins automatically computed using define_hist_bins_from_data Tuple of length three - bins computed by passing params to define_hist_bins numpy.ndarray - input defines bin edges, bin centres are computed. data : ndarray Optional data used to compute bins if params=None Returns ------- ndarray Array of bin edges ndarray Array of bin centres """ if params is None: # User didn't say anything - guess bins edges, bins = define_hist_bins_from_data(data.flatten()) elif isinstance(params, tuple) and len(params) in [3, 4]: # User specified meta bins - make actual bins edges, bins = define_hist_bins(*params) elif isinstance(params, (list, tuple, np.ndarray)): # User provided actual bin edges - use them edges = np.array(params) bins = _compute_centres_from_edges(edges) else: ValueError('Inputs not recognised....') return edges, bins def _compute_centres_from_edges(edges, method='mean'): """Compute bin centres from an array of bin edges.""" if method == 'geometric': bins = np.sqrt(edges[1:] * edges[:-1]) elif method == 'mean': bins = (edges[1:] + edges[:-1]) / 2 else: msg = 'method \'{0}\' not recognised. please use \'mean\' or \'geometric\'.' raise ValueError(msg) return bins