基音周期估计--Yin

Yin

d_{t}(\tau)=\sum_{j=t}^{t+W-1}(x_{j}-x_{j+\tau})^2

代码实现

import soundfile as sf
import numpy as np
from numpy.lib.stride_tricks import as_strided
def speech_frame(x, frame_length, hop_length, axis=-1):
x = np.ascontiguousarray(x)
n_frames = 1 + (x.shape[axis] - frame_length) // hop_length
strides = np.asarray(x.strides)
new_stride = np.prod(strides[strides > 0] // x.itemsize) * x.itemsize

if axis == -1:
shape = list(x.shape)[:-1] + [frame_length, n_frames]
strides = list(strides) + [hop_length * new_stride]
elif axis == 0:
shape = [n_frames, frame_length] + list(x.shape)[1:]
strides = [hop_length * new_stride] + list(strides)

return as_strided(x, shape=shape, strides=strides)

def cumulative_mean_normalized_difference(y_frames, frame_length, win_length, min_period, max_period):
# 自相关函数和功率谱互为傅里叶变换对.该种方式计算自相关函数可以节省计算量.
a = np.fft.rfft(y_frames, frame_length, axis=0)
# 疑问点1:y_frames[win_length:: -1, :],不知道为啥这样取.
b = np.fft.rfft(y_frames[win_length:: -1, :], frame_length, axis=0)
acf_frames = np.fft.irfft(a * b, frame_length, axis=0)[win_length:]
acf_frames[np.abs(acf_frames) < 1e-6] = 0

energy_frames = np.cumsum(y_frames ** 2, axis=0)
#疑问点2: 不知道为啥这样就能表示r_{t+\tau}
energy_frames = energy_frames[win_length:, :] - energy_frames[:-win_length, :]
energy_frames[np.abs(energy_frames) < 1e-6] = 0

y_yin_frames = energy_frames[0, :] + energy_frames -  2 * acf_frames

yin_numerator = y_yin_frames[min_period: max_period + 1, :]
tau_range = np.arange(1, max_period + 1)[:, None]
cumulative_mean = np.cumsum(y_yin_frames[1 : max_period + 1, :], axis=0) / tau_range
yin_denominator = cumulative_mean[min_period - 1: max_period, :]

yin_frames  = yin_numerator / (yin_denominator + tiny(yin_denominator))
return yin_frames

def parabolic_interpolation(y_frames):
parabolic_shifts = np.zeros_like(y_frames)
parabola_a = (y_frames[:-2, :] + y_frames[2:, :] - 2 * y_frames[1:-1, :]) / 2
parabola_b = (y_frames[2:, :] - y_frames[:-2, :]) / 2
parabolic_shifts[1:-1, :] = -parabola_b / (2 * parabola_a + tiny(parabola_a))
parabolic_shifts[np.abs(parabolic_shifts) > 1] = 0
return parabolic_shifts

def localmin(x,axis = 0):
paddings = [(0, 0)] * x.ndim
inds1 = [slice(None)] * x.ndim
inds1[axis] = slice(0, -2)

inds2 = [slice(None)] * x.ndim

def tiny(x):
x = np.asarray(x)
if np.issubdtype(x.dtype, np.floating) or np.issubdtype(
x.dtype, np.complexfloating
):
dtype = x.dtype
else:
dtype = np.float32
return np.finfo(dtype).tiny

frame_length = 2048
win_length = frame_length // 2
hop_length = frame_length // 4
y_frame = speech_frame(y, frame_length=frame_length, hop_length=hop_length)
fmax = 2000
fmin = 100
trough_threshold = 0.2
min_period = max(int(np.floor(sr / fmax)), 1)
max_period = min(int(np.ceil(sr / fmin)), frame_length - win_length - 1)
yin_frames = cumulative_mean_normalized_difference(y_frame, frame_length,
win_length, min_period, max_period)
parabolic_shifts = parabolic_interpolation(yin_frames)
is_tough = localmin(yin_frames, axis=0)
is_tough[0, :] = yin_frames[0, :] < yin_frames[1, :]
is_threshold_trough = np.logical_and(is_tough, yin_frames < trough_threshold)

global_min = np.argmin(yin_frames, axis=0)
yin_period = np.argmax(is_threshold_trough, axis=0)
no_trough_below_threshold = np.all(~is_threshold_trough, axis=0)
yin_period[no_trough_below_threshold] = global_min[no_trough_below_threshold]
yin_period = (
min_period
+ yin_period
+ parabolic_shifts[yin_period, range(yin_frames.shape[1])]
)
f0 = sr / yin_period
print(f0)

(=￣ω￣=)··· 暂无内容！

2

0

0

0