Skip to content

Commit

Permalink
adapted to audio feature extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Dec 15, 2023
1 parent 137c58c commit dc1c88c
Show file tree
Hide file tree
Showing 5 changed files with 513 additions and 359 deletions.
168 changes: 118 additions & 50 deletions src/featuresExtractor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,75 +6,143 @@ include("spectral.jl")
"""
    audioFeaturesExtraction(x, sr; kwargs...)

Run the audio feature-extraction pipeline on the signal `x` (sample rate `sr`) and
return every computed feature stacked into a single matrix.

The signal is converted to `Float64`, peak-normalized, wrapped in a `signal_data`
object and then processed in order by `takeFFT`, `melSpectrogram`, `mfcc` and
`spectral_features`, each of which stores its results in that object.

# Keywords
- `window_type::Symbol=:hann`: analysis window passed to the FFT stage.
- `window_length::Int`: frame length in samples (default `round(0.03 * sr)`).
- `overlap_length::Int`: overlap between frames in samples (default `round(0.02 * sr)`).
- `num_bands::Int=32`: number of mel bands.
- `mel_style::Symbol=:slaney`: mel scale variant, `:htk` or `:slaney`.
- `frequency_range::Vector{Int64}`: frequency limits (default `[0, round(sr / 2)]`);
  converted to `Float64` before being stored in the setup.
- `filterbank_normalization::Symbol=:bandwidth`, `spectrum_type::Symbol=:power`:
  forwarded unchanged to `signal_setup`.
- `num_coeffs::Int=13`, `rectification::Symbol=:log`, `log_energy_pos::Symbol=:append`,
  `delta_window_length::Int=9`: MFCC options, forwarded unchanged to `signal_setup`.

# Returns
The vertical concatenation (`vcat`) of the mel spectrogram with the transposed MFCC
coefficients, deltas, delta-deltas and spectral descriptors.
NOTE(review): presumably one row per feature and one column per frame - confirm
against the shapes produced by the processing stages.
"""
function audioFeaturesExtraction(
    x::AbstractArray{T},
    sr::Int64;
    # fft
    window_type::Symbol=:hann,
    window_length::Int=Int(round(0.03 * sr)),
    overlap_length::Int=Int(round(0.02 * sr)),
    # mel
    num_bands::Int=32,
    mel_style::Symbol=:slaney, # :htk, :slaney
    frequency_range::Vector{Int64}=[0, Int(round(sr / 2))],
    filterbank_normalization::Symbol=:bandwidth,
    spectrum_type::Symbol=:power,
    # mfcc
    num_coeffs::Int=13,
    rectification::Symbol=:log,
    log_energy_pos::Symbol=:append,
    delta_window_length::Int=9,
    # TODO(review): filterbank_design_domain and windowNormalization used to be set
    # here, but it is unclear whether they are used anywhere; oneSided is a fixed
    # default and is intentionally not exposed as a keyword.
) where {T<:AbstractFloat}
    # setup and data structures definition
    setup = signal_setup(
        sr=sr,
        # fft
        window_type=window_type,
        window_length=window_length,
        overlap_length=overlap_length,
        # mel
        num_bands=num_bands,
        mel_style=mel_style,
        frequency_range=Float64.(frequency_range),
        filterbank_normalization=filterbank_normalization,
        spectrum_type=spectrum_type,
        # mfcc
        num_coeffs=num_coeffs,
        rectification=rectification,
        log_energy_pos=log_energy_pos,
        delta_window_length=delta_window_length,
    )

    # Peak-normalize the signal.
    # TODO(review): the original author was unsure whether normalization belongs here.
    x = Float64.(x)
    peak = maximum(abs, x)
    if peak > 0
        # An all-zero (silent) signal is left untouched: dividing by a zero peak
        # would turn every sample into NaN.
        x = x ./ peak
    end

    data = signal_data(
        x=x
    )

    # Each stage reads from and writes its results into `data`.
    takeFFT(data, setup)
    melSpectrogram(data, setup)
    mfcc(data, setup)
    spectral_features(data, setup)

    return vcat(
        data.mel_spectrogram,
        data.mfcc_coeffs',
        data.mfcc_delta',
        data.mfcc_deltadelta',
        data.spectral_centroid',
        data.spectral_crest',
        data.spectral_entropy',
        data.spectral_flatness',
        data.spectral_flux',
        data.spectral_decrease',
        data.spectral_kurtosis',
        data.spectral_rolloff',
        data.spectral_skewness',
        data.spectral_slope',
        data.spectral_spread'
    )
end

# debug
using PyCall
librosa = pyimport("librosa")
sr_src = 8000
x, sr = librosa.load("/home/riccardopasini/Documents/Aclai/Julia_additional_files/test.wav", sr=sr_src, mono=true)
# # debug
# using PyCall
# librosa = pyimport("librosa")
# sr_src = 8000
# x, sr = librosa.load("/home/riccardopasini/Documents/Aclai/Julia_additional_files/test.wav", sr=sr_src, mono=true)

# # fft
# window_type = :hann
# window_length = Int(round(0.03 * sr))
# overlap_length = Int(round(0.02 * sr))

# # mel
# num_bands = 32
# mel_style = :slaney
# frequency_range = [0, Int(round(sr / 2))]
# filterbank_normalization = :bandwidth
# spectrum_type = :power

# #mfcc
# num_coeffs = 13
# rectification = :log
# log_energy_pos = :append
# delta_window_length = 9

# # filterbank_design_domain = :linear
# # windowNormalization = true
# # oneSided = true

# # options and data structures definition
# setup = signal_setup(
# sr=sr,

# # fft
# window_type=window_type,
# window_length=window_length,
# overlap_length=overlap_length,

# # mel
# num_bands=num_bands,
# mel_style=mel_style,
# frequency_range=Float64.(frequency_range),
# filterbank_normalization=filterbank_normalization,
# spectrum_type=spectrum_type,

# # mfcc
# num_coeffs=num_coeffs,
# rectification=rectification,
# log_energy_pos=log_energy_pos,
# delta_window_length=delta_window_length,

# # filterbank_design_domain=filterbank_design_domain,
# # windowNormalization=windowNormalization,
# # oneSided=oneSided
# )

audioFeaturesExtraction(x, sr)
# data = signal_data(
# x=Float64.(x)
# )
136 changes: 75 additions & 61 deletions src/fft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,103 +12,117 @@ function getFFTLength( # switch case da ricordare!!!
return 1024
end # getFFTLength

"""
    get_onesided_fft_range(fft_length::Int64)

Return the 1-based indices of the one-sided part of an FFT of length `fft_length`,
as a `Vector{Int}`.

For an even length the range is `1:(fft_length / 2 + 1)`; for an odd length it is
`1:((fft_length + 1) / 2)`.
"""
function get_onesided_fft_range(fft_length::Int64)
    # Integer division keeps the arithmetic exact (no Float64 round trip).
    if iseven(fft_length)
        return collect(1:(fft_length ÷ 2 + 1)) # EVEN
    else
        return collect(1:((fft_length + 1) ÷ 2)) # ODD
    end
end # get_onesided_fft_range

"""
    takeFFT(x, sr; fft_length=256, window_type=:hann, window_length, overlap_length)

Convenience method: build a `signal_setup` and a `signal_data` from the raw signal
`x` and sample rate `sr`, then delegate to `takeFFT(data, setup)` and return its
result.

# Keywords
- `fft_length::Int64=256`: forwarded to `signal_setup`.
- `window_type::Symbol=:hann`: analysis window type.
- `window_length::Int64`: frame length in samples (default `round(0.03 * sr)`).
- `overlap_length::Int64`: frame overlap in samples (default `round(0.02 * sr)`).
"""
function takeFFT(
    x::AbstractArray{Float64},
    sr::Int64;
    fft_length::Int64=256,
    window_type::Symbol=:hann,
    window_length::Int64=Int(round(0.03 * sr)),
    overlap_length::Int64=Int(round(0.02 * sr)),
    # TODO(review): windowNormalization / oneSided are not exposed as keywords yet.
)
    # setup and data structures definition
    # (constructor name is `signal_setup`, matching the type used by the
    # `takeFFT(data, setup)` method)
    setup = signal_setup(
        sr=sr,
        fft_length=fft_length,
        window_type=window_type,
        window_length=window_length,
        overlap_length=overlap_length,
    )

    data = signal_data(
        x=Float64.(x)
    )

    return takeFFT(data, setup)
end # takeFFT(kwarg...)

"""
    takeFFT(data::signal_data, setup::signal_setup)

Compute the one-sided short-time spectrum of `data.x` and store the results in `data`.

Side effects:
- `setup.fft_length` is overwritten with `setup.window_length`.
- `data.fft_window` receives the window returned by `gencoswin`.
- `data.fft` receives the one-sided power spectrum when
  `setup.spectrum_type == :power`, otherwise the one-sided magnitude spectrum.
- `data.log_energy` receives the log of the per-frame energy of the un-windowed
  frames.
- `data.fft_frequencies` receives the frequency vector, which is also returned.
"""
function takeFFT(
    data::signal_data,
    setup::signal_setup
)
    # The FFT length is forced to be equal to the window length.
    setup.fft_length = setup.window_length
    hop_length = setup.window_length - setup.overlap_length
    data.fft_window, _ = gencoswin(setup.window_type, setup.window_length, :symmetric)

    # Row indices of the one-sided spectrum.
    # NOTE(review): taken from the audio feature extraction reference code; the
    # author was unsure whether it belongs here.
    ossb = get_onesided_fft_range(setup.fft_length)

    # Split the signal into overlapping frames and apply the window.
    y = buffer(data.x, setup.window_length, hop_length)
    yw = y .* data.fft_window

    # Transform along the first dimension and keep only the one-sided rows.
    Z = fft(yw, (1,))[ossb, :]
    data.fft = setup.spectrum_type == :power ? abs2.(Z) : abs.(Z)

    # Log energy: adding the rows of `y .^ 2` together yields one energy value per
    # column of `y`. It is computed on the un-windowed frames.
    E = sum(eachrow(y .^ 2))
    # Replace exact zeros with the smallest positive normal float so log stays finite.
    E[E.==0] .= floatmin(Float64)
    data.log_energy = log.(E)

    # NOTE(review): the following steps from the previous implementation are
    # currently disabled and should be either restored or dropped:
    #   - window normalization (power: divide by 0.5 * sum(window)^2,
    #     magnitude: divide by 0.5 * sum(window));
    #   - trimming `data.fft` to the bins selected by `setup.frequency_range`;
    #   - halving the DC bin, and the Nyquist bin when the FFT length is even.

    # Bins covered by the requested frequency range.
    bin_low = Int(ceil(setup.frequency_range[1] * setup.fft_length / setup.sr + 1))
    bin_high = Int(floor(setup.frequency_range[2] * setup.fft_length / setup.sr + 1))
    bins = bin_low:bin_high

    # create frequency vector
    # NOTE(review): `data.fft` is no longer trimmed to `bins`, so this vector only
    # matches the rows of `data.fft` when the range spans the whole one-sided
    # spectrum - confirm this is intended.
    w = (setup.sr / setup.fft_length) .* (collect(bins) .- 1)
    # shift final bin if the FFT length is odd and the final range is full to sr/2.
    if isodd(setup.fft_length) && bin_high == floor(setup.fft_length / 2 + 1)
        w[end] = setup.sr * (setup.fft_length - 1) / (2 * setup.fft_length)
    end
    data.fft_frequencies = w
    return w
end # takeFFT(data, setup)
Loading

0 comments on commit dc1c88c

Please sign in to comment.