Skip to content

Commit

Permalink
spectral features added
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Dec 12, 2023
1 parent 24b15ba commit 137c58c
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 34 deletions.
80 changes: 80 additions & 0 deletions src/featuresExtractor.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
include("signalDataStructure.jl")
include("fft.jl")
include("mel.jl")
include("spectral.jl")

"""
    audioFeaturesExtraction(x, sr; kwargs...)

Peak-normalize the signal `x`, run the FFT, mel-spectrogram, MFCC and spectral-feature
stages on it, and return all resulting features concatenated column-wise.

# Arguments
- `x::AbstractArray{<:AbstractFloat}`: input samples; converted to `Float64` internally.
- `sr::Int64`: sample rate, used for the keyword defaults and forwarded to `signalSetup`.

# Keywords
All keywords are forwarded unchanged to `signalSetup`, except `frequency_range`, which
is converted to `Float64` first:
`frequency_range` (default `[0, round(sr / 2)]`), `numCoeffs` (13), `melStyle` (`:htk`),
`numBands` (32), `spectrum_type` (`:power`), `filterbank_design_domain` (`:linear`),
`filterBankNormalization` (`:bandwidth`), `window_type` (`:hann`),
`window_length` (`round(0.03 * sr)`), `overlap_length` (`round(0.02 * sr)`),
`rectification` (`:log`), `logEnergyPos` (`:append`), `deltaWindowLength` (9).

# Returns
The `hcat` of, in order: the transposed mel spectrogram, the coefficients, delta and
delta-delta, then the spectral centroid, crest, flatness, flux, decrease, kurtosis,
rolloff, skewness, slope and spread stored on the `signalData` object.

# Throws
- `ArgumentError` if `x` is empty.
"""
function audioFeaturesExtraction(
    x::AbstractArray{T},
    sr::Int64;
    #### define audio objects ####
    # user defined options
    frequency_range::Vector{Int64}=[0, Int(round(sr / 2))],
    numCoeffs::Int=13,
    melStyle::Symbol=:htk,
    numBands::Int=32,
    spectrum_type::Symbol=:power,
    filterbank_design_domain::Symbol=:linear,
    filterBankNormalization::Symbol=:bandwidth,
    window_type::Symbol=:hann,
    window_length::Int=Int(round(0.03 * sr)),
    overlap_length::Int=Int(round(0.02 * sr)),
    rectification::Symbol=:log,
    logEnergyPos::Symbol=:append,
    deltaWindowLength::Int=9
) where {T<:AbstractFloat}
    # Fail early with a clear message: the peak search below cannot reduce an empty array.
    isempty(x) && throw(ArgumentError("input signal `x` must not be empty"))

    # options and data structures definition
    options = signalSetup(
        sr=sr,
        frequency_range=Float64.(frequency_range),
        numCoeffs=numCoeffs,
        melStyle=melStyle,
        numBands=numBands,
        spectrum_type=spectrum_type,
        filterbank_design_domain=filterbank_design_domain,
        filterBankNormalization=filterBankNormalization,
        window_type=window_type,
        window_length=window_length,
        overlap_length=overlap_length,
        rectification=rectification,
        logEnergyPos=logEnergyPos,
        deltaWindowLength=deltaWindowLength
    )

    # normalize signal to unit peak amplitude; a separate local keeps `x` type-stable
    signal = Float64.(x)
    peak = maximum(abs, signal)
    # An all-zero (silent) signal has peak 0; dividing would turn every sample into NaN,
    # so it is passed through unscaled instead.
    if !iszero(peak)
        signal = signal ./ peak
    end

    data = signalData(
        x=signal
    )

    # Each stage stores its results on `data`; the order below is the original call order.
    takeFFT(data, options)
    melSpectrogram(data, options)
    mfcc(data, options)
    spectral_features(data, options)

    return hcat(
        data.mel_spectrogram',
        data.coeffs,
        data.delta,
        data.deltaDelta,
        data.spectral_centroid,
        data.spectral_crest,
        data.spectral_flatness,
        data.spectral_flux,
        data.spectral_decrease,
        data.spectral_kurtosis,
        data.spectral_rolloff,
        data.spectral_skewness,
        data.spectral_slope,
        data.spectral_spread
    )
end

# debug
# Ad-hoc smoke test: load a local recording through librosa (via PyCall) and run the
# full feature extraction on it. The path below is an absolute location on one
# developer's machine, so the run is skipped with a warning when the file is absent
# instead of aborting the `include` of this file.
using PyCall
sr_src = 8000
debug_wav = "/home/riccardopasini/Documents/Aclai/Julia_additional_files/test.wav"
if isfile(debug_wav)
    librosa = pyimport("librosa")
    x, sr = librosa.load(debug_wav, sr=sr_src, mono=true)

    audioFeaturesExtraction(x, sr)
else
    @warn "Debug audio file not found; skipping feature-extraction smoke test" debug_wav
end
57 changes: 43 additions & 14 deletions src/fft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function takeFFT(
options::signalSetup
)
options.FFT_length = options.window_length
data.window, unused = gencoswin(options.window_type, options.window_length, :periodic)
data.window, unused = gencoswin(options.window_type, options.window_length, :symmetric)

hopLength = options.window_length - options.overlap_length

Expand All @@ -58,10 +58,10 @@ function takeFFT(
# data.window = data.window * options.scale_factor
# end

if (options.windowNormalization)
options.spectrum_type == :power ? options.scale_factor = sqrt(0.5 * sum(data.window)^2) : options.scale_factor = 0.5 * sum(data.window)
data.window = data.window / options.scale_factor
end
# if (options.windowNormalization)
# options.spectrum_type == :power ? options.scale_factor = sqrt(0.5 * sum(data.window)^2) : options.scale_factor = 0.5 * sum(data.window)
# data.window = data.window / options.scale_factor
# end

y = buffer(data.x, options.window_length, hopLength)

Expand All @@ -70,16 +70,45 @@ function takeFFT(
E[E.==0] .= floatmin(Float64) # il minimo float al posto di zero
data.logEnergy = log.(E)
# apply window
data.fft = fft(y .* data.window, (1,))
data.fft = (fft(y .* data.window, (1,)))

# Convert to one-sided FFT
if (options.oneSided)
binHigh = Int(floor(options.FFT_length / 2 + 1))
X = data.fft[1:binHigh, :]
if (options.spectrum_type == :power)
data.fft = real((X .* conj(X)))
else
data.fft = abs.(X)
end
# if (options.oneSided)
# binHigh = Int(floor(options.FFT_length / 2 + 1))
# X = data.fft[1:binHigh, :]
# if (options.spectrum_type == :power)
# data.fft = (X .* conj(X))
# else
# data.fft = abs.(X)
# end
# end

# trim to desired range
binLow = Int(ceil(options.frequency_range[1] * options.FFT_length / options.sr + 1))
binHigh = Int(floor(options.frequency_range[2] * options.FFT_length / options.sr + 1))
bins = binLow:binHigh
data.fft = data.fft[bins, :]
# convert to half-sided magnitude or power spectrum
if (options.spectrum_type == :power)
data.fft = data.fft .* conj(data.fft) ./ (0.5 * sum(data.window)^2)
else # Magnitude
data.fft = abs.(data.fft) ./ (0.5 * sum(data.window))
end
# if the first bin is DC, halve it.
if (binLow == 1)
data.fft[1, :] = 0.5 * data.fft[1, :]
end

# if the final bin is Nyquist, and FFTLength is even, halve it.
if (binHigh == floor(options.FFT_length / 2 + 1) && rem(options.FFT_length, 2) == 0)
data.fft[end, :] = 0.5 * data.fft[end, :]
end

# create frequency vector
w = ((options.sr / options.FFT_length) .* (collect(bins) .- 1))
# shift final bin if fftLength is odd and the final range is full to fs/2.
if (rem(options.FFT_length, 2) == 1 && binHigh == floor(options.FFT_length / 2 + 1))
w[end] = options.sr * (options.FFT_length - 1) / (2 * options.FFT_length)
end
data.frequency_vector = w[:]
end # takeFFT(data, options)
21 changes: 4 additions & 17 deletions src/mel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ end # mel2hz

function designMelFilterBank(
sr::Int64;
oneSided::Bool=true,
frequencyScale::Symbol=:mel,
FFT_length::Int64=256,
numBands::Int64=32,
Expand All @@ -70,7 +69,6 @@ function designMelFilterBank(
# options and data structures definition
options = signalSetup(
sr=sr,
oneSided=oneSided,
frequencyScale=frequencyScale,
FFT_length=FFT_length,
numBands=numBands,
Expand Down Expand Up @@ -172,10 +170,9 @@ function designMelFilterBank(data::signalData, options::signalSetup)
end
end

if (options.oneSided)
select = getOnesidedFFTRange(options.FFT_length)
data.filterBank = data.filterBank[:, select]
end
# take one side
select = getOnesidedFFTRange(options.FFT_length)
data.filterBank = data.filterBank[:, select]
end # function designMelFilterBank

function createDCTmatrix(
Expand Down Expand Up @@ -247,7 +244,6 @@ function melSpectrogram(
x::AbstractArray{T},
sr::Int64;
frequency_range::Vector{Int64}=[0, Int(round(sr / 2))],
oneSided::Bool=true,
melStyle::Symbol=:htk,
numBands::Int64=32,
spectrum_type::Symbol=:power,
Expand All @@ -256,7 +252,6 @@ function melSpectrogram(
window_type::Symbol=:hann,
window_length::Int64=Int(round(0.03 * sr)),
overlap_length::Int64=Int(round(0.02 * sr)),
windowNormalization::Bool=true,
) where {T<:AbstractFloat}

# options and data structures definition
Expand All @@ -268,11 +263,9 @@ function melSpectrogram(
spectrum_type=spectrum_type,
filterbank_design_domain=filterbank_design_domain,
filterBankNormalization=filterBankNormalization,
oneSided=oneSided,
window_type=window_type,
window_length=window_length,
overlap_length=overlap_length,
windowNormalization=windowNormalization,
)

data = signalData(
Expand Down Expand Up @@ -307,14 +300,12 @@ function mfcc(
sr::Int64;
numCoeffs::Int64=13,
frequency_range::Vector{Int64}=[0, Int(round(sr / 2))],
oneSided::Bool=false,
melStyle::Symbol=:slaney133, # :htk, :slaney, :slaney133
numBands::Int64=32,
spectrum_type::Symbol=:power, # :power, :magnitude
filterbank_design_domain::Symbol=:linear,
filterBankNormalization::Symbol=:bandwidth, # :bandwidth, :area, :none
window_type::Symbol=:hann,
windowNormalization::Bool=false,
window_length::Int64=Int(round(0.03 * sr)),
overlap_length::Int64=Int(round(0.02 * sr)),
rectification::Symbol=:log, # :log, :cubic
Expand All @@ -332,11 +323,9 @@ function mfcc(
spectrum_type=spectrum_type,
filterbank_design_domain=filterbank_design_domain,
filterBankNormalization=filterBankNormalization,
oneSided=oneSided,
window_type=window_type,
window_length=window_length,
overlap_length=overlap_length,
windowNormalization=windowNormalization,
rectification=rectification,
logEnergyPos=logEnergyPos,
deltaWindowLength=deltaWindowLength
Expand All @@ -360,7 +349,7 @@ function mfcc(

# place log energy
if (options.logEnergyPos == :append)
data.coeffs = hcat(data.logEnergy, data.coeffs)
data.coeffs = hcat(data.coeffs, data.logEnergy)
elseif (options.logEnergyPos == :replace)
# data.coeffs = [logE.',data.coeffs(:,2:end)];
# da fare
Expand Down Expand Up @@ -396,10 +385,8 @@ end
# coeffs, delta, deltaDelta, loc = mfcc(
# x,
# sr,
# oneSided=true,
# melStyle=:htk,
# window_length=320,
# windowNormalization=true,
# overlap_length=240,
# logEnergyPos=:none
# )
16 changes: 13 additions & 3 deletions src/signalDataStructure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ using Parameters
window_type::Symbol=:hann
window_length::Int64=0
overlap_length::Int64=0
windowNormalization::Bool=true # possibile cambiarlo in filterBankNormalization?
normalizationFactor::Float64=1.0
# mel
frequency_range::Vector{Float64}=[]
Expand All @@ -19,9 +18,8 @@ using Parameters
frequencyScale::Symbol=:mel
filterbank_design_domain::Symbol=:linear
filterBankNormalization::Symbol=:bandwidth # :bandwidth, :area, :none
oneSided::Bool=false
rectification::Symbol=:log
logEnergyPos::Symbol=:append
logEnergyPos::Symbol=:append #:apend, :replace, :none
deltaWindowLength::Int64=9
bandEdges::AbstractVector{AbstractFloat}=[]
end
Expand All @@ -30,6 +28,7 @@ end
x::AbstractArray{Float64}=[]
# fft
fft::AbstractArray{Complex{Float64}}=[]
frequency_vector::Vector{Float64}=[]
window::Vector{Float64}=[]
# mel
filterBank::AbstractArray{Float64}=[]
Expand All @@ -39,4 +38,15 @@ end
delta::AbstractArray{Float64}=[]
deltaDelta::AbstractArray{Float64}=[]
logEnergy::Vector{Float64}=[]
# spectral
spectral_centroid::Vector{Float64}=[]
spectral_crest::Vector{Float64}=[]
spectral_flatness::Vector{Float64}=[]
spectral_flux::Vector{Float64}=[]
spectral_decrease::Vector{Float64}=[]
spectral_kurtosis::Vector{Float64}=[]
spectral_rolloff::Vector{Float64}=[]
spectral_skewness::Vector{Float64}=[]
spectral_slope::Vector{Float64}=[]
spectral_spread::Vector{Float64}=[]
end
Loading

0 comments on commit 137c58c

Please sign in to comment.