-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ad2954c
commit 82c7fd0
Showing
5 changed files
with
358 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
## main | ||
using PyCall | ||
librosa = pyimport("librosa") | ||
soundfile = pyimport("soundfile") | ||
|
||
sr_src = 48000 | ||
x, sr = librosa.load("/media/paso/Paso Stick/Trombone Live_01.wav", sr=sr_src, mono=true) | ||
|
||
absX = abs.(x) | ||
|
||
soundfile.write("test1.wav",absX,samplerate=sr,subtype="PCM_24") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
using FFTW | ||
using DSP | ||
|
||
include("mfccDataStructure.jl") | ||
include("windowing.jl") | ||
include("mfcc.jl") | ||
|
||
function getDefaultBandEdges() | ||
# default band edges | ||
factor = 133.33333333333333 | ||
bE = zeros(1, 42) | ||
for i in 1:13 | ||
bE[i] = factor + (factor / 2) * (i - 1) | ||
end | ||
for i in 14:42 | ||
bE[i] = bE[i-1] * 1.0711703 | ||
end | ||
|
||
return vec(bE) | ||
end # getDefaultBandEdges | ||
|
||
function getFFTLength( # switch case da ricordare!!! | ||
sr::Int64, | ||
FFTLength::Int64 | ||
) | ||
if (FFTLength == 0) | ||
sr <= 4000 && return 128 | ||
sr <= 8000 && return 256 | ||
sr <= 16000 && return 512 | ||
return 1024 | ||
else | ||
return FFTLength | ||
end | ||
end # getFFTLength | ||
|
||
function hz2mel( | ||
fRange::Vector{Float64}, | ||
melStyle::Symbol=:oshaughnessy # :oshaughnessy, :slaney | ||
) | ||
if (melStyle == :oshaughnessy) | ||
mel = 2595 * log10.(1 .+ fRange / 700) | ||
else | ||
# f0 = 0.0 slaney, DA FARE! | ||
# fsp = 200 / 3 | ||
# brkfrq = 1000.0 | ||
# brkpt = (brkfrq - f0) / fsp | ||
# logstep = log(6.4) / 27 | ||
# linpt = f < brkfrq | ||
# z = linpt ? f / brkfrq / logstep : brkpt + log(f / brkfrq) / logstep | ||
end | ||
return mel | ||
end # hz2mel | ||
|
||
function mel2hz( | ||
mel::LinRange{Float64,Int64}, | ||
melStyle::Symbol=:oshaughnessy # :oshaughnessy, :slaney | ||
) | ||
if (melStyle == :oshaughnessy) | ||
hz = 700 * (exp10.(mel / 2595) .- 1) | ||
else | ||
# f0 = 0.0 slaney, DA FARE! | ||
# fsp = 200 / 3 | ||
# brkfrq = 1000.0 | ||
# brkpt = (brkfrq - f0) / fsp | ||
# logstep = log(6.4) / 27 | ||
# linpt = z < brkpt | ||
# f = linpt ? f0 + fsp * z : brkfrq * exp(logstep * (z - brkpt)) | ||
end | ||
return hz | ||
end # mel2hz | ||
|
||
function designAuditoryFilterBank( | ||
sr::Int64, | ||
options::mfccSetup | ||
) | ||
# Set the design domain | ||
if (options.filterBankDesignDomain == :linear) | ||
designDomain = :linear | ||
else | ||
designDomain = options.frequencyScale | ||
end | ||
|
||
# Set the default number of bands | ||
if (options.frequencyScale == :erb) | ||
# da impostare in caso di :erb (hz2erb) | ||
else | ||
numBands = options.numBands # da eliminare e usare solo options? | ||
end | ||
|
||
fRange = Float64.(options.frequencyRange) | ||
sampleRate = Float64(sr) | ||
|
||
if (options.frequencyScale == :erb) | ||
# erb TODO | ||
else | ||
if (options.frequencyScale == :mel) | ||
lRange = hz2mel(fRange, options.melStyle) | ||
bandEdges = mel2hz(LinRange(lRange[1], lRange[end], numBands + 2), options.melStyle) | ||
else | ||
# bark TODO | ||
end | ||
### punto di contatto con mfcc.jl ### | ||
fbank, fC = designMelFilterBank( | ||
sr, | ||
bandEdges, | ||
options.FFTLength, | ||
1, # sumExponent | ||
:none, # normalization | ||
:hz, # designDomain | ||
Float64, | ||
true, # keepTwoSided | ||
designDomain, # filterBankDesignDomain | ||
options.melStyle | ||
) | ||
bank = fbank' | ||
BW = bandEdges[3:end] - bandEdges[1:end-2] | ||
end | ||
|
||
# normalization (differente da quella di designMelFilterBank, da valutarne il perchè) | ||
if (options.filterBankNormalization == :area) | ||
# da fare | ||
elseif (options.filterBankNormalization == :bandwidth) | ||
# Weight by bandwidth | ||
weightPerBand = BW / 2 | ||
else | ||
# weightPerBand = ones(1,numBands) | ||
end | ||
for i = 1:Int(numBands) | ||
if (weightPerBand[i] != 0) | ||
bank[i, :] = bank[i, :] / weightPerBand[i] | ||
end | ||
end | ||
|
||
if (options.oneSided) | ||
select = getOnesidedFFTRange(options.FFTLength) | ||
filterBank = bank[:,select] | ||
if (options.frequencyScale==:erb) | ||
# # Scale down DC and Nyquist | ||
# # If the first bin is DC, halve it. | ||
# filterBank(:,1) = 0.5*filterBank(:,1); | ||
# # If the final bin is Nyquist, halve it. | ||
# if rem(options.FFTLength,2) == 0 | ||
# filterBank(:,end) = 0.5*filterBank(:,end); | ||
# end | ||
# % Scale up one-sided filter bank | ||
# filterBank = 2*filterBank; | ||
end | ||
else | ||
# scaleFactor = double(~strcmp(options.FrequencyScale,'erb')) + 1; | ||
# filterBank = bank/scaleFactor; | ||
end | ||
|
||
return filterBank, fC, BW | ||
end # designAuditoryFilterBank | ||
|
||
function melSpectrogram( | ||
x::AbstractArray{T}, | ||
sr::Int64; FFTLength::Int64=0, # deve essere 0, così viene automaticamente calcolata | ||
numCoeffs::Int64=13, frequencyRange::Vector{Int64}=[0, Int(round(sr / 2))], | ||
numBands::Int64=32, | ||
filterBankNormalization::Symbol=:bandwidth, | ||
oneSided::Bool=true, | ||
filterBankDesignDomain::Symbol=:linear, | ||
frequencyScale=:mel, | ||
spectrumType::Symbol=:power, | ||
melStyle::Symbol=:oshaughnessy, deltaWindowLength::Int64=9, | ||
rectification::Symbol=:log, | ||
logEnergy::Symbol=:append | ||
) where {T<:AbstractFloat} | ||
|
||
x = convert.(Float64, x) # mantiene compatibilità con Matlab che scricchiola con Float32, da verificare se è il caso. | ||
|
||
options = mfccSetup( | ||
numCoeffs=numCoeffs, | ||
bandEdges=getDefaultBandEdges(), frequencyRange=frequencyRange, | ||
numBands=numBands, | ||
filterBankNormalization=filterBankNormalization, | ||
frequencyScale=frequencyScale, | ||
oneSided=oneSided, | ||
filterBankDesignDomain=filterBankDesignDomain, | ||
spectrumType=spectrumType, | ||
melStyle=melStyle, deltaWindowLength=deltaWindowLength, | ||
rectification=rectification, | ||
logEnergy=logEnergy | ||
) | ||
|
||
# setup ufficiale | ||
# options.FFTLength = getFFTLength(sr, FFTLength) | ||
# options.window, options.overlapLength = gencoswin(:hann, options.FFTLength, :symmetric) | ||
# options.windowLength = size(options.window, 1) | ||
|
||
# setup di debug | ||
options.overlapLength = Int(round(0.02 * sr)) | ||
options.window, oll = gencoswin(:hamming, Int(round(0.03 * sr)), :periodic) | ||
options.windowLength = length(options.window) | ||
options.FFTLength = size(options.window, 1) # da verificare se è meglio della mia idea | ||
|
||
filterBank, F, BW = designAuditoryFilterBank(sr, options) | ||
|
||
if (options.windowNormalization) | ||
if(options.spectrumType==:power) | ||
scaleFactor = sqrt(0.5*sum(options.window)^2) | ||
else | ||
scaleFactor = 0.5*sum(options.window) | ||
end | ||
win = options.window / scaleFactor | ||
end | ||
|
||
N = size(win, 1) | ||
hopLength = N - options.overlapLength | ||
# Buffer into frames. | ||
y = buffer(x,N,hopLength) | ||
# apply window | ||
Y = fft(y .* win, (1,)) | ||
# Convert to one-sided FFT | ||
binHigh = Int(floor(options.FFTLength/2 + 1)) | ||
X = Y[1:binHigh,:] | ||
if (options.spectrumType==:power) | ||
Z = real((X .* conj(X))) | ||
else | ||
# Z = abs.(X) | ||
end | ||
# apply filterbank | ||
P = filterBank * Z | ||
# S = reshape(P, size(P,1), Int(round(size(P,2) / size(x,2))), size(x,2)) | ||
|
||
nRow = size(x,1) | ||
numHops = Int(floor((nRow-N)/hopLength) + 1) | ||
t = (collect(0:(numHops-1)) * hopLength .+ N/2) / sr | ||
|
||
return P, F, t | ||
|
||
end # melSpectrogram | ||
|
||
# Main debug | ||
# if (!@isdefined(x)) # carica solo se non è definita mono, serve giusto per debug | ||
# # cosi ogni volta che lancio non ricarica tutto | ||
# using PyCall | ||
# librosa = pyimport("librosa") | ||
# sr_src = 8000 | ||
# x, sr = librosa.load("/home/riccardopasini/Documents/Aclai/Julia_additional_files/test.wav", sr=sr_src, mono=true) | ||
# end | ||
|
||
# S, cF, t = melSpectrogram(x, sr) |
Oops, something went wrong.