# Python dependencies in pip requirements-file format (one PEP 508 specifier per line).
# NOTE(review): the package set (datasets[audio], torchaudio, librosa) suggests an
# audio/speech PyTorch workflow — confirm against the script(s) that use this file.

# `datasets` with its `audio` extra enabled; minimum version 1.12.0, no upper bound.
# NOTE(review): verify that the `audio` extra is actually defined at the 1.12.0 floor;
# if it was introduced in a later release, the minimum version should be raised.
datasets[audio] >= 1.12.0
# `torch`, minimum version 1.5, no upper bound.
torch >= 1.5
# `torchaudio`, unconstrained: any version the resolver selects is accepted.
# NOTE(review): torchaudio releases are presumably tied to specific torch versions —
# confirm the resolver picks a pair that is compatible with the torch floor above.
torchaudio
# `accelerate`, minimum version 0.12.0, no upper bound.
accelerate >= 0.12.0
# `librosa`, unconstrained.
# NOTE(review): this may already be pulled in by the `audio` extra of datasets — confirm
# whether the explicit entry is still needed.
librosa