qm-dsp  1.8
ClusterMeltSegmenter.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  * ClusterMeltSegmenter.cpp
5  *
6  * Created by Mark Levy on 23/03/2006.
7  * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14  */
15 
16 #include <cfloat>
17 #include <cmath>
18 
19 #include "ClusterMeltSegmenter.h"
20 #include "cluster_segmenter.h"
21 #include "segment.h"
22 
23 #include "dsp/transforms/FFT.h"
26 #include "dsp/mfcc/MFCC.h"
27 
29  window(NULL),
30  fft(NULL),
31  constq(NULL),
32  mfcc(NULL),
33  featureType(params.featureType),
34  hopSize(params.hopSize),
35  windowSize(params.windowSize),
36  fmin(params.fmin),
37  fmax(params.fmax),
38  nbins(params.nbins),
39  ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
40  nHMMStates(params.nHMMStates),
41  nclusters(params.nclusters),
42  histogramLength(params.histogramLength),
43  neighbourhoodLimit(params.neighbourhoodLimit),
44  decimator(NULL)
45 {
46 }
47 
49 {
50  samplerate = fs;
51 
54 
55  // run internal processing at 11025 or thereabouts
56  int internalRate = 11025;
57  int decimationFactor = samplerate / internalRate;
58  if (decimationFactor < 1) decimationFactor = 1;
59 
60  // must be a power of two
61  while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
62 
63  if (decimationFactor > Decimator::getHighestSupportedFactor()) {
64  decimationFactor = Decimator::getHighestSupportedFactor();
65  }
66 
67  if (decimationFactor > 1) {
68  decimator = new Decimator(getWindowsize(), decimationFactor);
69  }
70 
71  CQConfig config;
72  config.FS = samplerate / decimationFactor;
73  config.min = fmin;
74  config.max = fmax;
75  config.BPO = nbins;
76  config.CQThresh = 0.0054;
77 
78  constq = new ConstantQ(config);
80 
81  ncoeff = constq->getK();
82 
83  fft = new FFTReal(constq->getfftlength());
84 
85  } else if (featureType == FEATURE_TYPE_MFCC) {
86 
87  // run internal processing at 22050 or thereabouts
88  int internalRate = 22050;
89  int decimationFactor = samplerate / internalRate;
90  if (decimationFactor < 1) decimationFactor = 1;
91 
92  // must be a power of two
93  while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
94 
95  if (decimationFactor > Decimator::getHighestSupportedFactor()) {
96  decimationFactor = Decimator::getHighestSupportedFactor();
97  }
98 
99  if (decimationFactor > 1) {
100  decimator = new Decimator(getWindowsize(), decimationFactor);
101  }
102 
103  MFCCConfig config(samplerate / decimationFactor);
104  config.fftsize = 2048;
105  config.nceps = 19;
106  config.want_c0 = true;
107 
108  mfcc = new MFCC(config);
109  ncoeff = config.nceps + 1;
110  }
111 }
112 
114 {
115  delete window;
116  delete constq;
117  delete decimator;
118  delete fft;
119 }
120 
121 int
123 {
124  return static_cast<int>(windowSize * samplerate + 0.001);
125 }
126 
127 int
129 {
130  return static_cast<int>(hopSize * samplerate + 0.001);
131 }
132 
133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
134 {
137  extractFeaturesConstQ(samples, nsamples);
138  } else if (featureType == FEATURE_TYPE_MFCC) {
139  extractFeaturesMFCC(samples, nsamples);
140  }
141 }
142 
143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
144 {
145  if (!constq) {
146  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
147  << "No const-q: initialise not called?"
148  << std::endl;
149  return;
150  }
151 
152  if (nsamples < getWindowsize()) {
153  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
154  return;
155  }
156 
157  int fftsize = constq->getfftlength();
158 
159  if (!window || window->getSize() != fftsize) {
160  delete window;
161  window = new Window<double>(HammingWindow, fftsize);
162  }
163 
164  vector<double> cq(ncoeff);
165 
166  for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
167 
168  const double *psource = samples;
169  int pcount = nsamples;
170 
171  if (decimator) {
172  pcount = nsamples / decimator->getFactor();
173  double *decout = new double[pcount];
174  decimator->process(samples, decout);
175  psource = decout;
176  }
177 
178  int origin = 0;
179 
180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
181 
182  int frames = 0;
183 
184  double *frame = new double[fftsize];
185  double *real = new double[fftsize];
186  double *imag = new double[fftsize];
187  double *cqre = new double[ncoeff];
188  double *cqim = new double[ncoeff];
189 
190  while (origin <= pcount) {
191 
192  // always need at least one fft window per block, but after
193  // that we want to avoid having any incomplete ones
194  if (origin > 0 && origin + fftsize >= pcount) break;
195 
196  for (int i = 0; i < fftsize; ++i) {
197  if (origin + i < pcount) {
198  frame[i] = psource[origin + i];
199  } else {
200  frame[i] = 0.0;
201  }
202  }
203 
204  for (int i = 0; i < fftsize/2; ++i) {
205  double value = frame[i];
206  frame[i] = frame[i + fftsize/2];
207  frame[i + fftsize/2] = value;
208  }
209 
210  window->cut(frame);
211 
212  fft->forward(frame, real, imag);
213 
214  constq->process(real, imag, cqre, cqim);
215 
216  for (int i = 0; i < ncoeff; ++i) {
217  cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
218  }
219  ++frames;
220 
221  origin += fftsize/2;
222  }
223 
224  delete [] cqre;
225  delete [] cqim;
226  delete [] real;
227  delete [] imag;
228  delete [] frame;
229 
230  for (int i = 0; i < ncoeff; ++i) {
231  cq[i] /= frames;
232  }
233 
234  if (decimator) delete[] psource;
235 
236  features.push_back(cq);
237 }
238 
239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
240 {
241  if (!mfcc) {
242  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
243  << "No mfcc: initialise not called?"
244  << std::endl;
245  return;
246  }
247 
248  if (nsamples < getWindowsize()) {
249  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
250  return;
251  }
252 
253  int fftsize = mfcc->getfftlength();
254 
255  vector<double> cc(ncoeff);
256 
257  for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
258 
259  const double *psource = samples;
260  int pcount = nsamples;
261 
262  if (decimator) {
263  pcount = nsamples / decimator->getFactor();
264  double *decout = new double[pcount];
265  decimator->process(samples, decout);
266  psource = decout;
267  }
268 
269  int origin = 0;
270  int frames = 0;
271 
272  double *frame = new double[fftsize];
273  double *ccout = new double[ncoeff];
274 
275  while (origin <= pcount) {
276 
277  // always need at least one fft window per block, but after
278  // that we want to avoid having any incomplete ones
279  if (origin > 0 && origin + fftsize >= pcount) break;
280 
281  for (int i = 0; i < fftsize; ++i) {
282  if (origin + i < pcount) {
283  frame[i] = psource[origin + i];
284  } else {
285  frame[i] = 0.0;
286  }
287  }
288 
289  mfcc->process(frame, ccout);
290 
291  for (int i = 0; i < ncoeff; ++i) {
292  cc[i] += ccout[i];
293  }
294  ++frames;
295 
296  origin += fftsize/2;
297  }
298 
299  delete [] ccout;
300  delete [] frame;
301 
302  for (int i = 0; i < ncoeff; ++i) {
303  cc[i] /= frames;
304  }
305 
306  if (decimator) delete[] psource;
307 
308  features.push_back(cc);
309 }
310 
312 {
313  nclusters = m;
314  segment();
315 }
316 
317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
318 {
319  features = f;
321 }
322 
324 {
325  delete constq;
326  constq = 0;
327  delete mfcc;
328  mfcc = 0;
329  delete decimator;
330  decimator = 0;
331 
332  if (features.size() < histogramLength) return;
333 /*
334  std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
335  << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
336 */
337  // copy the features to a native array and use the existing C segmenter...
338  double** arrFeatures = new double*[features.size()];
339  for (int i = 0; i < features.size(); i++)
340  {
342  arrFeatures[i] = new double[features[0].size()];
343  for (int j = 0; j < features[0].size(); j++)
344  arrFeatures[i][j] = features[i][j];
345  } else {
346  arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
347  for (int j = 0; j < ncoeff; j++)
348  arrFeatures[i][j] = features[i][j];
349  }
350  }
351 
352  q = new int[features.size()];
353 
356  cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
358  else
359  constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
361 
362  // convert the cluster assignment sequence to a segmentation
363  makeSegmentation(q, features.size());
364 
365  // de-allocate arrays
366  delete [] q;
367  for (int i = 0; i < features.size(); i++)
368  delete [] arrFeatures[i];
369  delete [] arrFeatures;
370 
371  // clear the features
372  clear();
373 }
374 
376 {
377  segmentation.segments.clear();
380 
382  segment.start = 0;
383  segment.type = q[0];
384 
385  for (int i = 1; i < len; i++)
386  {
387  if (q[i] != q[i-1])
388  {
389  segment.end = i * getHopsize();
390  segmentation.segments.push_back(segment);
391  segment.type = q[i];
392  segment.start = segment.end;
393  }
394  }
395  segment.end = len * getHopsize();
396  segmentation.segments.push_back(segment);
397 }
398 
int fftsize
Definition: MFCC.h:25
int start
Definition: Segmenter.h:28
void process(const double *src, double *dst)
Process inLength samples (as supplied to constructor) from src and write inLength / decFactor samples...
Definition: Decimator.cpp:200
int getfftlength() const
Definition: MFCC.h:56
void extractFeaturesMFCC(const double *, int)
int getFactor() const
Definition: Decimator.h:56
void extractFeaturesConstQ(const double *, int)
int process(const double *inframe, double *outceps)
Process time-domain input data.
Definition: MFCC.cpp:205
static int getHighestSupportedFactor()
Definition: Decimator.h:57
unsigned int FS
Definition: ConstantQ.h:24
int nceps
Definition: MFCC.h:26
virtual void clear()
Definition: Segmenter.h:54
void cluster_segment(int *q, double **features, int frames_read, int feature_length, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
int nsegtypes
Definition: Segmenter.h:36
void constq_segment(int *q, double **features, int frames_read, int bins, int ncoeff, int feature_type, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
ClusterMeltSegmenter(ClusterMeltSegmenterParams params)
void process(const double *FFTRe, const double *FFTIm, double *CQRe, double *CQIm)
Definition: ConstantQ.cpp:317
double CQThresh
Definition: ConstantQ.h:28
void setFeatures(const vector< vector< double > > &f)
void forward(const double *realIn, double *realOut, double *imagOut)
Carry out a forward real-to-complex transform of size nsamples, where nsamples is the value provided ...
Definition: FFT.cpp:185
#define NULL
Definition: Filter.h:20
vector< Segment > segments
Definition: Segmenter.h:38
int getfftlength()
Definition: ConstantQ.h:53
double max
Definition: ConstantQ.h:26
double min
Definition: ConstantQ.h:25
int getSize() const
Definition: Window.h:67
int samplerate
Definition: Segmenter.h:37
int samplerate
Definition: Segmenter.h:59
Segmentation segmentation
Definition: Segmenter.h:58
Definition: FFT.h:46
unsigned int BPO
Definition: ConstantQ.h:27
int end
Definition: Segmenter.h:29
void sparsekernel()
Definition: ConstantQ.cpp:76
bool want_c0
Definition: MFCC.h:28
Decimator carries out a fast downsample by a power-of-two factor.
Definition: Decimator.h:24
Window< double > * window
virtual void initialise(int samplerate)
void makeSegmentation(int *q, int len)
int getK()
Definition: ConstantQ.h:52
int type
Definition: Segmenter.h:30
virtual void extractFeatures(const double *samples, int nsamples)
Definition: MFCC.h:35
void cut(T *src) const
Definition: Window.h:61
vector< vector< double > > features
Definition: Segmenter.h:57