qm-dsp  1.8
TempoTrackV2.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  QM DSP Library
5 
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2008-2009 Matthew Davies and QMUL.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
16 #include "TempoTrackV2.h"
17 
18 #include <cmath>
19 #include <cstdlib>
20 #include <iostream>
21 
22 #include "maths/MathUtilities.h"
23 
24 #define EPS 0.0000008 // just some arbitrary small number
25 
26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
27  m_rate(rate), m_increment(increment) { }
29 
30 void
32 {
33  d_vec_t a(3);
34  d_vec_t b(3);
35  d_vec_t lp_df(df.size());
36 
37  //equivalent in matlab to [b,a] = butter(2,0.4);
38  a[0] = 1.0000;
39  a[1] = -0.3695;
40  a[2] = 0.1958;
41  b[0] = 0.2066;
42  b[1] = 0.4131;
43  b[2] = 0.2066;
44 
45  double inp1 = 0.;
46  double inp2 = 0.;
47  double out1 = 0.;
48  double out2 = 0.;
49 
50 
51  // forwards filtering
52  for (unsigned int i = 0;i < df.size();i++)
53  {
54  lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
55  inp2 = inp1;
56  inp1 = df[i];
57  out2 = out1;
58  out1 = lp_df[i];
59  }
60 
61  // copy forwards filtering to df...
62  // but, time-reversed, ready for backwards filtering
63  for (unsigned int i = 0;i < df.size();i++)
64  {
65  df[i] = lp_df[df.size()-i-1];
66  }
67 
68  for (unsigned int i = 0;i < df.size();i++)
69  {
70  lp_df[i] = 0.;
71  }
72 
73  inp1 = 0.; inp2 = 0.;
74  out1 = 0.; out2 = 0.;
75 
76  // backwards filetering on time-reversed df
77  for (unsigned int i = 0;i < df.size();i++)
78  {
79  lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
80  inp2 = inp1;
81  inp1 = df[i];
82  out2 = out1;
83  out1 = lp_df[i];
84  }
85 
86  // write the re-reversed (i.e. forward) version back to df
87  for (unsigned int i = 0;i < df.size();i++)
88  {
89  df[i] = lp_df[df.size()-i-1];
90  }
91 }
92 
93 
94 // MEPD 28/11/12
95 // This function now allows for a user to specify an inputtempo (in BPM)
96 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
97 // with a gaussian which is centered around the input tempo
98 // Note, if inputtempo = 120 and constraintempo = false, then functionality is
99 // as it was before
100 void
101 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
102  vector<double> &beat_period,
103  vector<double> &tempi,
104  double inputtempo, bool constraintempo)
105 {
106  // to follow matlab.. split into 512 sample frames with a 128 hop size
107  // calculate the acf,
108  // then the rcf.. and then stick the rcfs as columns of a matrix
109  // then call viterbi decoding with weight vector and transition matrix
110  // and get best path
111 
112  unsigned int wv_len = 128;
113 
114  // MEPD 28/11/12
115  // the default value of inputtempo in the beat tracking plugin is 120
116  // so if the user specifies a different inputtempo, the rayparam will be updated
117  // accordingly.
118  // note: 60*44100/512 is a magic number
119  // this might (will?) break if a user specifies a different frame rate for the onset detection function
120  double rayparam = (60*44100/512)/inputtempo;
121 
122  // these debug statements can be removed.
123 // std::cerr << "inputtempo" << inputtempo << std::endl;
124 // std::cerr << "rayparam" << rayparam << std::endl;
125 // std::cerr << "constraintempo" << constraintempo << std::endl;
126 
127  // make rayleigh weighting curve
128  d_vec_t wv(wv_len);
129 
130  // check whether or not to use rayleigh weighting (if constraintempo is false)
131  // or use gaussian weighting it (constraintempo is true)
132  if (constraintempo)
133  {
134  for (unsigned int i=0; i<wv.size(); i++)
135  {
136  // MEPD 28/11/12
137  // do a gaussian weighting instead of rayleigh
138  wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
139  }
140  }
141  else
142  {
143  for (unsigned int i=0; i<wv.size(); i++)
144  {
145  // MEPD 28/11/12
146  // standard rayleigh weighting over periodicities
147  wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
148  }
149  }
150 
151  // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
152  unsigned int winlen = 512;
153  unsigned int step = 128;
154 
155  // matrix to store output of comb filter bank, increment column of matrix at each frame
156  d_mat_t rcfmat;
157  int col_counter = -1;
158 
159  // main loop for beat period calculation
160  for (unsigned int i=0; i+winlen<df.size(); i+=step)
161  {
162  // get dfframe
163  d_vec_t dfframe(winlen);
164  for (unsigned int k=0; k<winlen; k++)
165  {
166  dfframe[k] = df[i+k];
167  }
168  // get rcf vector for current frame
169  d_vec_t rcf(wv_len);
170  get_rcf(dfframe,wv,rcf);
171 
172  rcfmat.push_back( d_vec_t() ); // adds a new column
173  col_counter++;
174  for (unsigned int j=0; j<rcf.size(); j++)
175  {
176  rcfmat[col_counter].push_back( rcf[j] );
177  }
178  }
179 
180  // now call viterbi decoding function
181  viterbi_decode(rcfmat,wv,beat_period,tempi);
182 }
183 
184 
185 void
186 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
187 {
188  // calculate autocorrelation function
189  // then rcf
190  // just hard code for now... don't really need separate functions to do this
191 
192  // make acf
193 
194  d_vec_t dfframe(dfframe_in);
195 
197 
198  d_vec_t acf(dfframe.size());
199 
200 
201  for (unsigned int lag=0; lag<dfframe.size(); lag++)
202  {
203  double sum = 0.;
204  double tmp = 0.;
205 
206  for (unsigned int n=0; n<(dfframe.size()-lag); n++)
207  {
208  tmp = dfframe[n] * dfframe[n+lag];
209  sum += tmp;
210  }
211  acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
212  }
213 
214  // now apply comb filtering
215  int numelem = 4;
216 
217  for (unsigned int i = 2;i < rcf.size();i++) // max beat period
218  {
219  for (int a = 1;a <= numelem;a++) // number of comb elements
220  {
221  for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
222  {
223  rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
224  }
225  }
226  }
227 
228  // apply adaptive threshold to rcf
230 
231  double rcfsum =0.;
232  for (unsigned int i=0; i<rcf.size(); i++)
233  {
234  rcf[i] += EPS ;
235  rcfsum += rcf[i];
236  }
237 
238  // normalise rcf to sum to unity
239  for (unsigned int i=0; i<rcf.size(); i++)
240  {
241  rcf[i] /= (rcfsum + EPS);
242  }
243 }
244 
245 void
246 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
247 {
248  // following Kevin Murphy's Viterbi decoding to get best path of
249  // beat periods through rfcmat
250 
251  // make transition matrix
252  d_mat_t tmat;
253  for (unsigned int i=0;i<wv.size();i++)
254  {
255  tmat.push_back ( d_vec_t() ); // adds a new column
256  for (unsigned int j=0; j<wv.size(); j++)
257  {
258  tmat[i].push_back(0.); // fill with zeros initially
259  }
260  }
261 
262  // variance of Gaussians in transition matrix
263  // formed of Gaussians on diagonal - implies slow tempo change
264  double sigma = 8.;
265  // don't want really short beat periods, or really long ones
266  for (unsigned int i=20;i <wv.size()-20; i++)
267  {
268  for (unsigned int j=20; j<wv.size()-20; j++)
269  {
270  double mu = static_cast<double>(i);
271  tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
272  }
273  }
274 
275  // parameters for Viterbi decoding... this part is taken from
276  // Murphy's matlab
277 
278  d_mat_t delta;
279  i_mat_t psi;
280  for (unsigned int i=0;i <rcfmat.size(); i++)
281  {
282  delta.push_back( d_vec_t());
283  psi.push_back( i_vec_t());
284  for (unsigned int j=0; j<rcfmat[i].size(); j++)
285  {
286  delta[i].push_back(0.); // fill with zeros initially
287  psi[i].push_back(0); // fill with zeros initially
288  }
289  }
290 
291 
292  unsigned int T = delta.size();
293 
294  if (T < 2) return; // can't do anything at all meaningful
295 
296  unsigned int Q = delta[0].size();
297 
298  // initialize first column of delta
299  for (unsigned int j=0; j<Q; j++)
300  {
301  delta[0][j] = wv[j] * rcfmat[0][j];
302  psi[0][j] = 0;
303  }
304 
305  double deltasum = 0.;
306  for (unsigned int i=0; i<Q; i++)
307  {
308  deltasum += delta[0][i];
309  }
310  for (unsigned int i=0; i<Q; i++)
311  {
312  delta[0][i] /= (deltasum + EPS);
313  }
314 
315 
316  for (unsigned int t=1; t<T; t++)
317  {
318  d_vec_t tmp_vec(Q);
319 
320  for (unsigned int j=0; j<Q; j++)
321  {
322  for (unsigned int i=0; i<Q; i++)
323  {
324  tmp_vec[i] = delta[t-1][i] * tmat[j][i];
325  }
326 
327  delta[t][j] = get_max_val(tmp_vec);
328 
329  psi[t][j] = get_max_ind(tmp_vec);
330 
331  delta[t][j] *= rcfmat[t][j];
332  }
333 
334  // normalise current delta column
335  double deltasum = 0.;
336  for (unsigned int i=0; i<Q; i++)
337  {
338  deltasum += delta[t][i];
339  }
340  for (unsigned int i=0; i<Q; i++)
341  {
342  delta[t][i] /= (deltasum + EPS);
343  }
344  }
345 
346  i_vec_t bestpath(T);
347  d_vec_t tmp_vec(Q);
348  for (unsigned int i=0; i<Q; i++)
349  {
350  tmp_vec[i] = delta[T-1][i];
351  }
352 
353  // find starting point - best beat period for "last" frame
354  bestpath[T-1] = get_max_ind(tmp_vec);
355 
356  // backtrace through index of maximum values in psi
357  for (unsigned int t=T-2; t>0 ;t--)
358  {
359  bestpath[t] = psi[t+1][bestpath[t+1]];
360  }
361 
362  // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
363  bestpath[0] = psi[1][bestpath[1]];
364 
365  unsigned int lastind = 0;
366  for (unsigned int i=0; i<T; i++)
367  {
368  unsigned int step = 128;
369  for (unsigned int j=0; j<step; j++)
370  {
371  lastind = i*step+j;
372  beat_period[lastind] = bestpath[i];
373  }
374 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
375  }
376 
377  //fill in the last values...
378  for (unsigned int i=lastind; i<beat_period.size(); i++)
379  {
380  beat_period[i] = beat_period[lastind];
381  }
382 
383  for (unsigned int i = 0; i < beat_period.size(); i++)
384  {
385  tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
386  }
387 }
388 
389 double
391 {
392  double maxval = 0.;
393  for (unsigned int i=0; i<df.size(); i++)
394  {
395  if (maxval < df[i])
396  {
397  maxval = df[i];
398  }
399  }
400 
401  return maxval;
402 }
403 
404 int
406 {
407  double maxval = 0.;
408  int ind = 0;
409  for (unsigned int i=0; i<df.size(); i++)
410  {
411  if (maxval < df[i])
412  {
413  maxval = df[i];
414  ind = i;
415  }
416  }
417 
418  return ind;
419 }
420 
421 void
423 {
424  double sum = 0.;
425  for (unsigned int i=0; i<df.size(); i++)
426  {
427  sum += df[i];
428  }
429 
430  for (unsigned int i=0; i<df.size(); i++)
431  {
432  df[i]/= (sum + EPS);
433  }
434 }
435 
436 // MEPD 28/11/12
437 // this function has been updated to allow the "alpha" and "tightness" parameters
438 // of the dynamic program to be set by the user
439 // the default value of alpha = 0.9 and tightness = 4
440 void
441 TempoTrackV2::calculateBeats(const vector<double> &df,
442  const vector<double> &beat_period,
443  vector<double> &beats, double alpha, double tightness)
444 {
445  if (df.empty() || beat_period.empty()) return;
446 
447  d_vec_t cumscore(df.size()); // store cumulative score
448  i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
449  d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
450 
451  for (unsigned int i=0; i<df.size(); i++)
452  {
453  localscore[i] = df[i];
454  backlink[i] = -1;
455  }
456 
457  //double tightness = 4.;
458  //double alpha = 0.9;
459  // MEPD 28/11/12
460  // debug statements that can be removed.
461 // std::cerr << "alpha" << alpha << std::endl;
462 // std::cerr << "tightness" << tightness << std::endl;
463 
464  // main loop
465  for (unsigned int i=0; i<localscore.size(); i++)
466  {
467  int prange_min = -2*beat_period[i];
468  int prange_max = round(-0.5*beat_period[i]);
469 
470  // transition range
471  d_vec_t txwt (prange_max - prange_min + 1);
472  d_vec_t scorecands (txwt.size());
473 
474  for (unsigned int j=0;j<txwt.size();j++)
475  {
476  double mu = static_cast<double> (beat_period[i]);
477  txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
478 
479  // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
480  // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
481 
482  int cscore_ind = i+prange_min+j;
483  if (cscore_ind >= 0)
484  {
485  scorecands[j] = txwt[j] * cumscore[cscore_ind];
486  }
487  }
488 
489  // find max value and index of maximum value
490  double vv = get_max_val(scorecands);
491  int xx = get_max_ind(scorecands);
492 
493  cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
494  backlink[i] = i+prange_min+xx;
495 
496 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
497  }
498 
499  // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
500  d_vec_t tmp_vec;
501  for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
502  {
503  tmp_vec.push_back(cumscore[i]);
504  }
505 
506  int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
507 
508  // can happen if no results obtained earlier (e.g. input too short)
509  if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1;
510 
511  // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
512  // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
513  i_vec_t ibeats;
514  ibeats.push_back(startpoint);
515 // std::cerr << "startpoint = " << startpoint << std::endl;
516  while (backlink[ibeats.back()] > 0)
517  {
518 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
519  int b = ibeats.back();
520  if (backlink[b] == b) break; // shouldn't happen... haha
521  ibeats.push_back(backlink[b]);
522  }
523 
524  // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
525  for (unsigned int i=0; i<ibeats.size(); i++)
526  {
527  beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
528  }
529 }
530 
531 
static void adaptiveThreshold(std::vector< double > &data)
Threshold the input/output vector data against a moving-mean average filter.
void normalise_vec(d_vec_t &df)
vector< int > i_vec_t
Definition: TempoTrackV2.h:73
void viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &bp, d_vec_t &tempi)
size_t m_increment
Definition: TempoTrackV2.h:79
double get_max_val(const d_vec_t &df)
int get_max_ind(const d_vec_t &df)
vector< vector< int > > i_mat_t
Definition: TempoTrackV2.h:74
#define EPS
void calculateBeats(const vector< double > &df, const vector< double > &beatPeriod, vector< double > &beats)
Definition: TempoTrackV2.h:58
vector< double > d_vec_t
Definition: TempoTrackV2.h:75
void get_rcf(const d_vec_t &dfframe, const d_vec_t &wv, d_vec_t &rcf)
TempoTrackV2(float sampleRate, size_t dfIncrement)
Construct a tempo tracker that will operate on beat detection function data calculated from audio at ...
vector< vector< double > > d_mat_t
Definition: TempoTrackV2.h:76
void calculateBeatPeriod(const vector< double > &df, vector< double > &beatPeriod, vector< double > &tempi)
Definition: TempoTrackV2.h:43
void filter_df(d_vec_t &df)