annotate pylearn/datasets/tzanetakis.py @ 1492:e7c4d031d333

Fix for Windows paths
author Olivier Delalleau <delallea@iro>
date Tue, 16 Aug 2011 15:44:01 -0400
parents 651eb6506d91
children
rev   line source
605
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 """
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 Load Tzanetakis' genre-classification dataset.
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 """
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5 from __future__ import absolute_import
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 import os
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8 import numpy
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
9
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
10 from ..io.amat import AMat
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
11 from .config import data_root
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
12 from .dataset import dataset_factory, Dataset
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
13
640
af14b1f32882 revised tzanetakis, added data centering
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 605
diff changeset
def centre_data(x, inplace=False):
    """Standardize `x` along axis 0: subtract the mean, divide by the std.

    :param x: numpy array; the mean and standard deviation are computed along
        axis 0 and applied to every entry along that axis.
    :param inplace: if True, `x` itself is overwritten and returned, so its
        dtype must be able to hold the floating-point result.  If False
        (the default) `x` is left untouched and a new array is returned.
    :returns: the standardized array (`x` itself when `inplace` is True).

    When `inplace` is False and `x` does not have a floating-point or complex
    dtype (e.g. integers or booleans), the working copy is made as float64.
    A same-dtype copy could not store the centred values: the in-place
    updates below would either fail or truncate them.
    """
    if inplace:
        rval = x
    elif numpy.issubdtype(x.dtype, numpy.inexact):
        rval = x.copy()
    else:
        # astype always returns a new array, so `x` is still not modified
        rval = x.astype('float64')
    #zero-mean
    rval -= numpy.mean(rval, axis=0)
    #unit-variance
    # the 1e-6 offset avoids a division by zero when the std is 0
    rval *= 1.0 / (1.0e-6 + numpy.std(rval, axis=0))
    return rval
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
21
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def mfcc16(segments_per_song = 1, include_covariance = True, random_split = 0,
        ntrain = 700, nvalid = 100, ntest = 200,
        normalize=True):
    """Build train/valid/test splits from the 'feat_mfcc16_540_1.stat.amat' file.

    :param segments_per_song: only the value 1 is implemented.
    :param include_covariance: if False, only the first 16 of the 152 input
        columns are kept (presumably the remaining columns hold the
        covariance statistics -- inferred from the parameter name).
    :param random_split: selects the shuffle; the seed used is
        `random_split + 1`.
    :param ntrain: number of rows taken for the training split.
    :param nvalid: number of rows taken for the validation split.
    :param ntest: number of rows taken for the test split.
    :param normalize: if True, each split is passed through `centre_data`
        on its own, so every split is standardized with its own statistics.
    :returns: a `Dataset` with `train`, `valid` and `test` attributes (each a
        `Dataset.Obj` with fields `x` and `y`) and `n_classes` set to 10.
    :raises NotImplementedError: if `segments_per_song` is not 1.

    Targets are not read from the file: they are generated as ten consecutive
    runs of `100 * segments_per_song` identical class indices 0..9, which
    assumes the rows of the file are grouped by class in that order.
    """
    if segments_per_song != 1:
        raise NotImplementedError()

    n_rows = 1000 * segments_per_song

    amat_path = os.path.join(data_root(), 'tzanetakis','feat_mfcc16_540_1.stat.amat')
    loader = AMat(path=amat_path)
    features = loader.input
    assert features.shape == (n_rows, 152)

    # class index of every row: 0 repeated, then 1 repeated, ... up to 9
    class_column = numpy.arange(10).reshape(10,1)
    labels = numpy.tile(class_column, 100 * segments_per_song).reshape(n_rows)

    if not include_covariance:
        features = features[:,0:16]

    # shuffle inputs and targets in place; a fresh generator with the same
    # seed is used for each so that both receive the same reordering
    assert features.shape[0] == labels.shape[0]
    seed = random_split + 1
    for array in (features, labels):
        numpy.random.RandomState(seed).shuffle(array)

    def make_split(start, stop):
        # rows start..stop of the shuffled data, standardized when requested
        inputs = features[start:stop]
        if normalize:
            inputs = centre_data(inputs, inplace=True)
        return Dataset.Obj(x=inputs, y=labels[start:stop])

    #construct a dataset to return
    rval = Dataset()
    valid_stop = ntrain + nvalid
    test_stop = valid_stop + ntest
    rval.train = make_split(0, ntrain)
    rval.valid = make_split(ntrain, valid_stop)
    rval.test = make_split(valid_stop, test_stop)

    rval.n_classes = 10

    return rval
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
60
671
9e62fd6b6677 adding wavread and tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 640
diff changeset
61 import theano
605
20953adfdef8 initial tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
62
671
9e62fd6b6677 adding wavread and tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 640
diff changeset
class TzanetakisExample(theano.Op):
    """Return the i'th file, label pair from the Tzanetakis dataset.

    Constructing an instance reads the tracklist file (see `read_tracklist`).
    """

    # Genre names; the position of a name in this list is its class index.
    _class_names = ('blues classical country disco hiphop jazz metal pop reggae rock').split()

    @staticmethod
    def read_tracklist(alt_path_root=None):
        """Read the tzanetakis dataset file

        The file is `tzanetakis/tracklist.txt` under `data_root()`.  On every
        line the first whitespace-separated token is taken as the track path
        and the second as its label.

        :param alt_path_root: if not None, only the part of each track path
            after the last '/' is kept, and it is prefixed with
            `alt_path_root + '/'`.
        :rtype: (list, list)
        :returns: paths, labels
        :raises IndexError: if a line has fewer than two tokens (the line is
            printed before the error propagates).
        """
        # os.path.join, like mfcc16 above, rather than a hard-coded '/'
        tracklist = open(os.path.join(data_root(), 'tzanetakis', 'tracklist.txt'))
        path = []
        label = []
        try:
            for line in tracklist:
                toks = line.split()
                try:
                    track_path = toks[0]
                    track_label = toks[1]
                except IndexError:
                    print('BAD LINE IN TZANETAKIS TRACKLIST')
                    print('%s %s' % (line, toks))
                    raise
                if alt_path_root is not None:
                    file_name = track_path.split('/')[-1]
                    track_path = alt_path_root + '/' + file_name
                path.append(track_path)
                label.append(track_label)
        finally:
            # always release the file handle, also when a bad line is found
            tracklist.close()
        assert len(path) == 1000
        return path, label

    # Class-level lookup from genre name to class index; every value is a
    # 0-d int64 array, the same form as the per-instance table of __init__.
    class_idx_dict = dict((name, numpy.asarray(i, dtype='int64'))
            for i, name in enumerate(_class_names))

    def __init__(self, alt_path_root=None):
        """Load the track list and build this instance's label lookup table.

        :param alt_path_root: forwarded to `read_tracklist`.
        """
        self.path, self.label = self.read_tracklist(alt_path_root)
        # the instance gets its own dict and its own arrays
        self.class_idx_dict = dict((name, numpy.asarray(i, dtype='int64'))
                for i, name in enumerate(self._class_names))

    n_examples = property(lambda self: len(self.path))
    nclasses = property(lambda self: 10)

    def make_node(self, idx):
        """Build the Apply node mapping an integer index to (path, label).

        :param idx: anything `theano.tensor.as_tensor_variable` accepts; its
            type must be one of `theano.tensor.int_types`.
        :raises TypeError: if the type of `idx` is not an accepted integer type.
        """
        idx_ = theano.tensor.as_tensor_variable(idx)
        if idx_.type not in theano.tensor.int_types:
            raise TypeError(idx)
        return theano.Apply(self,
                [idx_],
                [theano.generic('tzanetakis_path'),
                    theano.tensor.lscalar('tzanetakis_label')])

    def perform(self, node, inputs, output_storage):
        """Store the path and class index of example `idx` in the outputs.

        :param node: unused.
        :param inputs: one-element sequence holding the index `idx`.
        :param output_storage: pair of containers (path, label); the result
            is written into element 0 of each.
        """
        # unpacked here instead of in the signature: tuple parameters are
        # not valid syntax in Python 3
        (idx,) = inputs
        (path, label) = output_storage
        path[0] = self.path[idx]
        label[0] = self.class_idx_dict[self.label[idx]]

    def grad(self, inputs, g_output):
        """Return None for every input: no gradient is defined here."""
        return [None for i in inputs]
674
f3b7d6956209 changes to tzanetakis and wavread
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 671
diff changeset
128
689
651eb6506d91 do not read data file on import
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 682
diff changeset
129 #tzanetakis_example = TzanetakisExample() #requires reading a data file
671
9e62fd6b6677 adding wavread and tzanetakis dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 640
diff changeset
130