Mercurial > pylearn
annotate dataset.py @ 3:378b68d5c4ad
Added first (untested) version of ArrayDataSet
author | bengioy@bengiomac.local |
---|---|
date | Sun, 23 Mar 2008 14:41:22 -0400 |
parents | 3fddb1c8f955 |
children | f7dcfb5f9d5b |
rev | line source |
---|---|
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
1 |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
2 |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
3 class DataSet(object): |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
4 """ |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
5 This is a virtual base class or interface for datasets. |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
6 A dataset is basically an iterator over examples. It does not necessarily |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
7 have a fixed length (this is useful for 'streams' which feed on-line learning). |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
8 Datasets with fixed and known length are FiniteDataSet, a subclass of DataSet. |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
9 Examples and datasets have named fields. |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
10 One can obtain a sub-dataset by taking dataset.field or dataset(field1,field2,field3,...). |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
11 Fields are not mutually exclusive, i.e. two fields can overlap in their actual content. |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
12 The content of a field can be of any type, but often will be a numpy tensor. |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
13 """ |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
14 |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
15 def __init__(self): |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
16 pass |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
17 |
3
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
18 def __iter__(self): |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
19 return self |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
20 |
3
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
21 def next(self): |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
22 """Return the next example in the dataset.""" |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
23 raise NotImplementedError |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
24 |
3
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
25 def __getattr__(self,fieldname): |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
26 """Return a sub-dataset containing only the given fieldname as field.""" |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
27 return self(fieldname) |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
28 |
3
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
29 def __call__(self,*fieldnames): |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
30 """Return a sub-dataset containing only the given fieldnames as fields.""" |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
31 raise NotImplementedError |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
32 |
3
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
33 def fieldNames(self): |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
34 """Return the list of field names that are supported by getattr and getFields.""" |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
35 raise NotImplementedError |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
36 |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
37 class FiniteDataSet(DataSet): |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
38 """ |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
39 Virtual interface, a subclass of DataSet for datasets which have a finite, known length. |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
40 Examples are indexed by an integer between 0 and self.length()-1, |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
41 and a subdataset can be obtained by slicing. |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
42 """ |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
43 |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
44 def __init__(self): |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
45 pass |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
46 |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
47 def __len__(self): |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
48 """len(dataset) returns the number of examples in the dataset.""" |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
49 raise NotImplementedError |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
50 |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
51 def __getitem__(self,i): |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
52 """dataset[i] returns the (i+1)-th example of the dataset.""" |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
53 raise NotImplementedError |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
54 |
2
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
55 def __getslice__(self,*slice_args): |
3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
bengioy@bengiomac.local
parents:
1
diff
changeset
|
56 """dataset[i:j] returns the subdataset with examples i,i+1,...,j-1.""" |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
57 raise NotImplementedError |
3
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
58 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
59 # we may want ArrayDataSet defined in another python file |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
60 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
61 from numpy import * |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
62 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
63 class ArrayDataSet(FiniteDataSet): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
64 """ |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
65 A fixed-length and fixed-width dataset in which each element is a numpy.array |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
66 or a number, hence the whole dataset corresponds to a numpy.array. Fields |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
67 must correspond to a slice of columns. If the dataset has fields, |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
68 each 'example' is just a one-row ArrayDataSet, otherwise it is a numpy.array. |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
69 Any dataset can also be converted to a numpy.array (losing the notion of fields) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
70 by the asarray(dataset) call. |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
71 """ |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
72 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
73 def __self__(self,dataset=None,data=None,fields={}): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
74 """ |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
75 Construct an ArrayDataSet, either from a DataSet, or from |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
76 a numpy.array plus an optional specification of fields (by |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
77 a dictionary of column slices indexed by field names). |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
78 """ |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
79 self.current_row=-1 # used for view of this dataset as an iterator |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
80 if dataset: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
81 assert data==None and fields=={} |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
82 # convert dataset to an ArrayDataSet |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
83 raise NotImplementedError |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
84 if data: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
85 assert dataset==None |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
86 self.data=data |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
87 self.fields=fields |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
88 self.width = data.shape[1] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
89 for fieldname in fields: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
90 fieldslice=fields[fieldname] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
91 assert fieldslice.start>=0 and fieldslice.stop<=width) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
92 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
93 def next(self): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
94 """Return the next example in the dataset. If the dataset has fields, |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
95 the 'example' is just a one-row ArrayDataSet, otherwise it is a numpy.array.""" |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
96 if fields: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
97 self.current_row+=1 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
98 if self.current_row==len(self.data): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
99 self.current_row=0 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
100 return self[self.current_row] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
101 else: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
102 return self.data[self.current_row] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
103 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
104 def __getattr__(self,fieldname): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
105 """Return a sub-dataset containing only the given fieldname as field.""" |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
106 data = self.fields[fieldname] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
107 return ArrayDataSet(data=data) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
108 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
109 def __call__(self,*fieldnames): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
110 """Return a sub-dataset containing only the given fieldnames as fields.""" |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
111 min_col=self.data.shape[1] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
112 max_col=0 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
113 for field_slice in self.fields.values(): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
114 min_col=min(min_col,field_slice.start) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
115 max_col=max(max_col,field_slice.stop) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
116 new_fields={} |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
117 for field in self.fields: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
118 new_fields[field[0]]=slice(field[1].start-min_col,field[1].stop-min_col,field[1].step) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
119 return ArrayDataSet(data=self.data[:,min_col:max_col],fields=new_fields) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
120 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
121 def fieldNames(self): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
122 """Return the list of field names that are supported by getattr and getFields.""" |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
123 return self.fields.keys() |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
124 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
125 def __len__(self): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
126 """len(dataset) returns the number of examples in the dataset.""" |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
127 return len(self.data) |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
0
diff
changeset
|
128 |
3
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
129 def __getitem__(self,i): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
130 """ |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
131 dataset[i] returns the (i+1)-th example of the dataset. If the dataset has fields |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
132 then a one-example dataset is returned (to be able to handle example.field accesses). |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
133 """ |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
134 if self.fields: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
135 if isinstance(i,slice): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
136 return ArrayDataSet(data=data[slice],fields=self.fields) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
137 return ArrayDataSet(data=self.data[i:i+1],fields=self.fields) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
138 else: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
139 return data[i] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
140 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
141 def __getslice__(self,*slice_args): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
142 """dataset[i:j] returns the subdataset with examples i,i+1,...,j-1.""" |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
143 return ArrayDataSet(data=self.data[slice(slice_args)],fields=self.fields) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
144 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
145 def asarray(self): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
146 if self.fields: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
147 columns_used = zeros((self.data.shape[1]),dtype=bool) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
148 for field_slice in self.fields.values(): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
149 for c in xrange(field_slice.start,field_slice.stop,field_slice.step): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
150 columns_used[c]=True |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
151 # try to figure out if we can map all the slices into one slice: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
152 mappable_to_one_slice = True |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
153 start=0 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
154 while start<len(columns_used) and not columns_used[start]: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
155 start+=1 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
156 stop=len(columns_used) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
157 while stop>0 and not columns_used[stop-1]: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
158 stop-=1 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
159 step=0 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
160 i=start |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
161 while i<stop: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
162 j=i+1 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
163 while not columns_used[j] and j<stop: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
164 j+=1 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
165 if step: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
166 if step!=j-i: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
167 mappable_to_one_slice = False |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
168 break |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
169 else: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
170 step = j-i |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
171 if mappable_to_one_slice: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
172 return data[slice(start,stop,step)] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
173 # else make contiguous copy |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
174 n_columns = sum(columns_used) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
175 result = zeros((len(self.data),n_columns)+self.data.shape[2:],self.data.dtype) |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
176 c=0 |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
177 for field_slice in self.fields.values(): |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
178 slice_width=field_slice.stop-field_slice.start |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
179 if field_slice.step: |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
180 slice_width /= field_slice.step |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
181 # copy the field here |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
182 result[:,slice(c,slice_width)]=self.data[field_slice] |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
183 c+=slice_width |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
184 return result |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
185 return self.data |
378b68d5c4ad
Added first (untested) version of ArrayDataSet
bengioy@bengiomac.local
parents:
2
diff
changeset
|
186 |