comparison pylearn/datasets/utlc.py @ 1402:b14f3d6f5cd4

first version of a script to load the utlc datasets.
author Frederic Bastien <nouiz@nouiz.org>
date Fri, 21 Jan 2011 17:05:46 -0500
parents
children 89017617ab36
comparison
equal deleted inserted replaced
1401:e06c0ff46d2a 1402:b14f3d6f5cd4
1 """
2 Users should call the load_ndarray_dataset or load_sparse_dataset function.
3 See the file PYLEARN_DB_PATH/UTLC/README for details on the datasets.
4 See the end of this file for an example on how to load the file.
5 """
6
7 import cPickle
8 import gzip
9 import os
10
11 import pylearn.io.filetensor as ft
12 import config
13
def load_ndarray_dataset(name):
    """Load the train, valid and test splits of a dense UTLC dataset.

    The three files are looked up as
    <config.data_root()>/UTLC/filetensor/<name>_<subset>.ft and read with
    load_filetensor.

    :param name: one of 'avicenna', 'harry', 'rita', 'sylvester', 'ule'.
    :return: the tuple (train, valid, test).
    """
    assert name in ['avicenna','harry','rita','sylvester','ule']

    # Build all three paths first, then load them in train/valid/test order.
    paths = []
    for split in ['train', 'valid', 'test']:
        paths.append(os.path.join(config.data_root(),
                                  'UTLC', 'filetensor',
                                  name + '_' + split + '.ft'))
    train_path, valid_path, test_path = paths

    return (load_filetensor(train_path),
            load_filetensor(valid_path),
            load_filetensor(test_path))
24
def load_sparse_dataset(name):
    """Load the train, valid and test splits of a sparse UTLC dataset.

    The three files are looked up as
    <config.data_root()>/UTLC/sparse/<name>_<subset>.npy and read with
    load_sparse.

    :param name: one of 'harry', 'terry', 'ule'.
    :return: the tuple (train, valid, test).
    """
    assert name in ['harry','terry','ule']

    # Build all three paths first, then load them in train/valid/test order.
    paths = []
    for split in ['train', 'valid', 'test']:
        paths.append(os.path.join(config.data_root(),
                                  'UTLC', 'sparse',
                                  name + '_' + split + '.npy'))
    train_path, valid_path, test_path = paths

    return (load_sparse(train_path),
            load_sparse(valid_path),
            load_sparse(test_path))
35
def load_filetensor(fname):
    """Read the content of a filetensor file, plain or gzip-compressed.

    If `fname` does not exist, `fname + '.gz'` is opened through gzip
    instead.

    :param fname: path of the file, without any '.gz' suffix.
    :return: whatever `ft.read` returns for the opened file.
    :raises AssertionError: if neither `fname` nor `fname + '.gz'` exists.
    """
    if os.path.exists(fname):
        # Open in binary mode: the gzip branch hands raw bytes to ft.read,
        # so the plain branch must too. Text mode may translate newlines
        # and corrupt the data on some platforms.
        f = open(fname, 'rb')
    else:
        fname = fname + '.gz'
        assert os.path.exists(fname), "dataset file not found: %s" % fname
        f = gzip.open(fname, 'rb')

    # try/finally rather than `with`: gzip file objects are not context
    # managers on older Python 2 releases.
    try:
        return ft.read(f)
    finally:
        f.close()
51
def load_sparse(fname):
    """Unpickle the object stored in a file, plain or gzip-compressed.

    If `fname` does not exist, `fname + '.gz'` is opened through gzip
    instead.

    NOTE: unpickling can execute arbitrary code; only use this on trusted
    dataset files.

    :param fname: path of the pickle file, without any '.gz' suffix.
    :return: the unpickled object.
    :raises AssertionError: if neither `fname` nor `fname + '.gz'` exists.
    """
    if os.path.exists(fname):
        # Pickles are byte streams: open in binary mode so no newline
        # translation or text decoding is applied to the data.
        f = open(fname, 'rb')
    else:
        fname = fname + '.gz'
        assert os.path.exists(fname), "dataset file not found: %s" % fname
        f = gzip.open(fname, 'rb')

    # try/finally rather than `with`: gzip file objects are not context
    # managers on older Python 2 releases.
    try:
        return cPickle.load(f)
    finally:
        f.close()
66
if __name__ == '__main__':
    # Self-test and usage example: load every known dataset and check the
    # type of each split and that all splits have the same number of
    # columns. The leftover pdb breakpoints were removed so the script runs
    # to completion unattended.
    import numpy
    import scipy.sparse

    for name in ['avicenna','harry','rita','sylvester','ule']:
        train, valid, test = load_ndarray_dataset(name)
        assert isinstance(train, numpy.ndarray)
        assert isinstance(valid, numpy.ndarray)
        assert isinstance(test, numpy.ndarray)
        assert train.shape[1]==test.shape[1]==valid.shape[1]

    for name in ['harry','terry','ule']:
        train, valid, test = load_sparse_dataset(name)
        assert scipy.sparse.issparse(train)
        assert scipy.sparse.issparse(valid)
        assert scipy.sparse.issparse(test)
        assert train.shape[1]==test.shape[1]==valid.shape[1]
85