Mercurial > pylearn
comparison pylearn/datasets/utlc.py @ 1402:b14f3d6f5cd4
first version of a script to load the utlc datasets.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Fri, 21 Jan 2011 17:05:46 -0500 |
parents | |
children | 89017617ab36 |
comparison
equal
deleted
inserted
replaced
1401:e06c0ff46d2a | 1402:b14f3d6f5cd4 |
---|---|
1 """ | |
2 Users should use the load_ndarray_dataset or load_sparse_dataset function. | |
3 See the file PYLEARN_DB_PATH/UTLC/README for details on the datasets. | |
4 See the end of this file for an example on how to load the file. | |
5 """ | |
6 | |
7 import cPickle | |
8 import gzip | |
9 import os | |
10 | |
11 import pylearn.io.filetensor as ft | |
12 import config | |
13 | |
def load_ndarray_dataset(name):
    """Load the train, valid and test splits of a dense UTLC dataset.

    The three files are looked up under
    ``config.data_root()/UTLC/filetensor`` as ``<name>_<subset>.ft`` and
    read with `load_filetensor`.

    :param name: one of 'avicenna', 'harry', 'rita', 'sylvester', 'ule'.
    :return: the tuple ``(train, valid, test)``.
    """
    assert name in ['avicenna','harry','rita','sylvester','ule']

    # Build every path first, then load, in train/valid/test order.
    paths = []
    for subset in ['train','valid','test']:
        filename = name+'_'+subset+'.ft'
        paths.append(os.path.join(config.data_root(),
                                  'UTLC','filetensor',
                                  filename))

    return tuple(load_filetensor(path) for path in paths)
24 | |
def load_sparse_dataset(name):
    """Load the train, valid and test splits of a sparse UTLC dataset.

    The three files are looked up under
    ``config.data_root()/UTLC/sparse`` as ``<name>_<subset>.npy`` and
    read with `load_sparse`.

    :param name: one of 'harry', 'terry', 'ule'.
    :return: the tuple ``(train, valid, test)``.
    """
    assert name in ['harry','terry','ule']

    # Build every path first, then load, in train/valid/test order.
    paths = []
    for subset in ['train','valid','test']:
        filename = name+'_'+subset+'.npy'
        paths.append(os.path.join(config.data_root(),
                                  'UTLC','sparse',
                                  filename))

    return tuple(load_sparse(path) for path in paths)
35 | |
def load_filetensor(fname):
    """Read the content of a filetensor file, plain or gzipped.

    If `fname` does not exist, ``fname + '.gz'`` is opened with gzip
    instead.

    :param fname: path of the file, without any '.gz' suffix.
    :return: whatever ``ft.read`` returns for the opened file.
    :raises AssertionError: if neither `fname` nor ``fname + '.gz'`` exists.
    """
    f = None
    try:
        if not os.path.exists(fname):
            fname = fname + '.gz'
            assert os.path.exists(fname), (
                'no such file: %s (also tried without the .gz suffix)' % fname)
            f = gzip.open(fname)
        else:
            # gzip.open defaults to binary mode; open the plain file in
            # binary mode too so both branches hand ft.read the same kind of
            # stream (no newline translation, no text decoding).
            f = open(fname, 'rb')
        d = ft.read(f)
    finally:
        # Close the file even when reading fails.
        if f is not None:
            f.close()

    return d
51 | |
def load_sparse(fname):
    """Unpickle the object stored in a file, plain or gzipped.

    If `fname` does not exist, ``fname + '.gz'`` is opened with gzip
    instead.

    :param fname: path of the pickle file, without any '.gz' suffix.
    :return: the object returned by ``cPickle.load``.
    :raises AssertionError: if neither `fname` nor ``fname + '.gz'`` exists.
    """
    f = None
    try:
        if not os.path.exists(fname):
            fname = fname + '.gz'
            assert os.path.exists(fname), (
                'no such file: %s (also tried without the .gz suffix)' % fname)
            f = gzip.open(fname)
        else:
            # Pickle data is binary: open in binary mode, as gzip.open does
            # by default, so the bytes reach the unpickler unaltered.
            f = open(fname, 'rb')
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files that come from a trusted location.
        d = cPickle.load(f)
    finally:
        # Close the file even when unpickling fails.
        if f is not None:
            f.close()
    return d
66 | |
if __name__ == '__main__':
    # Usage example and smoke test: load every dataset and check that the
    # three splits have the expected container type and the same number of
    # columns. The leftover pdb breakpoints were removed so that the check
    # runs unattended.
    import numpy
    import scipy.sparse

    for name in ['avicenna','harry','rita','sylvester','ule']:
        train, valid, test = load_ndarray_dataset(name)
        assert isinstance(train, numpy.ndarray)
        assert isinstance(valid, numpy.ndarray)
        assert isinstance(test, numpy.ndarray)
        assert train.shape[1]==test.shape[1]==valid.shape[1]

    for name in ['harry','terry','ule']:
        train, valid, test = load_sparse_dataset(name)
        assert scipy.sparse.issparse(train)
        assert scipy.sparse.issparse(valid)
        assert scipy.sparse.issparse(test)
        assert train.shape[1]==test.shape[1]==valid.shape[1]
85 |