Mercurial > pylearn
view pylearn/datasets/utlc.py @ 1402:b14f3d6f5cd4
first version of a script to load the utlc datasets.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Fri, 21 Jan 2011 17:05:46 -0500 |
parents | |
children | 89017617ab36 |
line wrap: on
line source
""" user should use the load _ndarray_dataset or load_sparse_dataset function See the file PYLEARN_DB_PATH/UTCL/README for detail on the datasets. See the end of this file for an example on how to load the file. """ import cPickle import gzip import os import pylearn.io.filetensor as ft import config def load_ndarray_dataset(name): assert name in ['avicenna','harry','rita','sylvester','ule'] trname,vname,tename = [os.path.join(config.data_root(), 'UTLC','filetensor', name+'_'+subset+'.ft') for subset in ['train','valid','test']] train = load_filetensor(trname) valid = load_filetensor(vname) test = load_filetensor(tename) return train, valid, test def load_sparse_dataset(name): assert name in ['harry','terry','ule'] trname,vname,tename = [os.path.join(config.data_root(), 'UTLC','sparse', name+'_'+subset+'.npy') for subset in ['train','valid','test']] train = load_sparse(trname) valid = load_sparse(vname) test = load_sparse(tename) return train, valid, test def load_filetensor(fname): f = None try: if not os.path.exists(fname): fname = fname+'.gz' assert os.path.exists(fname) f = gzip.open(fname) else: f = open(fname) d = ft.read(f) finally: if f: f.close() return d def load_sparse(fname): f = None try: if not os.path.exists(fname): fname = fname+'.gz' assert os.path.exists(fname) f = gzip.open(fname) else: f = open(fname) d = cPickle.load(f) finally: if f: f.close() return d if __name__ == '__main__': import numpy import scipy.sparse for name in ['avicenna','harry','rita','sylvester','ule']: train, valid, test = load_ndarray_dataset(name) assert isinstance(train, numpy.ndarray) assert isinstance(valid, numpy.ndarray) assert isinstance(test, numpy.ndarray) import pdb;pdb.set_trace() assert train.shape[1]==test.shape[1]==valid.shape[1] for name in ['harry','terry','ule']: train, valid, test = load_sparse_dataset(name) assert scipy.sparse.issparse(train) assert scipy.sparse.issparse(valid) assert scipy.sparse.issparse(test) import pdb;pdb.set_trace() assert train.shape[1]==test.shape[1]==valid.shape[1]