view pylearn/datasets/config.py @ 1424:84cb96db5673

Return the first file in a list that exists in a directory in the PYLEARN_DATA_ROOT.
author Frederic Bastien <nouiz@nouiz.org>
date Tue, 08 Feb 2011 11:14:04 -0500
parents 2b82c5a11512
children 25985fb3bb4f
line wrap: on
line source

"""Configuration options for datasets


Especially, the locations of data files.
"""

import os, sys, logging
def _logger():
    """Return the logger shared by the logging helpers of this module."""
    return logging.getLogger('pylearn.datasets.config')


def debug(*msg):
    """Log the str() of every argument, joined by spaces, at DEBUG level."""
    _logger().debug(' '.join(str(m) for m in msg))


def info(*msg):
    """Log the str() of every argument, joined by spaces, at INFO level."""
    _logger().info(' '.join(str(m) for m in msg))


def warn(*msg):
    """Log the str() of every argument, joined by spaces, at WARNING level.

    Kept as an alias of warning() for existing callers.
    """
    # Logger.warn is a deprecated alias that newer Python versions removed;
    # Logger.warning produces the same record and is available everywhere.
    _logger().warning(' '.join(str(m) for m in msg))


def warning(*msg):
    """Log the str() of every argument, joined by spaces, at WARNING level."""
    _logger().warning(' '.join(str(m) for m in msg))


def error(*msg):
    """Log the str() of every argument, joined by spaces, at ERROR level."""
    _logger().error(' '.join(str(m) for m in msg))


def env_get(key, default, key2=None):
    """Return the value of an environment variable, or a default.

    :param key: name of the environment variable to read first.
    :param default: value returned when no variable is set.
    :param key2: optional alternative variable name, consulted only when
        `key` is not set.
    :return: the value of `key` if set, else the value of `key2` if given
        and set, else `default`.

    A warning is logged when the default is used, but only the first time
    this happens (tracked by the `env_get.first_warning` attribute).
    """
    value = os.getenv(key)
    if value is None and key2:
        # Fall back to the alternative name; from here on it is also the
        # name reported in the warning.
        key = key2
        value = os.getenv(key)

    if value is not None:
        return value

    if env_get.first_warning:
        warning("Environment variable", key, 'is not set. Using default of', default)
        env_get.first_warning = False
    return default


# Flag consulted by env_get() so the "not set" warning is emitted only once.
env_get.first_warning = True

def data_root():
    """Deprecated, use data_roots() or get_filepath_in_roots().

    It is deprecated because it cannot return more than one path: only the
    first ':'-separated entry of the configured value is returned.

    The value is read through env_get() from PYLEARN_DATA_ROOT, then from
    DBPATH, and defaults to the 'data' directory under the home directory.
    """
    home = os.getenv('HOME')
    if home is None:
        # The default is computed before env_get() is called, so an unset
        # HOME used to raise TypeError (None + str) even when
        # PYLEARN_DATA_ROOT was set. Let the standard library work out the
        # home directory instead.
        home = os.path.expanduser('~')
    roots = env_get('PYLEARN_DATA_ROOT', home + '/data', 'DBPATH')
    return roots.split(':')[0]

def data_roots():
    """Return the list of existing directories to search for data files.

    The directories come from the ':'-separated PYLEARN_DATA_ROOT
    environment variable; when it is not set, the single path given by
    data_root() is used instead. Paths that do not exist are dropped.

    The list is computed on the first call and stored on the function as
    `data_roots.rval`; later calls return that same list object.
    """
    try:
        return data_roots.rval
    except AttributeError:
        # First call: nothing cached yet, compute the list below.
        pass

    env_value = os.getenv('PYLEARN_DATA_ROOT')
    if env_value is None:
        candidates = [data_root()]
    else:
        candidates = env_value.split(':')

    # Keep only the directories that actually exist.
    existing = [path for path in candidates if os.path.exists(path)]
    data_roots.rval = existing
    return existing


def get_filepath_in_roots(*names):
    """Return the full path of the first of `names` found under a data root.

    Each name is joined, in the order given, with every directory returned
    by data_roots(); the first resulting path that exists is returned.
    Passing several names therefore yields whichever one is available,
    with earlier names taking precedence.

    Returns None when none of the candidate paths exists.
    """
    # Lazily enumerate candidates: every root for the first name, then
    # every root for the second name, and so on.
    candidates = (os.path.join(root, name)
                  for name in names
                  for root in data_roots())
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None