view pylearn/datasets/embeddings/one-per-line.py @ 1476:8c10bda4bb5f

Configured default train/valid/test split for icml07.MNIST_rotated_background dataset. Defaults are the ones used by Hugo in the ICML07 paper and in all contractive auto-encoder papers.
author gdesjardins
date Fri, 20 May 2011 16:53:00 -0400
parents b054271b2504
children
line wrap: on
line source

#!/usr/bin/python

import string
#import psyco

# Path of the weights file. The script skips its first line and parses the
# second line as whitespace-separated floats.
weightsfile = "lm-weights.txt"
# Not referenced in the visible part of this script; presumably the vocabulary
# list that matches the weights -- TODO confirm.
vocabfile = "words.asc"
# Number of rows the script prints; presumably the vocabulary size -- TODO
# confirm.
size = 30000
# Number of values printed per row. The weights line must contain exactly
# size * dimensions values or the script aborts on an assertion.
dimensions = 50

import numpy, math
import sys
from percent import percent

# Not used by the visible code; kept so the module-level name stays defined.
word_to_vector = {}

# Weights file layout as consumed here: one line that is skipped, then a
# single line holding all size * dimensions values separated by whitespace.
# The `with` block guarantees the file handle is closed once parsing is done.
with open(weightsfile) as f:
    f.readline()
    vals = [float(v) for v in f.readline().split()]
assert len(vals) == size * dimensions

# Reversing the flat list and then walking the row indices from last to first
# emits the rows in their original file order, with the values inside each
# row reversed. Output is one row per line, values tab-separated.
vals.reverse()
for i in range(size - 1, -1, -1):
    row = vals[dimensions * i:dimensions * (i + 1)]
    # Single-argument print(...) behaves the same as the print statement.
    print("\t".join([repr(v) for v in row]))