pylearn changeset 467:f3711bcc467e
Fixed a bug in how embeddings are read
author    Joseph Turian <turian@iro.umontreal.ca>
date      Mon, 20 Oct 2008 19:14:06 -0400
parents   23221eefb70e
children  a07948f780b9
files     sandbox/embeddings/process.py
diffstat  1 files changed, 23 insertions(+), 1 deletions(-)
--- a/sandbox/embeddings/process.py	Wed Oct 15 18:59:55 2008 -0400
+++ b/sandbox/embeddings/process.py	Mon Oct 20 19:14:06 2008 -0400
@@ -34,7 +34,6 @@
     f.readline()
     vals = [float(v) for v in string.split(f.readline())]
     assert len(vals) == NUMBER_OF_WORDS * DIMENSIONS
-    vals.reverse()
     for i in range(NUMBER_OF_WORDS):
         l = vals[DIMENSIONS*i:DIMENSIONS*(i+1)]
         w = __words[i]
@@ -90,3 +89,26 @@
 #        assert not strict
 #        e.append(__word_to_embedding[UNKNOWN])
 #    return e
+
+#def test():
+#    """
+#    Debugging code.
+#    """
+#    read_embeddings()
+#    for w in __word_to_embedding:
+#        assert len(__word_to_embedding[w]) == 50
+#    import numpy
+#    for w1 in __words:
+#        e1 = numpy.asarray(__word_to_embedding[w1])
+#        lst = []
+#        print w1, numpy.dot(e1, e1)
+#        for w2 in __word_to_embedding:
+#            if w1 >= w2: continue
+#            e2 = numpy.asarray(__word_to_embedding[w2])
+#            d = (e1 - e2)
+#            l2 = numpy.dot(d, d)
+#            lst.append((l2, w1, w2))
+#        lst.sort()
+#        print lst[:10]
+#
+#test()
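
The fix drops vals.reverse(): the value file appears to be word-major (word 0's DIMENSIONS floats first, then word 1's, and so on), so reversing the flat list before slicing handed each word another word's values, in reverse order. A minimal sketch of that failure mode, using hypothetical toy data rather than the repository's real word list:

    # Minimal sketch with hypothetical toy data: why reversing the flat
    # value list before slicing corrupts the per-word embeddings.
    DIMENSIONS = 2
    words = ["the", "cat"]        # stand-in for the module's __words
    vals = [0.1, 0.2, 0.3, 0.4]   # file order: "the" -> [0.1, 0.2], "cat" -> [0.3, 0.4]

    def chunk(vals):
        # Mirrors the loop in process.py:
        # word i gets vals[DIMENSIONS*i:DIMENSIONS*(i+1)].
        return {w: vals[DIMENSIONS * i:DIMENSIONS * (i + 1)]
                for i, w in enumerate(words)}

    print(chunk(vals))        # fixed: {'the': [0.1, 0.2], 'cat': [0.3, 0.4]}
    print(chunk(vals[::-1]))  # buggy: {'the': [0.4, 0.3], 'cat': [0.2, 0.1]}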
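
The appended test() is Python 2 and is kept commented out in the changeset. It sanity-checks the embedding width, then for each word ranks the other words by squared L2 distance and prints the closest pairs. A runnable Python 3 sketch of the same check, with a hypothetical toy dictionary standing in for __word_to_embedding:

    # Python 3 sketch of the commented-out test(): for each word, rank the
    # others by squared L2 distance between embeddings and show the nearest.
    # The embeddings below are hypothetical stand-ins for __word_to_embedding.
    import numpy

    word_to_embedding = {
        "the": numpy.array([0.1, 0.2]),
        "a":   numpy.array([0.1, 0.3]),
        "cat": numpy.array([0.9, 0.8]),
    }

    for w1, e1 in word_to_embedding.items():
        print(w1, numpy.dot(e1, e1))      # squared norm, as in the original
        lst = []
        for w2, e2 in word_to_embedding.items():
            if w1 >= w2:                  # skip self and duplicate pairs
                continue
            d = e1 - e2
            lst.append((numpy.dot(d, d), w1, w2))
        lst.sort()
        print(lst[:10])                   # the 10 nearest pairs for w1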