Mercurial > pylearn
comparison sandbox/embeddings/process.py @ 467:f3711bcc467e
Fixed a bug in how embeddings are read
author | Joseph Turian <turian@iro.umontreal.ca> |
---|---|
date | Mon, 20 Oct 2008 19:14:06 -0400 |
parents | 121cc6db4481 |
children |
comparison
equal
deleted
inserted
replaced
466:23221eefb70e | 467:f3711bcc467e |
---|---|
32 sys.stderr.write("Reading %s...\n" % WEIGHTSFILE) | 32 sys.stderr.write("Reading %s...\n" % WEIGHTSFILE) |
33 f = open(WEIGHTSFILE) | 33 f = open(WEIGHTSFILE) |
34 f.readline() | 34 f.readline() |
35 vals = [float(v) for v in string.split(f.readline())] | 35 vals = [float(v) for v in string.split(f.readline())] |
36 assert len(vals) == NUMBER_OF_WORDS * DIMENSIONS | 36 assert len(vals) == NUMBER_OF_WORDS * DIMENSIONS |
37 vals.reverse() | |
38 for i in range(NUMBER_OF_WORDS): | 37 for i in range(NUMBER_OF_WORDS): |
39 l = vals[DIMENSIONS*i:DIMENSIONS*(i+1)] | 38 l = vals[DIMENSIONS*i:DIMENSIONS*(i+1)] |
40 w = __words[i] | 39 w = __words[i] |
41 __word_to_embedding[w] = l | 40 __word_to_embedding[w] = l |
42 __read = True | 41 __read = True |
88 # else: | 87 # else: |
89 # sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw)) | 88 # sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw)) |
90 # assert not strict | 89 # assert not strict |
91 # e.append(__word_to_embedding[UNKNOWN]) | 90 # e.append(__word_to_embedding[UNKNOWN]) |
92 # return e | 91 # return e |
92 | |
93 #def test(): | |
94 # """ | |
95 # Debugging code. | |
96 # """ | |
97 # read_embeddings() | |
98 # for w in __word_to_embedding: | |
99 # assert len(__word_to_embedding[w]) == 50 | |
100 # import numpy | |
101 # for w1 in __words: | |
102 # e1 = numpy.asarray(__word_to_embedding[w1]) | |
103 # lst = [] | |
104 # print w1, numpy.dot(e1, e1) | |
105 # for w2 in __word_to_embedding: | |
106 # if w1 >= w2: continue | |
107 # e2 = numpy.asarray(__word_to_embedding[w2]) | |
108 # d = (e1 - e2) | |
109 # l2 = numpy.dot(d, d) | |
110 # lst.append((l2, w1, w2)) | |
111 # lst.sort() | |
112 # print lst[:10] | |
113 # | |
114 #test() |