# HG changeset patch
# User Joseph Turian <turian@iro.umontreal.ca>
# Date 1224544446 14400
# Node ID f3711bcc467e6db7a807460d3d7409eef6ddc09a
# Parent  23221eefb70eb499cd8b2d78698bf82cd19b73cd
Fixed a bug in how embeddings are read: the flat list of values was being reversed before it was sliced into per-word vectors

diff -r 23221eefb70e -r f3711bcc467e sandbox/embeddings/process.py
--- a/sandbox/embeddings/process.py	Wed Oct 15 18:59:55 2008 -0400
+++ b/sandbox/embeddings/process.py	Mon Oct 20 19:14:06 2008 -0400
@@ -34,7 +34,6 @@
     f.readline()
     vals = [float(v) for v in string.split(f.readline())]
     assert len(vals) == NUMBER_OF_WORDS * DIMENSIONS
-    vals.reverse()
     for i in range(NUMBER_OF_WORDS):
         l = vals[DIMENSIONS*i:DIMENSIONS*(i+1)]
         w = __words[i]
@@ -90,3 +89,26 @@
 #            assert not strict
 #            e.append(__word_to_embedding[UNKNOWN])
 #    return e
+
+#def test():
+#    """
+#    Debugging code.
+#    """
+#    read_embeddings()
+#    for w in __word_to_embedding:
+#        assert len(__word_to_embedding[w]) == 50
+#    import numpy
+#    for w1 in __words:
+#        e1 = numpy.asarray(__word_to_embedding[w1])
+#        lst = []
+#        print w1, numpy.dot(e1, e1)
+#        for w2 in __word_to_embedding:
+#            if w1 >= w2: continue
+#            e2 = numpy.asarray(__word_to_embedding[w2])
+#            d = (e1 - e2)
+#            l2 = numpy.dot(d, d)
+#            lst.append((l2, w1, w2))
+#        lst.sort()
+#        print lst[:10]
+#
+#test()