diff sandbox/embeddings/one-per-line.py @ 456:131e19dfe793

Added sandbox.embeddings
author Joseph Turian <turian@iro.umontreal.ca>
date Tue, 07 Oct 2008 17:56:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sandbox/embeddings/one-per-line.py	Tue Oct 07 17:56:52 2008 -0400
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+
+import string
+#import psyco
+
+weightsfile = "lm-weights.txt"  # path opened below; its second line is parsed as the weight values
+vocabfile = "words.asc"  # NOTE(review): not referenced anywhere else in this script
+size = 30000  # number of rows printed; presumably the vocabulary size -- TODO confirm
+dimensions = 50  # values per printed row; size * dimensions must equal the number of weights read
+
+import numpy, math
+import sys
+from common.str import percent
+
+word_to_vector = {}
+
+f = open(weightsfile)
+f.readline()
+vals = [float(v) for v in string.split(f.readline())]
+assert len(vals) == size * dimensions
+vals.reverse()
+#for i in range(size):
+r = range(size)
+r.reverse()
+for i in r:
+    l = vals[dimensions*i:dimensions*(i+1)]
+    print string.join([`s` for s in l], "\t")