annotate embeddings/convert.py @ 517:716c04512dbe

init
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 12 Nov 2008 10:54:38 -0500
parents a07948f780b9
children
rev   line source
456
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
1 #!/usr/bin/python
458
ed6b0b3be8d2 Polished embeddings module
Joseph Turian <turian@iro.umontreal.ca>
parents: 456
diff changeset
2 """
ed6b0b3be8d2 Polished embeddings module
Joseph Turian <turian@iro.umontreal.ca>
parents: 456
diff changeset
3 Convert stdin sentences to word embeddings, and output YAML.
ed6b0b3be8d2 Polished embeddings module
Joseph Turian <turian@iro.umontreal.ca>
parents: 456
diff changeset
4 """
456
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
5
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
6 import sys, string
458
ed6b0b3be8d2 Polished embeddings module
Joseph Turian <turian@iro.umontreal.ca>
parents: 456
diff changeset
7 import read
456
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
8 import yaml
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
9
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
10 output = []
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
11 for l in sys.stdin:
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
12 l = string.strip(l)
458
ed6b0b3be8d2 Polished embeddings module
Joseph Turian <turian@iro.umontreal.ca>
parents: 456
diff changeset
13 output.append((l, read.convert_string(l)))
456
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
14
131e19dfe793 Added sandbox.embeddings
Joseph Turian <turian@iro.umontreal.ca>
parents:
diff changeset
15 print yaml.dump(output)