annotate upmana/mercurial/encoding.py @ 229:1645a78a33a4 alpha

Traipse Alpha 'OpenRPG' {100612-02} Traipse is a distribution of OpenRPG that is designed to be easy to setup and go. Traipse also makes it easy for developers to work on code without fear of sacrifice. 'Ornery-Orc' continues the trend of 'Grumpy' and adds fixes to the code. 'Ornery-Orc's main goal is to offer more advanced features and enhance the productivity of the user. Update Summary (Preparing to close updates) New Features: New to Map, can re-order Grid, Miniatures, and Whiteboard layer draw order Fixes: Fix to InterParse that was causing an Infernal Loop with Namespace Internal Fix to XML data, removed old Minidom and switched to Element Tree Fix to Server that was causing eternal attempt to find a Server ID, in Register Rooms thread Fix to metaservers.xml file not being created
author sirebral
date Sat, 12 Jun 2010 04:46:16 -0500
parents 496dbf12a6cb
children
rev   line source
121
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
1 # encoding.py - character transcoding support for Mercurial
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
2 #
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
4 #
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
5 # This software may be used and distributed according to the terms of the
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
6 # GNU General Public License version 2, incorporated herein by reference.
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
7
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
8 import error
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
9 import sys, unicodedata, locale, os
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
10
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
11 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
12
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
13 try:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
14 encoding = os.environ.get("HGENCODING")
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
15 if sys.platform == 'darwin' and not encoding:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
16 # On darwin, getpreferredencoding ignores the locale environment and
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
17 # always returns mac-roman. We override this if the environment is
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
18 # not C (has been customized by the user).
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
19 locale.setlocale(locale.LC_CTYPE, '')
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
20 encoding = locale.getlocale()[1]
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
21 if not encoding:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
22 encoding = locale.getpreferredencoding() or 'ascii'
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
23 encoding = _encodingfixup.get(encoding, encoding)
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
24 except locale.Error:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
25 encoding = 'ascii'
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
26 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
27 fallbackencoding = 'ISO-8859-1'
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
28
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
29 def tolocal(s):
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
30 """
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
31 Convert a string from internal UTF-8 to local encoding
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
32
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
33 All internal strings should be UTF-8 but some repos before the
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
34 implementation of locale support may contain latin1 or possibly
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
35 other character sets. We attempt to decode everything strictly
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
36 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
37 replace unknown characters.
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
38 """
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
39 for e in ('UTF-8', fallbackencoding):
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
40 try:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
41 u = s.decode(e) # attempt strict decoding
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
42 return u.encode(encoding, "replace")
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
43 except LookupError, k:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
44 raise error.Abort("%s, please check your locale settings" % k)
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
45 except UnicodeDecodeError:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
46 pass
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
47 u = s.decode("utf-8", "replace") # last ditch
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
48 return u.encode(encoding, "replace")
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
49
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
50 def fromlocal(s):
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
51 """
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
52 Convert a string from the local character encoding to UTF-8
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
53
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
54 We attempt to decode strings using the encoding mode set by
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
55 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
56 characters will cause an error message. Other modes include
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
57 'replace', which replaces unknown characters with a special
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
58 Unicode character, and 'ignore', which drops the character.
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
59 """
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
60 try:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
61 return s.decode(encoding, encodingmode).encode("utf-8")
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
62 except UnicodeDecodeError, inst:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
63 sub = s[max(0, inst.start-10):inst.start+10]
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
64 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
65 except LookupError, k:
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
66 raise error.Abort("%s, please check your locale settings" % k)
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
67
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
68 def colwidth(s):
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
69 "Find the column width of a UTF-8 string for display"
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
70 d = s.decode(encoding, 'replace')
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
71 if hasattr(unicodedata, 'east_asian_width'):
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
72 w = unicodedata.east_asian_width
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
73 return sum([w(c) in 'WF' and 2 or 1 for c in d])
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
74 return len(d)
496dbf12a6cb Traipse Alpha 'OpenRPG' {091030-00}
sirebral
parents:
diff changeset
75