Mercurial > traipse_dev
comparison orpg/chat/chat_util.py @ 0:4385a7d0efd1 grumpy-goblin
Deleted and repushed it with the 'grumpy-goblin' branch. I forgot a y
author | sirebral |
---|---|
date | Tue, 14 Jul 2009 16:41:58 -0500 |
parents | |
children | 449a8900f9ac |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4385a7d0efd1 |
---|---|
1 # utility function; see Post() in chatwnd.py | |
2 | |
3 import re | |
4 import string | |
5 | |
6 #============================================ | |
7 # simple_html_repair(string) | |
8 # | |
9 # Crude html/xml parser/verifier. | |
10 # Catches many mistyped and/or malformed | |
11 # html tags and prevents them from causing | |
12 # issues with the chat display (see chatwnd.py) | |
13 # DOES NOT catch misused but properly formatted | |
14 # html like <script> or <li> which are known | |
15 # to cause issues with the chat display | |
16 # | |
17 # Created 04-25-2005 by Snowdog | |
18 #============================================= | |
def simple_html_repair(string):
    """Return ``string`` with extra '>' symbols to isolate badly formatted HTML.

    The text is scanned left to right from the first '<'.  Whenever a new
    '<' is met while a previous tag is still open, a '>' is inserted to
    close the previous tag.  A tag still open at the end of the text gets
    a trailing '>'.  Empty "<>" pairs are removed, and finally enough '>'
    characters are appended so that there are never fewer '>' than '<'.

    string -- the chat text to repair.
    Returns the repaired text; text containing no '<' is returned as is.
    """
    # Walk through string checking positions of < and > tags.
    first_instance = string.find('<')
    if first_instance == -1:
        return string  # no html, bail out.

    # Collapse every run of '<' into a single '<'.  Runs of '>' are
    # deliberately left alone.
    while string.find("<<") != -1:
        string = string.replace("<<", "<")

    last_start = first_instance
    in_tag_flag = 1
    a = first_instance + 1
    while a < len(string):
        if string[a] == '<':
            if in_tag_flag == 1:
                # A new tag starts while the previous one is still open.
                # Attempt to figure out the best place to put the missing >
                # by searching from last_start to the current position: stop
                # at the first whitespace that is not the very first
                # character scanned, otherwise fall through to just before
                # the current '<'.
                at_front = 1
                for best_pos in range(last_start, a):
                    if str(string[best_pos]).isspace() and at_front == 0:
                        break
                    else:
                        at_front = 0
                best_pos = best_pos + 1
                a = best_pos
                string = string[:a] + ">" + string[a:]
                in_tag_flag = 0
                # Jump back one character so the next pass lands on the
                # inserted > and resets the scan state from there.
                a = a - 1
            else:
                in_tag_flag = 1
                last_start = a

        if string[a] == '>':
            last_start = a  # found a closing marker, move scan block up.
            in_tag_flag = 0
        if a >= len(string) - 1 and in_tag_flag == 1:
            # At end of string and still inside a tag: close it.
            string = string + ">"
        a = a + 1

    # Strip string of all instances of "<>".
    string = string.replace("<>", "")

    # Sanity check.  Count the < and > characters; if there aren't enough
    # > chars, tack the missing ones on the end to avoid open-tag
    # conditions.  Exactly ``diff`` closers are needed (a string whose only
    # '<' is its last character never enters the loop above).
    diff = string.count('<') - string.count('>')
    if diff > 0:
        string = string + ">" * diff

    return string
72 | |
def strip_unicode(txt):
    """Return ``txt`` with every non-printable character made printable.

    Each character that is not in ``string.printable`` is replaced by the
    numeric character reference ``&#N;`` where N is ``ord(character)``.
    Characters already in ``string.printable`` are copied unchanged.

    txt -- the text to convert.
    Returns the converted text.
    """
    printable = string.printable
    pieces = []
    # Build the result from the untouched input one character at a time.
    # Replacing inside the text while walking it by index lengthens the
    # text and leaves characters near the end unvisited.
    for ch in txt:
        if ch in printable:
            pieces.append(ch)
        else:
            pieces.append('&#' + str(ord(ch)) + ';')
    return ''.join(pieces)
81 | |
82 #================================================ | |
83 # strip_script_tags(string) | |
84 # | |
85 # removes all script tags (start and end) | |
86 # 04-26-2005 Snowdog | |
87 #================================================ | |
def strip_script_tags(string):
    """Replace every opening or closing script tag with an HTML comment.

    Only the tags themselves are replaced; any text between an opening and
    a closing script tag is left in place.

    string -- the chat text to filter.
    Returns the text with each script tag replaced by
    ``<!-- script tag removed //-->``.
    """
    # Kill the <script> issue.  re.S lets '.' match newlines so a tag
    # whose attributes are spread over several lines is still caught.
    p = re.compile(r'<(\s*)(/*)[Ss][Cc][Rr][Ii][Pp][Tt](.*?)>', re.S)
    return p.sub("<!-- script tag removed //-->", string)
93 | |
94 #================================================ | |
95 # strip_li_tags(string) | |
96 # | |
97 # removes all li tags (start and end) | |
98 # 05-13-2005 | |
99 #================================================ | |
def strip_li_tags(string):
    """Replace li tags with a plain bullet and a line break.

    An opening li tag becomes a bold ``*`` bullet followed by a space; a
    closing li tag becomes ``<br />``.

    string -- the chat text to filter.
    Returns the filtered text.
    """
    # Kill the <li> issue.  The \b after "li" keeps the patterns from
    # swallowing other tags whose names merely start with "li", such as
    # <link> or <listing>.
    bullet = r'<b><font color="#000000" size=+1>*</font></b> '
    string = re.sub(r'<(\s*)[Ll][Ii]\b(.*?)>', bullet, string)
    string = re.sub(r'<(/*)[Ll][Ii]\b(.*?)>', r'<br />', string)
    return string
105 | |
106 #================================================ | |
107 # strip_body_tags(string) | |
108 # | |
109 # removes all body tags (start and end) from messages | |
110 # should not break the setting of custom background colors | |
111 # through legitimate means such as the OpenRPG settings. | |
112 # 07-27-2005 by mDuo13 | |
113 #================================================ | |
def strip_body_tags(string):
    """Return ``string`` with opening and closing body tags removed.

    Matching is case-insensitive and each tag is dropped together with
    any attributes it carries, up to its first '>'.
    """
    body_tag = re.compile(r"""<\/?body.*?>""", re.I)
    return body_tag.sub("", string)
118 | |
119 #================================================ | |
120 # strip_misalignment_tags(string) | |
121 # | |
122 # removes the alignment aspect of <p> tags, since | |
123 # simply closing one doesn't actually fix the text | |
124 # alignment. (I'm assuming this is a bug in wxWindows' | |
125 # html parser.) | |
126 # However, closing <center> tags does | |
127 # return the text to its normal alignment, so this | |
128 # algorithm simply closes them, allowing them to be | |
129 # used legitimately without causing much annoyance. | |
130 # 07-27-2005 mDuo13 | |
131 #================================================ | |
132 def strip_misalignment_tags(string): | |
133 alignment_regex = re.compile(r"""<p([^>]*?)align\s*=\s*('.*?'|".*?"|[^\s>]*)(.*?)>""", re.I) | |
134 string = re.sub(alignment_regex, "<p\\1\\3>", string) | |
135 | |
136 center_regex = re.compile(r"""<center.*?>""", re.I) | |
137 endcenter_regex = re.compile(r"""</center.*?>""", re.I) | |
138 num_centertags = center_regex.findall(string) | |
139 num_endcentertags = endcenter_regex.findall(string) | |
140 if num_centertags > num_endcentertags: | |
141 missing_tags = len(num_centertags) - len(num_endcentertags) | |
142 string = string + missing_tags*"</center>"#yes, you can do this. | |
143 return string | |
144 | |
145 #================================================ | |
146 # strip_img_tags(string) | |
147 # | |
148 # removes all img tags (start and end) | |
149 # 05-13-2005 | |
150 # redone 07-11-2005 by mDuo13 | |
151 #================================================ | |
def strip_img_tags(string):
    """Return ``string`` with each img tag turned into an ``[img]`` link.

    The value of the tag's src attribute (including its quotes, when it
    has any) becomes the href of the replacement link.  The header comment
    in this file describes this as a user-selectable setting meant to avoid
    client crashes caused by large images posted to chat.
    """
    # \\1 is the src value captured by the pattern below.
    link_template = "<a href=\\1>[img]</a>"
    img_tag = re.compile(
        r"""<img.*?src\s*?=\s*('.*?'|".*?"|[^\s>]*).*?>""", re.I)
    return img_tag.sub(link_template, string)