comparison orpg/chat/chat_util.py @ 0:4385a7d0efd1 grumpy-goblin

Deleted and repushed it with the 'grumpy-goblin' branch. I forgot a y
author sirebral
date Tue, 14 Jul 2009 16:41:58 -0500
parents
children 449a8900f9ac
comparison
equal deleted inserted replaced
-1:000000000000 0:4385a7d0efd1
1 # utility function; see Post() in chatwnd.py
2
3 import re
4 import string
5
6 #============================================
7 # simple_html_repair(string)
8 #
9 # Crude html/xml parser/verifier.
10 # Catches many mistyped and/or malformed
11 # html tags and prevents them from causing
12 # issues with the chat display (see chatwnd.py)
13 # DOES NOT catch misused but properly formatted
14 # html like <script> or <li> which are known
15 # to cause issues with the chat display
16 #
17 # Created 04-25-2005 by Snowdog
18 #=============================================
def simple_html_repair(string):
    """Return *string* with extra '>' characters added to close malformed tags.

    The text is scanned from the first '<'. Whenever a second '<' is met
    while a tag is still open, a '>' is inserted to close the earlier tag;
    a tag still open at the end of the text is closed by appending '>'.
    Empty "<>" pairs are removed afterwards.

    Text without any '<' is returned unchanged.

    Note: the parameter name shadows the ``string`` module inside this
    function; it is kept so existing callers are unaffected.
    """
    first_instance = string.find('<')
    if first_instance == -1:
        return string  # no html, bail out.

    # Collapse runs of '<' into a single '<'. Runs of '>' are left alone.
    # The first '<' keeps its index, so first_instance is still valid.
    while string.find("<<") != -1:
        string = string.replace("<<", "<")

    last_start = first_instance  # index of the most recent '<' or '>'
    in_tag_flag = 1              # 1 while a '<' has not been closed yet
    a = first_instance + 1
    while a < len(string):
        if string[a] == '<':
            if in_tag_flag == 1:
                # A new tag starts before the previous one was closed.
                # Look for the best place to put the missing '>': the first
                # whitespace after the opening character, or failing that
                # the current position.
                at_front = 1
                for best_pos in range(last_start, a):
                    # .isspace() is called on the character directly; wrapping
                    # it in str() fails for non-ASCII unicode on Python 2.
                    if string[best_pos].isspace() and (at_front == 0):
                        break
                    else:
                        at_front = 0
                        # NOTE(review): nesting reconstructed from a listing
                        # that lost its indentation -- confirm this increment
                        # belongs to the else branch and not after the loop.
                        best_pos = best_pos + 1
                a = best_pos
                string = string[:a] + ">" + string[a:]
                in_tag_flag = 0
                # Step back one character so the next pass lands on the
                # inserted '>' and handles it like any other closing marker.
                a = a - 1
            else:
                in_tag_flag = 1
                last_start = a

        if string[a] == '>':
            last_start = a  # found a closing marker, move the scan start up.
            in_tag_flag = 0
        if (a >= (len(string) - 1)) and (in_tag_flag == 1):
            # At the end of the text with a tag still open: close it.
            string = string + ">"
        a = a + 1

    # Drop empty "<>" pairs.
    string = string.replace("<>", "")

    # Sanity check: if there are still more '<' than '>' (e.g. the text ends
    # in its only '<', so the loop above never ran), append exactly the
    # number of missing '>' to avoid an open-tag condition.
    diff = string.count('<') - string.count('>')
    if diff > 0:
        string = string + ">" * diff

    return string
72
def strip_unicode(txt):
    """Return *txt* with every character outside ``string.printable`` escaped.

    Each such character is replaced by its numeric HTML entity
    (``&#<code point>;``). Characters found in ``string.printable`` are
    copied through unchanged, and an empty input yields an empty result.

    The result is built character by character. Replacing inside the text
    while walking it by index lets the indices drift as the text grows, so
    later characters would never be examined.
    """
    pieces = []
    for char in txt:
        if char in string.printable:
            pieces.append(char)
            continue
        try:
            pieces.append('&#' + str(ord(char)) + ';')
        except (TypeError, ValueError):
            # Best-effort fallback for anything ord() cannot convert.
            pieces.append('{?}')
    # Join with an empty slice of the input so the result keeps its type.
    return txt[:0].join(pieces)
81
82 #================================================
83 # strip_script_tags(string)
84 #
85 # removes all script tags (start and end)
86 # 04-26-2005 Snowdog
87 #================================================
def strip_script_tags(string):
    """Return *string* with every opening and closing script tag replaced.

    Each match is replaced by the comment ``<!-- script tag removed //-->``.
    Matching is case-insensitive through explicit character classes and
    allows whitespace and slashes between '<' and the tag name. Text between
    the tags is left in place.
    """
    # Raw string: a plain literal would treat "\s" as an (invalid) string
    # escape instead of handing it to the regex engine untouched.
    script_tag_regex = re.compile(r'<(\s*)(/*)[Ss][Cc][Rr][Ii][Pp][Tt](.*?)>')
    return script_tag_regex.sub("<!-- script tag removed //-->", string)
93
94 #================================================
95 # strip_li_tags(string)
96 #
97 # removes all li tags (start and end)
98 # 05-13-2005
99 #================================================
def strip_li_tags(string):
    """Return *string* with li tags swapped for plain markup.

    Opening tags become a bold black asterisk followed by a space; whatever
    li tags remain after that (the closing ones) become ``<br />``.
    """
    bullet_markup = r'<b><font color="#000000" size=+1>*</font></b> '
    opening_li = re.compile(r'<(\s*)[Ll][Ii](.*?)>')
    remaining_li = re.compile(r'<(/*)[Ll][Ii](.*?)>')

    # Order matters: openings are consumed first so that only the closing
    # tags are left for the second pattern.
    with_bullets = opening_li.sub(bullet_markup, string)
    return remaining_li.sub(r'<br />', with_bullets)
105
106 #================================================
107 # strip_body_tags(string)
108 #
109 # removes all body tags (start and end) from messages
110 # should not break the setting of custom background colors
111 # through legitimate means such as the OpenRPG settings.
112 # 07-27-2005 by mDuo13
113 #================================================
def strip_body_tags(string):
    """Return *string* with all opening and closing body tags removed.

    Matching is case-insensitive; any attributes inside the tag are dropped
    together with the tag itself.
    """
    body_tag = re.compile(r"""<\/?body.*?>""", re.I)
    return body_tag.sub("", string)
118
119 #================================================
120 # strip_misalignment_tags(string)
121 #
122 # removes the alignment aspect of <p> tags, since
123 # simply closing one doesn't actually fix the text
124 # alignment. (I'm assuming this is a bug in wxWindows'
125 # html parser.)
126 # However, closing <center> tags does
127 # return the text to its normal alignment, so this
128 # algorithm simply closes them, allowing them to be
129 # used legitimately without causing much annoyance.
130 # 07-27-2005 mDuo13
131 #================================================
def strip_misalignment_tags(string):
    """Return *string* with p-tag alignment removed and center tags balanced.

    The ``align=...`` attribute is cut out of every tag starting with ``<p``;
    the rest of the tag is kept. If the text has more opening than closing
    center tags, the missing ``</center>`` tags are appended at the end.
    """
    alignment_regex = re.compile(r"""<p([^>]*?)align\s*=\s*('.*?'|".*?"|[^\s>]*)(.*?)>""", re.I)
    string = alignment_regex.sub("<p\\1\\3>", string)

    center_regex = re.compile(r"""<center.*?>""", re.I)
    endcenter_regex = re.compile(r"""</center.*?>""", re.I)
    # Compare the number of matches, not the match lists themselves:
    # comparing lists orders them element by element and ignores the counts.
    missing_tags = len(center_regex.findall(string)) - len(endcenter_regex.findall(string))
    if missing_tags > 0:
        string = string + missing_tags * "</center>"
    return string
144
145 #================================================
146 # strip_img_tags(string)
147 #
148 # removes all img tags (start and end)
149 # 05-13-2005
150 # redone 07-11-2005 by mDuo13
151 #================================================
def strip_img_tags(string):
    """Return *string* with every img tag turned into an ``[img]`` link.

    The link's href is the tag's ``src`` value, copied as written (quotes
    included). Per the original author's note, image display is a
    user-configurable setting, added because large images posted to chat
    could crash the client.
    """
    # Group 1 captures the src value: single-quoted, double-quoted or bare.
    image_pattern = re.compile(r"""<img.*?src\s*?=\s*('.*?'|".*?"|[^\s>]*).*?>""", re.I)
    link_template = "<a href=\\1>[img]</a>"
    return image_pattern.sub(link_template, string)