28
|
1 # utility function; see Post() in chatwnd.py
|
|
2
|
|
3 import re
|
18
|
4 import string
|
|
5 from orpg.orpgCore import *
|
28
|
6 from orpg.tools.orpg_log import logger, debug
|
|
7 from orpg.tools.decorators import debugging
|
|
8
|
|
9 #============================================
|
|
10 # simple_html_repair(string)
|
|
11 #
|
|
12 # Crude html/xml parser/verifier.
|
|
13 # Catches many mistyped and/or malformed
|
|
14 # html tags and prevents them from causing
|
|
15 # issues with the chat display (see chatwnd.py)
|
|
16 # DOES NOT catch misused but properly formatted
|
|
17 # html like <script> or <li> which are known
|
|
18 # to cause issues with the chat display
|
|
19 #
|
|
20 # Created 04-25-2005 by Snowdog
|
18
|
21 #=============================================
|
28
|
22
|
18
|
def simple_html_repair(string):
    """Return string with extra '>' symbols added to isolate badly formatted HTML.

    The text is scanned from the first '<' onward while tracking whether the
    scan is currently inside a tag:

    * runs of '<' are collapsed to a single '<' before scanning;
    * when a new '<' is met while a tag is still open, a '>' is inserted at
      the first whitespace character after the open tag's '<', or directly
      in front of the new '<' if the open tag contains no whitespace;
    * when the end of the text is reached inside a tag, a '>' is appended;
    * every empty "<>" pair is removed;
    * finally, one '>' is appended for every '<' that still lacks a partner.

    A string containing no '<' at all is returned unchanged.
    """
    first_instance = string.find('<')
    if first_instance == -1:
        return string  # no html, bail out.

    # Collapse "<<", "<<<", ... down to a single "<".
    while "<<" in string:
        string = string.replace("<<", "<")

    last_start = first_instance  # index of the most recent '<' or '>' seen
    in_tag = True                # the first '<' has opened a tag
    pos = first_instance + 1
    while pos < len(string):
        if string[pos] == '<':
            if in_tag:
                # A second '<' before the previous tag was closed: pick the
                # best place for the missing '>'.  Prefer the first whitespace
                # after the tag start; otherwise close right before this '<'.
                insert_at = pos
                for candidate in range(last_start + 1, pos):
                    if string[candidate].isspace():
                        insert_at = candidate
                        break
                string = string[:insert_at] + ">" + string[insert_at:]
                in_tag = False
                # Step back one character so the next pass lands on the
                # inserted '>' and moves last_start up to it.
                pos = insert_at - 1
            else:
                in_tag = True
                last_start = pos

        if string[pos] == '>':
            # Found a closing marker; move the start of the scan block up.
            last_start = pos
            in_tag = False
        if pos >= len(string) - 1 and in_tag:
            # At the end of the string and still inside a tag.
            string = string + ">"
        pos = pos + 1

    # Drop empty tags.
    string = string.replace("<>", "")

    # Sanity check: if there are still more '<' than '>' characters, append
    # the missing '>' characters to avoid an open-tag condition.
    diff = string.count('<') - string.count('>')
    if diff > 0:
        string = string + ">" * diff
    return string
|
28
|
75
|
|
76 # Deprecated! Might as well use the already made component.get('xml')
|
18
|
def strip_unicode(txt):
    """Return txt with every character outside string.printable escaped.

    Each character that is not in ``string.printable`` is replaced by its
    numeric character reference ``&#<ordinal>;``.  Printable characters are
    copied through unchanged.

    The text is rebuilt character by character instead of being modified
    while it is indexed, so characters near the end of the input are not
    skipped when earlier replacements make the text longer.
    """
    pieces = []
    for char in txt:
        if char in string.printable:
            pieces.append(char)
        else:
            pieces.append('&#' + str(ord(char)) + ';')
    return ''.join(pieces)
|
28
|
85
|
|
86 #================================================
|
|
87 # strip_script_tags(string)
|
|
88 #
|
|
89 # removes all script tags (start and end)
|
|
90 # 04-26-2005 Snowdog
|
|
91 #================================================
|
|
92
|
|
def strip_script_tags(string):
    """Return string with every script tag replaced by an HTML comment.

    Both opening and closing tags are matched, case-insensitively, with
    optional whitespace after the '<'.  Only the tags themselves are
    replaced; any text between them is left in place.

    The tag body is matched with ``[^>]*`` so that a tag whose attributes
    span several lines is still caught.
    """
    #kill the <script> issue
    script_tag_regex = re.compile(r'<\s*/*script[^>]*>', re.I)
    return script_tag_regex.sub("<!-- script tag removed //-->", string)
|
|
98
|
|
99 #================================================
|
|
100 # strip_li_tags(string)
|
|
101 #
|
|
102 # replaces li start tags with a bullet and li end tags with a line break
|
|
103 # 05-13-2005
|
|
104 #================================================
|
|
105
|
|
def strip_li_tags(string):
    """Return string with list-item tags swapped for plain markup.

    Opening li tags become a bold asterisk followed by a space; whatever
    li tags remain afterwards (the closing ones) become a line break.
    """
    #kill the <li> issue
    bullet_markup = r'<b><font color="#000000" size=+1>*</font></b> '
    with_bullets = re.sub(r'<(\s*)[Ll][Ii](.*?)>', bullet_markup, string)
    return re.sub(r'<(/*)[Ll][Ii](.*?)>', r'<br />', with_bullets)
|
|
111
|
|
112 #================================================
|
|
113 # strip_body_tags(string)
|
|
114 #
|
|
115 # removes all body tags (start and end) from messages
|
|
116 # should not break the setting of custom background colors
|
|
117 # through legitimate means such as the OpenRPG settings.
|
|
118 # 07-27-2005 by mDuo13
|
|
119 #================================================
|
|
120
|
|
def strip_body_tags(string):
    """Return string with all opening and closing body tags deleted.

    Matching is case-insensitive and covers any attributes inside the tag;
    text between the tags is kept.
    """
    return re.compile(r"""<\/?body.*?>""", re.I).sub("", string)
|
|
125
|
18
|
126 #================================================
|
28
|
127 # strip_misalignment_tags(string)
|
|
128 #
|
|
129 # removes the alignment aspect of <p> tags, since
|
|
130 # simply closing one doesn't actually fix the text
|
|
131 # alignment. (I'm assuming this is a bug in wxWindows'
|
|
132 # html parser.)
|
|
133 # However, closing <center> tags does
|
|
134 # return the text to its normal alignment, so this
|
|
135 # algorithm simply closes them, allowing them to be
|
|
136 # used legitimately without causing much annoyance.
|
|
137 # 07-27-2005 mDuo13
|
18
|
138 #================================================
|
28
|
139
|
|
def strip_misalignment_tags(string):
    """Return string with p-tag alignment removed and center tags balanced.

    * The ``align=...`` attribute (quoted or unquoted value) is cut out of
      every tag that starts with ``<p``; the rest of the tag is kept.
    * If the text has more opening center tags than closing ones, the
      missing ``</center>`` tags are appended to the end of the text.
    """
    alignment_regex = re.compile(r"""<p([^>]*?)align\s*=\s*('.*?'|".*?"|[^\s>]*)(.*?)>""", re.I)
    string = alignment_regex.sub("<p\\1\\3>", string)

    center_regex = re.compile(r"""<center.*?>""", re.I)
    endcenter_regex = re.compile(r"""</center.*?>""", re.I)
    # Compare the number of matches, not the match lists themselves.
    missing_tags = len(center_regex.findall(string)) - len(endcenter_regex.findall(string))
    if missing_tags > 0:
        string = string + missing_tags * "</center>"
    return string
|
|
152
|
18
|
153 #================================================
|
28
|
154 # strip_img_tags(string)
|
|
155 #
|
|
156 # replaces all img tags with [img] links to the image source
|
|
157 # 05-13-2005
|
|
158 # redone 07-11-2005 by mDuo13
|
|
159 #================================================
|
|
160
|
|
def strip_img_tags(string):
    """Return string with every img tag turned into an ``[img]`` link.

    The value of the tag's src attribute (quoted or unquoted) is captured
    and reused, exactly as written, as the href of the replacement anchor.
    """
    # Group 1 captures the src value, including any surrounding quotes.
    img_tag_regex = re.compile(r"""<img.*?src\s*?=\s*('.*?'|".*?"|[^\s>]*).*?>""", re.I)
    # \\1 in the replacement refers to the captured src value.
    return img_tag_regex.sub("<a href=\\1>[img]</a>", string)
|