view orpg/chat/chat_util.py @ 57:9014d7861bb3 traipse_dev

Removed some extra print messages from the server. Cleaned up the Manifest that was causing problems on linux. Might have been an errouneous push. Testing on WinXP
author sirebral
date Fri, 07 Aug 2009 23:21:37 -0500
parents 4385a7d0efd1
children 449a8900f9ac
line wrap: on
line source

# utility function; see Post() in chatwnd.py

import re
import string

#============================================
# simple_html_repair(string)
#
# Crude html/xml parser/verifier.
# Catches many mistyped and/or malformed
# html tags and prevents them from causing
# issues with the chat display (see chatwnd.py)
# DOES NOT catch misused but properly formated
#    html like <script> or <li> which are known
#    to cause issues with the chat display
#
# Created 04-25-2005 by Snowdog
#=============================================
def simple_html_repair(string):
    "Returns string with extra > symbols to isolate badly formated HTML"
    #walk though string checking positions of < and > tags.
    first_instance = string.find('<')
    if first_instance == -1: return string #no html, bail out.

    #strip string of an instances of ">>" and "<<" recursively
    #while (string.find(">>") != -1):string = string.replace(">>",">")
    while (string.find("<<") != -1):string = string.replace("<<","<")

    last_start = first_instance
    in_tag_flag = 1
    a = first_instance + 1
    while a < len(string):
        if string[a] == '<':
            if in_tag_flag == 1:
                #attempt to figure out best place to put missing >
                #search from last_start to current position
                at_front = 1
                for best_pos in range(last_start,a):
                    if (str(string[best_pos]).isspace())and (at_front == 0):
                        break
                    else:
                        at_front = 0
                        best_pos = best_pos + 1
                a = best_pos
                string = string[:a]+">"+string[a:]
                in_tag_flag = 0
                #jump back up one character to catch the last > and reset the in_tag_flag
                a = a - 1
            else:
                in_tag_flag = 1
                last_start = a

        if string[a] == '>':
            last_start = a #found a closing tag, move start of scan block up.
            in_tag_flag = 0
        if (a >= (len(string)-1))and(in_tag_flag == 1):
            #at end of string and need a closing tag marker
            string = string +">"
        a = a+1

    #strip string of an instances of "<>"
    string = string.replace("<>","")

    #sanity check. Count the < and > characters, if there arn't enough > chars
    #tack them on the end to avoid open-tag conditions
    diff = string.count('<') - string.count('>')
    if diff > 0:
        for d in range(1,diff):
            string = string+">"

    return string

def strip_unicode(txt):
    for i in xrange(len(txt)):
        if txt[i] not in string.printable:
            try:
                txt = txt.replace(txt[i], '&#' + str(ord(txt[i])) + ';')
            except:
                txt = txt.replace(txt[i], '{?}')
    return txt

#================================================
# strip_script_tags(string)
#
# removes all script tags (start and end)
# 04-26-2005 Snowdog
#================================================
def strip_script_tags(string):
    #kill the <script> issue
    p = re.compile( '<(\s*)(/*)[Ss][Cc][Rr][Ii][Pp][Tt](.*?)>')
    string =  p.sub( "<!-- script tag removed //-->", string)
    return string

#================================================
# strip_li_tags(string)
#
# removes all li tags (start and end)
# 05-13-2005
#================================================
def strip_li_tags(string):
    #kill the <li> issue
    string = re.sub( r'<(\s*)[Ll][Ii](.*?)>', r'<b><font color="#000000" size=+1>*</font></b>    ', string)
    string = re.sub( r'<(/*)[Ll][Ii](.*?)>', r'<br />', string)
    return string

#================================================
# strip_body_tags(string)
#
# removes all body tags (start and end) from messages
# should not break the setting of custom background colors
#   through legitimate means such as the OpenRPG settings.
# 07-27-2005 by mDuo13
#================================================
def strip_body_tags(string):
    bodytag_regex = re.compile(r"""<\/?body.*?>""", re.I)
    string = re.sub(bodytag_regex, "", string)
    return string

#================================================
# strip_misalignment_tags(string)
#
# removes the alignment aspect of <p> tags, since
# simply closing one doesn't actually fix the text
# alignment. (I'm assuming this is a bug in wxWindows'
# html parser.)
# However, closing <center> tags does
# return the text to its normal alignment, so this
# algorithm simply closes them, allowing them to be
# used legitimately without causing much annoyance.
# 07-27-2005 mDuo13
#================================================
def strip_misalignment_tags(string):
    alignment_regex = re.compile(r"""<p([^>]*?)align\s*=\s*('.*?'|".*?"|[^\s>]*)(.*?)>""", re.I)
    string = re.sub(alignment_regex, "<p\\1\\3>", string)

    center_regex = re.compile(r"""<center.*?>""", re.I)
    endcenter_regex = re.compile(r"""</center.*?>""", re.I)
    num_centertags = center_regex.findall(string)
    num_endcentertags = endcenter_regex.findall(string)
    if num_centertags > num_endcentertags:
        missing_tags = len(num_centertags) - len(num_endcentertags)
        string = string + missing_tags*"</center>"#yes, you can do this.
    return string

#================================================
# strip_img_tags(string)
#
# removes all img tags (start and end)
# 05-13-2005
# redone 07-11-2005 by mDuo13
#================================================
def strip_img_tags(string):
    #This is a Settings definable feature, Allowing users to enable or disable image display to fix the client crash due to large img posted to chat.
    #p = re.sub( r'<(\s*)(/*)[Ii][Mm][Gg][ ][Ss][Rr][Cc][=](.*?)>', r'<!-- img tag removed //--> <a href=\3>\3</a>', string)

    #this regex is substantially more powerful than the one above
    img_tag_regex = re.compile(r"""<img.*?src\s*?=\s*('.*?'|".*?"|[^\s>]*).*?>""", re.I)
    #this is what replaces the regex match. the \\1 refers to the URL from the previous string
    img_repl_str = "<a href=\\1>[img]</a>"

    #replaces all instances of images in the string with links
    p = re.sub(img_tag_regex, img_repl_str, string)
    return p