import sys, re
from lxml import etree as E
# Hard-coded path of the page handed to parse() at the bottom of this script.
fn = r"D:/install/oracle/B19306_01/server.102/b14219/e0.htm"
# Alternative input in the same directory; uncomment to parse it instead.
#fn = r"D:/install/oracle/B19306_01/server.102/b14219/e12700.htm"
# NOTE(review): these patterns are kept exactly as found.  With ``(.*)(.*)``
# the first greedy group consumes the whole line, so group(2) is always the
# empty string; the literals look as if markup was lost from them -- confirm
# the intended patterns before relying on the explanation/action columns.
_MSG_RE = re.compile('''(.*)''')
_EXPLANATION_RE = re.compile('''(.*)(.*)''', re.M)
_ACTION_RE = re.compile('''(.*)(.*)''', re.M)


def _serialized(el):
    """Return ``E.tostring(el)`` as text so the text patterns can match it."""
    raw = E.tostring(el)
    # Python 3 yields bytes here, Python 2 already yields ``str``.
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8")


def _capture(pattern, el, index, default):
    """Return group *index* of *pattern* matched against *el*'s markup.

    *default* is returned unchanged when the pattern does not match, which
    keeps whatever value had been collected so far.
    """
    found = pattern.match(_serialized(el))
    return found.group(index) if found else default


def parse(fn):
    """Print one tab-separated line per ``msgentry`` element found in *fn*.

    The file is parsed as XML.  Every element whose ``class`` attribute is
    ``msgentry`` is scanned in document order (the element itself included):

    * ``class="msg"``          -> error text   (group 1 of the message pattern)
    * ``class="msgexplankw"``  -> explanation  (group 2 of its pattern)
    * ``class="msgactionkw"``  -> action       (group 2 of its pattern); the
      line is written to stdout and the scan of that entry stops.

    An entry that contains no ``msgactionkw`` element produces no output.

    Elements are selected by ``class`` only.  The previous tag test used
    ``str.find`` as a boolean; ``find`` returns -1 (truthy) when the name is
    absent and 0 (falsy) when the tag starts with it, so it rejected exactly
    the tags it named and accepted all others.

    NOTE(review): the nesting of the output/``break`` step was reconstructed
    from a copy of the script without indentation -- confirm it belongs to
    the ``msgactionkw`` branch.
    NOTE(review): an ``XMLParser(remove_blank_text=True)`` used to be built
    here but was never passed to ``E.parse``; it is omitted so parsing is
    unchanged.  Pass one as the second argument of ``E.parse`` if stripping
    blank text was intended.

    :param fn: path of the document to read.
    :returns: None
    :raises IOError: if *fn* cannot be opened; XML syntax errors raised by
        ``E.parse`` propagate as well.
    """
    # Close the handle deterministically instead of leaking it.
    with open(fn, "rb") as source:
        tree = E.parse(source)

    for entry in tree.iter():
        # .get() returns None for nodes without a class attribute, so no
        # blanket exception handler is needed to skip them.
        if entry.get("class") != "msgentry":
            continue

        msgerror = msgexpl = msgaction = ""
        for inner in entry.iter():
            css_class = inner.get("class")
            if css_class == "msg":
                msgerror = _capture(_MSG_RE, inner, 1, msgerror)
            elif css_class == "msgexplankw":
                msgexpl = _capture(_EXPLANATION_RE, inner, 2, msgexpl)
            elif css_class == "msgactionkw":
                msgaction = _capture(_ACTION_RE, inner, 2, msgaction)
                # Same layout the former ``print a, '\t', b, '\t', c``
                # statement produced for ordinary values: a space precedes
                # each tab.  Written via sys.stdout so the code runs on both
                # Python 2 and Python 3.
                sys.stdout.write(
                    "%s \t%s \t%s\n" % (msgerror, msgexpl, msgaction)
                )
                break
# Script entry point: runs as soon as the module is executed or imported,
# using the module-level path ``fn``.
parse(fn)