master
inpos 2017-12-26 17:20:10 +03:00
parent 5695220377
commit c280ab3d63
1 changed file with 3 additions and 7 deletions

View File

@ -74,17 +74,13 @@ def ctype(c_t):
def htfile_tounicode(htfile):
    """Return the HTML document *htfile* as text.

    If *htfile* is already a ``str`` it is returned unchanged. Otherwise it
    is treated as raw bytes and decoded with the first usable encoding out
    of:

    1. the ``charset=...`` value found in the ``content`` attribute of a
       ``<meta http-equiv=...>`` element (first element that declares one);
    2. the encoding reported by ``detect(htfile)['encoding']``;
    3. ``'utf-8'`` as a last resort.

    Bytes that cannot be decoded are dropped (``errors='ignore'``).

    Args:
        htfile: The HTML document, either ``str`` or a bytes-like object.

    Returns:
        The document as ``str``.
    """
    # Local import so this function does not rely on a file-level
    # ``import re`` being present.
    import re

    # isinstance (not ``type(...) is str``) so str subclasses are also
    # passed through instead of being fed to the bytes decoder below.
    if isinstance(htfile, str):
        return htfile

    declared = None
    root = etree.HTML(htfile)
    # Guard in case the parser produced no root element for this input.
    http_equiv_attrs = root.xpath('//meta/@http-equiv') if root is not None else []
    for attr in http_equiv_attrs:
        # The xpath result is the attribute value; getparent() gives the
        # <meta> element that owns it. ``content`` may be absent.
        content = attr.getparent().attrib.get('content', '')
        # Accept any letter case, optional spaces around '=', optional
        # quotes, and stop at whitespace / ';' / quote so that values like
        # "text/html; charset=utf-8; foo" yield just "utf-8".
        match = re.search(r'charset\s*=\s*["\']?([^\s;"\']+)', content,
                          re.IGNORECASE)
        if match:
            declared = match.group(1)
            break

    # Presumably chardet's ``detect``; its 'encoding' entry can be None
    # when no guess is possible, hence the emptiness check below.
    detected = detect(htfile)['encoding']

    for enc in (declared, detected):
        if not enc:
            continue
        try:
            return str(htfile, enc, 'ignore')
        except (LookupError, ValueError):
            # Unknown or non-text codec name (or otherwise unusable):
            # fall through to the next candidate instead of crashing.
            continue
    return str(htfile, 'utf-8', 'ignore')
def getbin(sess, hhash): def getbin(sess, hhash):
cur = sess.db.cursor() cur = sess.db.cursor()