riper fix
parent
c23b4b6281
commit
02df953ca1
|
@ -28,6 +28,7 @@ import gzip
|
||||||
import HTMLParser
|
import HTMLParser
|
||||||
|
|
||||||
import Localization
|
import Localization
|
||||||
|
from functions import log, debug
|
||||||
|
|
||||||
|
|
||||||
class Content:
|
class Content:
|
||||||
|
@ -269,3 +270,9 @@ class Content:
|
||||||
size = str(long(sizeBytes)) + 'B'
|
size = str(long(sizeBytes)) + 'B'
|
||||||
|
|
||||||
return size
|
return size
|
||||||
|
|
||||||
|
def log(self, msg):
|
||||||
|
log(msg)
|
||||||
|
|
||||||
|
def debug(self, msg):
|
||||||
|
debug(msg)
|
|
@ -18,18 +18,13 @@
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import Content
|
import Content, re
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class RiperAM(Content.Content):
|
class RiperAM(Content.Content):
|
||||||
category_dict = {
|
category_dict = {
|
||||||
#'movies':('Movies', '/popular/'),
|
|
||||||
#'tvshows':('TV Shows', '/top/serial/list/'),
|
|
||||||
#'cartoons':('Cartoons', '/top/id_genre/14/'),
|
|
||||||
#'anime':('Anime', '/search/title?count=100&genres=animation&keywords=anime&num_votes=1000,&explore=title_type&ref_=gnr_kw_an'),
|
|
||||||
'hot': ('Most Recent', '/', {'page': '/portal.php?tp=%d', 'increase': 30, 'second_page': 30}),
|
'hot': ('Most Recent', '/', {'page': '/portal.php?tp=%d', 'increase': 30, 'second_page': 30}),
|
||||||
#'top':('Top 250 Movies', '/top/'),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
baseurl = "http://www.riper.am"
|
baseurl = "http://www.riper.am"
|
||||||
|
@ -64,38 +59,40 @@ class RiperAM(Content.Content):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_contentList(self, category, subcategory=None, apps_property=None):
|
def get_contentList(self, category, subcategory=None, apps_property=None):
|
||||||
|
#self.debug=self.log
|
||||||
contentList = []
|
contentList = []
|
||||||
url = self.get_url(category, subcategory, apps_property)
|
url = self.get_url(category, subcategory, apps_property)
|
||||||
|
|
||||||
response = self.makeRequest(url, headers=self.headers)
|
response = self.makeRequest(url, headers=self.headers)
|
||||||
|
|
||||||
if None != response and 0 < len(response):
|
if None != response and 0 < len(response):
|
||||||
#print response
|
response=response.decode('utf-8')
|
||||||
|
#self.debug(str(response))
|
||||||
if category in ['hot']:
|
if category in ['hot']:
|
||||||
contentList = self.popmode(response)
|
contentList = self.popmode(response)
|
||||||
#print str(contentList)
|
self.debug('[get_contentList] contentList '+str(contentList))
|
||||||
return contentList
|
return contentList
|
||||||
|
|
||||||
def popmode(self, response):
|
def popmode(self, response):
|
||||||
contentList = []
|
contentList = []
|
||||||
Soup = BeautifulSoup(response)
|
|
||||||
result = Soup.findAll('table', 'postbody postbody_portal')
|
|
||||||
#print str(result)
|
|
||||||
num = 31
|
num = 31
|
||||||
bad_forum = [u'Безопасность', u'Книги и журналы', u'Action & Shooter', u'RPG/MMORPG']
|
bad_forum = [u'Безопасность', u'Книги и журналы', u'Action & Shooter', u'RPG/MMORPG', u'Книги', u'Журналы']
|
||||||
for tr in result:
|
|
||||||
#main
|
regex = u'<table class="postbody postbody_portal"(.+?)</table>'
|
||||||
|
regex_tr = u'''<img height="200" src="(.+?)".+?></a>.+?<h4 class="first"><a href="(.+?)" title=".+?"><strong>(.+?)</strong></a></h4></div>.+?<div style="height:20px;overflow:hidden;">.+?<a href=".+?">(.+?)</a>'''
|
||||||
|
for tr in re.compile(regex, re.DOTALL).findall(response):
|
||||||
|
|
||||||
|
result=re.compile(regex_tr, re.DOTALL).findall(tr)
|
||||||
|
self.debug(tr+' -> '+str(result))
|
||||||
|
if result:
|
||||||
|
(img, link, label, forum)=result[0]
|
||||||
info = {}
|
info = {}
|
||||||
forum = tr.find('div', {'style': 'height:20px;overflow:hidden;'}).find('a').text
|
|
||||||
if forum and forum in bad_forum:
|
if forum and forum in bad_forum:
|
||||||
continue
|
continue
|
||||||
link = tr.find('div', {'style': 'width:200px;overflow:hidden;'}).find('a').get('href')
|
|
||||||
num = num - 1
|
num = num - 1
|
||||||
label = tr.find('strong').text
|
|
||||||
original_title = None
|
original_title = None
|
||||||
year = 0
|
year = 0
|
||||||
title = self.unescape(label)
|
title = self.unescape(label)
|
||||||
img = tr.findAll('a')[0].find('img').get('src')
|
|
||||||
if img:
|
if img:
|
||||||
img = img.replace('.webp', '.jpg')
|
img = img.replace('.webp', '.jpg')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue