plugin.video.torrenter/resources/contenters/KinoPoisk.py

321 lines
13 KiB
Python
Raw Normal View History

2015-01-09 14:11:21 +03:00
# -*- coding: utf-8 -*-
'''
Torrenter plugin for XBMC
Copyright (C) 2012 Vadim Skorba
vadim.skorba@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import re
import socket
2016-05-14 12:04:25 +03:00
from datetime import date
2015-01-09 14:11:21 +03:00
import Content
from BeautifulSoup import BeautifulSoup
class KinoPoisk(Content.Content):
    """Content provider whose listings are scraped from kinopoisk.ru pages.

    ``category_dict`` maps a category key either to a
    ``(label, path[, paging])`` tuple or to a nested dict of subcategories.
    In a nested dict the entry keyed by the category's own name holds only
    the display label of the group.
    """

    category_dict = {
        'tvshows': ('TV Shows', '/top/serial/list/'),
        'cartoons': ('Cartoons', '/top/id_genre/14/'),
        'search': ('[B]Search[/B]', '/s/type/film/list/1/find/%s/'),
        'movies': ('Movies', '/s/type/film/list/1/m_act[country]/1/m_act[type]/film/'),
        'rus_movies': ('Russian Movies', '/s/type/film/list/1/m_act[country]/2/m_act[type]/film/'),
        'anime': ('Anime', '/s/type/film/list/1/order/rating/m_act[genre][0]/1750/',),
        'hot': ('Most Recent', '/popular/'),
        'top': ('Top 250 Movies', '/top/'),
        'genre': {
            'genre': 'by Genre',
            'russia': ('Russia & USSR', '/top/rus/list/'),
            'biography': ('Biography', '/s/type/film/list/1/m_act[genre][0]/22/'),
            'action': ('Action', '/top/id_genre/3/'),
            'thriller': ('Thriller', '/top/id_genre/4/'),
            'comedy': ('Comedy', '/top/id_genre/6/'),
            'drama': ('Drama', '/top/id_genre/8/'),
            'romance': ('Romance', '/top/id_genre/7/'),
            'horror': ('Horror', '/top/id_genre/1/'),
            'sci_fi': ('Sci-Fi', '/top/id_genre/2/'),
            'documentary': ('Documentary', '/top/id_genre/12/'),
            'cartoonseries': ('Cartoons Series', '/top/mult_serial/list/'),
            'cartoonshort': ('Cartoons Short', '/top/short_mult/list/'),
            'short': ('Short', '/top/short/list/'),
            'male': ('Male', '/top/sex/male/'),
            'female': ('Female', '/top/sex/female/'),
        },
    }

    # Every entry whose path is a '/s/type/film/list/' search listing gets a
    # fixed page size of 25 and a third tuple element describing how to build
    # the URL of the following pages.
    # NOTE: the loop variable names are kept as-is because names bound in a
    # class body end up as class attributes.
    for category in list(category_dict):
        if isinstance(category_dict[category], dict):
            for subcategory in list(category_dict[category]):
                if subcategory == category:
                    # The self-named key is the group label, not a listing.
                    continue
                x = category_dict[category][subcategory]
                if x[1].startswith('/s/type/film/list/'):
                    category_dict[category][subcategory] = (
                        x[0],
                        x[1] + 'perpage/25/',
                        {'page': x[1] + 'perpage/25/page/%d/', 'increase': 1, 'second_page': 2},
                    )
        else:
            x = category_dict[category]
            if x[1].startswith('/s/type/film/list/'):
                category_dict[category] = (
                    x[0],
                    x[1] + 'perpage/25/',
                    {'page': x[1] + 'perpage/25/page/%d/', 'increase': 1, 'second_page': 2},
                )

    # "By year" group: one paginated search listing per year, from the
    # current year down to 1971.
    category_dict['year'] = {'year': 'by Year'}
    for y in range(date.today().year, 1970, -1):
        category_dict['year'][str(y)] = (
            str(y),
            '/s/type/film/list/1/m_act[year]/%s/' % str(y) + 'perpage/25/',
            {'page': '/s/type/film/list/1/m_act[year]/%s/' % str(y) + 'perpage/25/page/%d/',
             'increase': 1, 'second_page': 2},
        )

    baseurl = "http://www.kinopoisk.ru"

    # Request headers passed to makeRequest() by get_contentList().
    headers = [
        ('User-Agent',
         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124'
         ' YaBrowser/14.10.2062.12061 Safari/537.36'),
        ('Referer', baseurl),
        ('Accept-Encoding', 'gzip'),
        ('Accept-Language', 'ru,en;q=0.8'),
    ]

    # Weight of the sources provided through this searcher: it is multiplied
    # by the default weight (the default weight is the number of seeds).
    sourceWeight = 2
2015-07-15 19:14:22 +03:00
def isTracker(self):
    """Return False: this provider does not report itself as a tracker."""
    return False
def isSearcher(self):
    """Return False: this provider does not report itself as a searcher."""
    return False
def isPages(self):
    """Return True.

    NOTE(review): presumably signals that listings are paginated; confirm
    against the Content base class.
    """
    return True
def isSearchOption(self):
    """Return True.

    NOTE(review): presumably enables the 'search' category of this
    provider; confirm against the Content base class.
    """
    return True
def isScrappable(self):
    """Return True.

    NOTE(review): presumably tells the caller that returned items may be
    passed to a metadata scraper; confirm against the Content base class.
    """
    return True
2015-01-28 22:53:45 +03:00
def get_contentList(self, category, subcategory=None, apps_property=None):
    """Download the listing page of a category and parse it.

    The three arguments are forwarded unchanged to ``self.get_url()`` to
    obtain the page URL. The parser is chosen from the category and URL:
    ``popmode`` for the 'hot' category, ``infomode`` for search listings
    under '/s/type/film/list/', ``topmode`` for everything else.

    Side effect: sets the process-wide default socket timeout to
    15 seconds.

    :return: the list built by the selected parser, or an empty list when
        the request returned nothing.
    """
    socket.setdefaulttimeout(15)

    url = self.get_url(category, subcategory, apps_property)
    self.debug('get_contentList: url = ' + url)

    contentList = []
    response = self.makeRequest(url, headers=self.headers)
    if response is not None and len(response) > 0:
        self.debug(str(response))
        if category == 'hot':
            parse = self.popmode
        elif url.startswith(self.baseurl + '/s/type/film/list/'):
            parse = self.infomode
        else:
            parse = self.topmode
        contentList = parse(response)

    self.debug('get_contentList: contentList = ' + str(contentList))
    return contentList
def stripTtl(self, title):
    """Strip a trailing media-type marker from a title.

    The markers ' (ТВ)', ' (сериал)' and ' (видео)' (Russian for TV,
    series and video) are removed one after another, in that order, and
    only when they sit at the very end of the string.

    :param title: title text; ``None`` or an empty string is returned
        unchanged.
    :return: the title without the trailing marker(s).
    """
    if not title:
        # Nothing to strip; also keeps callers that pass None from crashing.
        return title
    for marker in (u'ТВ', u'сериал', u'видео'):
        # Parentheses are escaped explicitly ('\\(') rather than through an
        # invalid string escape ('\(').
        title = re.sub(u' \\(' + marker + u'\\)$', u'', title)
    return title
def popmode(self, response):
    """Parse the "popular" page into a list of content tuples.

    Every ``div.el`` row inside the ``div.stat`` container is read as: first
    anchor = rank number, second anchor = title text (with the year in
    parentheses) and link to the film page.

    :param response: HTML source of the page.
    :return: list of ``(weight, originaltitle, title, year, img, info)``
        tuples. Empty when the ``div.stat`` container is not found.
    """
    contentList = []
    soup = BeautifulSoup(response)
    container = soup.find('div', 'stat')
    if container is None:
        # Unexpected layout: report "no items" instead of raising
        # AttributeError on None.
        return contentList
    rows = container.findAll('div', 'el')
    self.debug('popmode: ' + str(rows))

    # Compiled once instead of once per row.
    film_id_re = re.compile(r'/film/(\d+)/')
    movie_re = re.compile(r'(.+?) \((\d\d\d\d)\)', re.DOTALL)
    series_re = re.compile(r'(.+?) \(.*(\d\d\d\d)')

    for row in rows:
        anchors = row.findAll('a')
        if len(anchors) < 2:
            # Not a title row (rank and title anchors are both required).
            continue
        num = anchors[0].text

        info = {}
        year = 0
        img = ''

        originaltitle = row.find('i')
        if originaltitle:
            originaltitle = self.stripHtml(self.unescape(originaltitle.text))

        text = anchors[1].text
        title = text
        link = anchors[1].get('href')
        if link:
            film_ids = film_id_re.findall(link)
            if film_ids:
                img = self.id2img(film_ids[0])

        # "Title (2010)" is tried first; otherwise the looser pattern, whose
        # match is also recorded as a TV show title. When neither matches,
        # the raw anchor text is kept and the year stays 0.
        match = movie_re.search(text)
        if match:
            title, year = match.groups()
        else:
            match = series_re.search(text)
            if match:
                title, year = match.groups()
                info['tvshowtitle'] = title

        title = self.stripHtml(self.stripTtl(title))

        info['title'] = title
        info['year'] = int(year)

        contentList.append((
            int(int(self.sourceWeight) * (201 - int(num))),
            originaltitle, title, int(year), img, info,
        ))
    return contentList
def topmode(self, response):
    """Parse a "top" rating table into a list of content tuples.

    Rows are taken from the table with ``cellpadding="3"``; the first two
    rows are skipped. In each row the first cell holds the rank, the
    ``a.all`` anchor holds the title text with the year in parentheses, and
    an optional ``span.text-grey`` holds the original title.

    :param response: HTML source of the page.
    :return: list of ``(weight, originaltitle, title, year, img, info)``
        tuples. Empty when the table is not found. Rows without any title
        are skipped; a row whose year cannot be parsed gets year 0.
    """
    contentList = []
    soup = BeautifulSoup(response)
    table = soup.find('table', {'cellpadding': '3'})
    if table is None:
        # Unexpected layout: report "no items" instead of raising
        # AttributeError on None.
        return contentList
    rows = table.findAll('tr')[2:]
    self.debug('topmode: ' + str(rows))

    # Compiled once instead of once per row.
    film_id_re = re.compile(r'/film/(\d+)/')
    movie_re = re.compile(r'(.+) \((\d\d\d\d)\)')
    series_re = re.compile(r'(.+) \((\d\d\d\d) &ndash;|(.+) \(.*(\d\d\d\d)',
                           re.IGNORECASE | re.MULTILINE)

    for tr in rows:
        td = tr.findAll('td')
        if not td:
            # Row without cells: nothing to read the rank from.
            continue

        year = 0
        info = {}
        img = ''
        title = None
        num = td[0].text.rstrip('.')

        originaltitle = tr.find('span', 'text-grey')
        if originaltitle:
            originaltitle = self.stripHtml(self.unescape(originaltitle.text))

        a_all = tr.find('a', {'class': 'all'})
        if a_all:
            link = a_all.get('href')
            if link:
                film_ids = film_id_re.findall(link)
                if film_ids:
                    img = self.id2img(film_ids[0])

            text = a_all.text
            match = movie_re.search(text)
            if match:
                title, year = match.groups()
            else:
                match = series_re.search(text)
                if match:
                    # The pattern has two alternatives with separate groups:
                    # (1, 2) for "Title (2010 &ndash;" and (3, 4) for the
                    # looser form. Only one pair is populated per match.
                    if match.group(1) is not None:
                        title, year = match.group(1), match.group(2)
                    else:
                        title, year = match.group(3), match.group(4)
                    info['tvshowtitle'] = title
                else:
                    # No year found: keep the raw anchor text, year stays 0.
                    title = text
            if title:
                title = self.stripHtml(self.stripTtl(title))

        # Fall back to the original title when no localized title was found.
        if originaltitle and not title:
            title = originaltitle
            originaltitle = None

        if title:
            info['title'] = title
            info['year'] = int(year)
            contentList.append((
                int(int(self.sourceWeight) * (251 - int(num))),
                originaltitle, title, int(year), img, info,
            ))
    return contentList
def infomode(self, response):
    """Parse a search-listing page into a list of content tuples.

    Each ``div.info`` element is read as one entry: ``p.name > a`` gives the
    title and the film link, ``span.year`` inside ``p.name`` gives the year,
    and ``span.gray`` gives the original title (the text before the first
    ", <digit>").

    :param response: HTML source of the page.
    :return: list of ``(weight, originaltitle, title, year, img, info)``
        tuples, weighted by position on the page. Entries without a
        ``p.name`` anchor are skipped; a missing or unparsable year
        becomes 0.
    """
    contentList = []
    soup = BeautifulSoup(response)

    # Compiled once instead of once per entry.
    film_id_re = re.compile(r'/film/(\d+)/')
    suffix_re = re.compile(u'(.+?) \\((.+?)\\)$')
    year_re = re.compile(r'\d{4}')
    original_re = re.compile(r'(.+?), \d')

    num = 0
    for div in soup.findAll('div', 'info'):
        name = div.find('p', 'name')
        anchor = name.find('a') if name is not None else None
        if anchor is None:
            # A div.info block that is not a title entry.
            continue

        info = {}
        img = ''
        title = anchor.text
        link = anchor.get('href')
        if link:
            film_ids = film_id_re.findall(link)
            if film_ids:
                img = self.id2img(film_ids[0])

        # Take the first four-digit number of the year span; an empty or
        # non-numeric span yields 0 instead of raising ValueError in int().
        year = 0
        year_tag = name.find('span', 'year')
        if year_tag:
            found = year_re.search(year_tag.text)
            if found:
                year = int(found.group(0))

        title = self.stripHtml(self.unescape(title))
        # "Title (сериал)" marks a TV series (сериал = series).
        suffix = suffix_re.search(title)
        if suffix and suffix.group(2) == u'сериал':
            title = suffix.group(1)
            info['tvshowtitle'] = title
        num += 1

        originaltitle = div.find('span', 'gray')
        if originaltitle:
            originaltitle = original_re.match(originaltitle.text)
            if originaltitle:
                originaltitle = self.stripHtml(self.unescape(originaltitle.group(1)))
        title = self.stripTtl(title)

        info['title'] = title
        info['year'] = year

        contentList.append((
            int(int(self.sourceWeight) * (100 - num)),
            originaltitle, title, year, img, info,
        ))
    return contentList
def id2img(self, id):
    """Build the poster image URL for a film id.

    :param id: film id; any falsy value yields an empty string.
    :return: URL string, or '' when no id is given.
    """
    if not id:
        return ''
    return "http://st.kp.yandex.net/images/film_iphone/iphone360_%s.jpg" % str(id)
'''
- Video Values:
- genre : string (Comedy)
- year : integer (2009)
- episode : integer (4)
- season : integer (1)
- top250 : integer (192)
- tracknumber : integer (3)
- rating : float (6.4) - range is 0..10
- watched : deprecated - use playcount instead
- playcount : integer (2) - number of times this item has been played
- overlay : integer (2) - range is 0..8. See GUIListItem.h for values
- cast : list (Michael C. Hall)
- castandrole : list (Michael C. Hall|Dexter)
- director : string (Dagur Kari)
- mpaa : string (PG-13)
- plot : string (Long Description)
- plotoutline : string (Short Description)
- title : string (Big Fan)
- originaltitle : string (Big Fan)
- sorttitle : string (Big Fan)
- duration : string (3:18)
- studio : string (Warner Bros.)
- tagline : string (An awesome movie) - short description of movie
- writer : string (Robert D. Siegel)
- tvshowtitle : string (Heroes)
- premiered : string (2005-03-04)
- status : string (Continuing) - status of a TVshow
- code : string (tt0110293) - IMDb code
- aired : string (2008-12-07)
- credits : string (Andy Kaufman) - writing credits
- lastplayed : string (Y-m-d h:m:s = 2009-04-05 23:16:04)
- album : string (The Joshua Tree)
- artist : list (['U2'])
- votes : string (12345 votes)
- trailer : string (/home/user/trailer.avi)
- dateadded : string (Y-m-d h:m:s = 2009-04-05 23:16:04)
'''