plugin.video.torrenter/resources/contenters/IMDB.py

248 lines
9.8 KiB
Python

# -*- coding: utf-8 -*-
'''
Torrenter plugin for XBMC
Copyright (C) 2012 Vadim Skorba
vadim.skorba@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import re
import HTMLParser
import Content
from BeautifulSoup import BeautifulSoup
class IMDB(Content.Content):
    '''
    Content provider that scrapes listing pages from imdb.com.

    get_contentList() downloads one page and hands the HTML to one of the
    *mode parsers. Every parser returns a list of tuples shaped as
    (weight, originaltitle, title, year, img, info).

    self.get_url() and self.makeRequest() are not defined in this class;
    presumably they are inherited from Content.Content.
    '''

    # Maps a category key to (label, URL path relative to baseurl).
    # 'year' and 'genre' are nested dicts: the entry whose key equals the
    # category name holds the label string, every other entry is a
    # (label, path) tuple.
    # NOTE(review): 'Forieng Movies' is misspelled, but it is kept
    # byte-identical because the label may be used as a lookup key
    # (e.g. for localization) outside this file - confirm before fixing.
    category_dict = {
        'movies': ('Forieng Movies', '/search/title?languages=en|1&title_type=feature&sort=moviemeter,asc'),
        'rus_movies': ('Russian Movies', '/search/title?languages=ru|1&title_type=feature&sort=moviemeter,asc'),
        'tvshows': ('TV Shows', '/search/title?count=100&title_type=tv_series,mini_series&ref_=gnr_tv_mp'),
        'cartoons': ('Cartoons', '/search/title?genres=animation&title_type=feature&sort=moviemeter,asc'),
        'anime': ('Anime',
                  '/search/title?count=100&genres=animation&keywords=anime&num_votes=1000,&explore=title_type&ref_=gnr_kw_an'),
        'hot': ('Hot & New', '/search/title?count=100&title_type=feature%2Ctv_series%2Ctv_movie&ref_=nv_ch_mm_1'),
        'top': ('Top 250 Movies', '/chart/top/'),
        'search': ('[B]Search[/B]', '/find?q=%s&s=tt&ttype=ft'),
        'year': {'year': 'by Year', },
        'genre': {'genre': 'by Genre',
                  'action': ('Action', '/genre/action'),
                  'adventure': ('Adventure', '/genre/adventure'),
                  'animation': ('Animation', '/genre/animation'),
                  'biography': ('Biography', '/genre/biography'),
                  'comedy': ('Comedy', '/genre/comedy'),
                  'crime': ('Crime', '/genre/crime'),
                  'documentary': ('Documentary', '/genre/documentary'),
                  'drama': ('Drama', '/genre/drama'),
                  'family': ('Family', '/genre/family'),
                  'fantasy': ('Fantasy', '/genre/fantasy'),
                  'film_noir': ('Film-Noir', '/genre/film_noir'),
                  'history': ('History', '/genre/history'),
                  'horror': ('Horror', '/genre/horror'),
                  'music': ('Music', '/genre/music'),
                  'musical': ('Musical', '/genre/musical'),
                  'mystery': ('Mystery', '/genre/mystery'),
                  'romance': ('Romance', '/genre/romance'),
                  'sci_fi': ('Sci-Fi', '/genre/sci_fi'),
                  'short': ('Short', '/genre/short'),
                  'sport': ('Sport', '/genre/sport'),
                  'thriller': ('Thriller', '/genre/thriller'),
                  'war': ('War', '/genre/war'),
                  'western': ('Western', '/genre/western'),
                  }
    }

    # Add one 'year' sub-category per year, 2015 down to 1971.
    for y in range(2015, 1970, -1):
        category_dict['year'][str(y)] = (str(y), '/year/%s/' % str(y))

    regex_list = []

    baseurl = "http://imdb.com"
    headers = [('User-Agent',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124'
                ' YaBrowser/14.10.2062.12061 Safari/537.36'),
               ('Referer', baseurl), ('Accept-Encoding', 'gzip'), ('Accept-Language', 'ru,en;q=0.8')]

    # Weight of the sources provided by this searcher.
    # It is multiplied with the default weight (the default weight is the
    # number of seeds).
    sourceWeight = 2

    # Year patterns, compiled once: a four-digit year in parentheses is
    # preferred, any four-digit run is the fallback.
    _YEAR_IN_PARENS = re.compile(r'\((\d\d\d\d)\)')
    _YEAR_ANYWHERE = re.compile(r'(\d\d\d\d)')

    def isLabel(self):
        '''Capability flag; always False for this provider.'''
        return False

    def isPages(self):
        '''Capability flag; always False for this provider.'''
        return False

    def isSearchOption(self):
        '''Capability flag; always True for this provider.'''
        return True

    def isScrappable(self):
        '''Capability flag; always True for this provider.'''
        return True

    def get_contentList(self, category, subcategory=None, page=None):
        '''
        Download the page for the given category and parse it.

        The URL is built by self.get_url() from category, subcategory, page
        and baseurl. 'top' is parsed by topmode(), 'search' by searchmode(),
        everything else by genremode().

        Returns the parsed list, or an empty list when the request yields
        None or an empty response.
        '''
        url = self.get_url(category, subcategory, page, self.baseurl)
        response = self.makeRequest(url, headers=self.headers)
        if response is None or len(response) == 0:
            return []

        if category == 'top':
            return self.topmode(response)
        if category == 'search':
            return self.searchmode(response)
        return self.genremode(response)

    def _weight(self, num):
        '''Return sourceWeight * (251 - num) as an int; num may be int or digit string.'''
        return int(int(self.sourceWeight) * (251 - int(num)))

    def _extract_year(self, element):
        '''
        Return the first four-digit year found in element.text as an int.

        A year in parentheses wins over a bare four-digit run. Returns 0 when
        element is None, has no text, or contains no four-digit number.
        '''
        text = getattr(element, 'text', None)
        if not text:
            return 0
        for pattern in (self._YEAR_IN_PARENS, self._YEAR_ANYWHERE):
            found = pattern.search(text)
            if found:
                return int(found.group(1))
        return 0

    def searchmode(self, response):
        '''
        Parse a title-search result page.

        Reads every <tr> with class 'findResult odd' / 'findResult even' and
        returns a list of (weight, originaltitle, title, year, img, info).
        originaltitle is the text of the row's first <i> element, or None
        when the row has none. info only carries 'tvshowtitle'.
        '''
        contentList = []
        pars = HTMLParser.HTMLParser()
        Soup = BeautifulSoup(response)
        rows = Soup.findAll('tr', {'class': ['findResult odd', 'findResult even']})
        for position, tr in enumerate(rows):
            # num counts down from 249, so the weight factor (251 - num)
            # starts at 2 for the first row and grows by one per row.
            # NOTE(review): this is the opposite direction to topmode and
            # genremode, where a smaller rank gives a larger weight -
            # confirm whether that is intended.
            num = 249 - position

            title = pars.unescape(tr.findAll('a')[1].text)
            tdtitle = tr.find('td', 'result_text')

            originaltitle = tr.find('i')
            if originaltitle:
                originaltitle = originaltitle.text

            year = self._extract_year(tdtitle)

            info = {'tvshowtitle': title}
            img = self.biggerImg(tr.find('img').get('src'))

            contentList.append((
                self._weight(num),
                originaltitle, title, year, img, info,
            ))
        return contentList

    def genremode(self, response):
        '''
        Parse a genre / year / advanced-search listing page.

        Reads every <tr> with class 'odd detailed' / 'even detailed' and
        returns a list of (weight, originaltitle, title, year, img, info).
        originaltitle is always None. info carries 'tvshowtitle' and 'code'
        (the row's data-tconst attribute).
        '''
        contentList = []
        pars = HTMLParser.HTMLParser()
        Soup = BeautifulSoup(response)
        rows = Soup.findAll('tr', {'class': ['odd detailed', 'even detailed']})
        for tr in rows:
            tdtitle = tr.find('td', 'title')
            # The rank cell reads like "12."; drop the trailing dot.
            num = tr.find('td', 'number').text.rstrip('.')
            originaltitle = None
            title = pars.unescape(tdtitle.find('a').text)

            year = self._extract_year(tdtitle.find('span', 'year_type'))

            info = {'tvshowtitle': title}
            img = self.biggerImg(tr.find('td', 'image').find('img').get('src'))
            info['code'] = tr.find('span', 'wlb_wrapper').get('data-tconst')

            contentList.append((
                self._weight(num),
                originaltitle, title, year, img, info,
            ))
        return contentList

    def biggerImg(self, img):
        '''
        Rewrite an image URL to use the '._V1_SY_CR1,0,,_AL_.jpg' suffix.

        Everything from the first '._' onwards is replaced. img is returned
        unchanged when it is empty/None or contains no '._'.
        '''
        if img and '._' in img:
            img = img.split('._')[0] + '._V1_SY_CR1,0,,_AL_.jpg'
        return img

    def topmode(self, response):
        '''
        Parse the top chart page.

        Reads every <tr> with class 'odd' / 'even' and returns a list of
        (weight, originaltitle, title, year, img, info). originaltitle is
        always None. info carries 'title', 'year' (int) and 'code' (the
        row's data-tconst attribute).
        '''
        contentList = []
        Soup = BeautifulSoup(response)
        rows = Soup.findAll('tr', {'class': ['odd', 'even']})
        for tr in rows:
            tdtitle = tr.find('td', 'titleColumn')
            # The rank span reads like "12."; drop the trailing dot.
            num = tdtitle.find('span', {'name': 'ir'}).text.rstrip('.')
            originaltitle = None
            # NOTE(review): unlike searchmode/genremode the title is not
            # HTML-unescaped here - confirm whether that is intended.
            title = tdtitle.find('a').text
            # The release-date span reads like "(1994)"; drop the parentheses.
            year = tdtitle.find('span', {'name': 'rd'}).text.rstrip(')').lstrip('(')
            tdposter = tr.find('td', 'posterColumn')
            img = self.biggerImg(tdposter.find('img').get('src'))

            info = {}
            info['title'] = title
            info['year'] = int(year)
            info['code'] = tr.find('div', 'wlb_ribbon').get('data-tconst')

            contentList.append((
                self._weight(num),
                originaltitle, title, int(year), img, info,
            ))
        return contentList
'''
- Video Values:
- genre : string (Comedy)
- year : integer (2009)
- episode : integer (4)
- season : integer (1)
- top250 : integer (192)
- rating : float (6.4) - range is 0..10
- cast : list (Michael C. Hall)
- castandrole : list (Michael C. Hall|Dexter)
- director : string (Dagur Kari)
- mpaa : string (PG-13)
- plot : string (Long Description)
- plotoutline : string (Short Description)
- title : string (Big Fan)
- originaltitle : string (Big Fan)
- sorttitle : string (Big Fan)
- duration : string (3:18)
- studio : string (Warner Bros.)
- tagline : string (An awesome movie) - short description of movie
- writer : string (Robert D. Siegel)
- tvshowtitle : string (Heroes)
- premiered : string (2005-03-04)
- status : string (Continuing) - status of a TVshow
- code : string (tt0110293) - IMDb code
- aired : string (2008-12-07)
- credits : string (Andy Kaufman) - writing credits
- lastplayed : string (Y-m-d h:m:s = 2009-04-05 23:16:04)
- album : string (The Joshua Tree)
- artist : list (['U2'])
- votes : string (12345 votes)
- trailer : string (/home/user/trailer.avi)
- dateadded : string (Y-m-d h:m:s = 2009-04-05 23:16:04)
'''