176 lines
7.1 KiB
Python
176 lines
7.1 KiB
Python
# -*- coding: utf-8 -*-
|
||
#
|
||
# Russian metadata plugin for Plex, which uses http://www.kinopoisk.ru/ to get the tag data.
|
||
# Плагин для обновления информации о фильмах использующий КиноПоиск (http://www.kinopoisk.ru/).
|
||
# Copyright (C) 2013 Yevgeny Nyden
|
||
#
|
||
# This program is free software; you can redistribute it and/or
|
||
# modify it under the terms of the GNU General Public License
|
||
# as published by the Free Software Foundation; either version 2
|
||
# of the License, or (at your option) any later version.
|
||
#
|
||
# This program is distributed in the hope that it will be useful,
|
||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
# GNU General Public License for more details.
|
||
#
|
||
# You should have received a copy of the GNU General Public License
|
||
# along with this program; if not, write to the Free Software
|
||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||
# 02110-1301, USA.
|
||
#
|
||
# @author zhenya (Yevgeny Nyden)
|
||
# @version 1.52
|
||
# @revision 148
|
||
|
||
import re
|
||
import urllib
|
||
import operator
|
||
|
||
import common
|
||
import pluginsettings as S
|
||
import translit
|
||
|
||
|
||
|
||
|
||
# MATCHER_MOVIE_DURATION = re.compile('\s*(\d+).*?', re.UNICODE | re.DOTALL)
|
||
# MATCHER_IMDB_RATING = re.compile('IMDb:\s*(\d+\.?\d*)\s*\(\s*([\s\d]+)\s*\)', re.UNICODE | re.DOTALL)
|
||
# MATCHER_IMDB_RATING = re.compile('IMDb:\s*(\d+\.?\d*)\s?\((.*)\)', re.UNICODE)
|
||
|
||
# User-Agent string passed to common.HttpUtils when PageParser builds its default HTTP helper.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.172 Safari/537.22'
|
||
'''
|
||
MOVIE_THUMBNAIL_SMALL_WIDTH = 130
|
||
MOVIE_THUMBNAIL_SMALL_HEIGHT = 168
|
||
MOVIE_THUMBNAIL_BIG_WIDTH = 780
|
||
MOVIE_THUMBNAIL_BIG_HEIGHT = 1024
|
||
|
||
# Compiled regex matchers.
|
||
MATCHER_WIDTH_FROM_STYLE = re.compile('.*width\s*:\s*(\d+)px.*', re.UNICODE)
|
||
MATCHER_HEIGHT_FROM_STYLE = re.compile('.*height\s*:\s*(\d+)px.*', re.UNICODE)
|
||
MATCHER_LEADING_NONALPHA = re.compile('^[\s\d\.\(\)]*', re.UNICODE | re.MULTILINE)
|
||
|
||
|
||
# Русские месяца, пригодятся для определения дат.
|
||
RU_MONTH = {
|
||
u'января': '01',
|
||
u'февраля': '02',
|
||
u'марта': '03',
|
||
u'апреля': '04',
|
||
u'мая': '05',
|
||
u'июня': '06',
|
||
u'июля': '07',
|
||
u'августа': '08',
|
||
u'сентября': '09',
|
||
u'октября': '10',
|
||
u'ноября': '11',
|
||
u'декабря': '12'
|
||
}
|
||
'''
|
||
|
||
|
||
class PageParser:
    """ Queries the KinoPoisk search page and converts the hits into scored results.

      Every result produced by this class is a list of the form
      [kinoPoiskId, title, year, score].
    """

    # One search hit: (movie id, "title, year" text, alternative title text).
    _MATCHER_SEARCH_ITEM = re.compile(
        r'<span><a href="http://m\.kinopoisk\.ru/movie/(\d+)/">(.+?)</a><br />(.+?)</span>')

    # Splits a "Some title, 1999" string into its title and four-digit year.
    _MATCHER_TITLE_AND_YEAR = re.compile(r'^(.+?), (\d\d\d\d)$')

    def __init__(self, logger, httpUtils=None, isDebug=False):
        """ Creates a parser.
          @param logger    Logging object; its Info, Warn and Debug methods are called.
          @param httpUtils Object providing requestAndParseHtmlPage(url). When None,
                           a common.HttpUtils(S.ENCODING_KINOPOISK_PAGE, USER_AGENT)
                           is created for this instance.
          @param isDebug   When True, the sorted search results are written to the debug log.
        """
        # The default helper is built here rather than in the signature: a default
        # argument is evaluated once, when the class body runs, which would create
        # the helper at import time and share that one object between all parsers.
        if httpUtils is None:
            httpUtils = common.HttpUtils(S.ENCODING_KINOPOISK_PAGE, USER_AGENT)
        self.log = logger
        self.isDebug = isDebug
        self.httpUtils = httpUtils

    def fetchAndParseSearchResults(self, mediaName, mediaYear, mediaAltName=None):
        """ Searches for movie titles on KinoPoisk.
          @param mediaName    Movie title parsed from a filename.
          @param mediaYear    Movie year parsed from a filename.
          @param mediaAltName Optional alternative title; when set, one more query is made with it.
          @return List of [kinoPoiskId, title, year, score] lists, highest score first.
        """
        self.log.Info('Quering kinopoisk...')
        results = self.queryKinoPoisk(mediaName, mediaYear)

        # If the media name is all ASCII characters, issue another query using
        # the detranslified name and merge the scored results.
        if common.isAsciiString(mediaName):
            translifiedMediaName = translit.detranslify(mediaName)
            results = self._mergeResults(
                results, self.queryKinoPoisk(translifiedMediaName, mediaYear))

        if mediaAltName:
            results = self._mergeResults(
                results, self.queryKinoPoisk(mediaAltName, mediaYear))

        # Sort all results based on their score, best first. The ascending sort
        # followed by reverse() is kept on purpose: sort(reverse=True) would order
        # equally scored entries differently.
        results.sort(key=operator.itemgetter(3))
        results.reverse()

        if self.isDebug:
            self.log.Debug('Search produced %d results:' % len(results))
            for index, result in enumerate(results):
                self.log.Debug(' ... %d: id="%s", name="%s", year="%s", score="%d".' %
                               (index, result[0], result[1], str(result[2]), result[3]))
        return results

    @staticmethod
    def _mergeResults(results, moreResults):
        """ Merges two result lists, keeping one entry per KinoPoisk id.
          @param results     Results gathered so far.
          @param moreResults Results of an additional query.
          @return New list: the entries of moreResults in their order (replaced by the
                  entry from results when that one has a strictly higher score),
                  followed by the entries of results whose id was not in moreResults.
        """
        resultsMap = dict()
        for result in results:
            resultsMap[result[0]] = result

        merged = []
        for result in moreResults:
            # pop() both looks up and removes the earlier entry, so a later
            # duplicate id in moreResults is appended unchanged.
            origResult = resultsMap.pop(result[0], None)
            if origResult is None or result[3] >= origResult[3]:
                merged.append(result)
            else:
                merged.append(origResult)
        merged.extend(resultsMap.values())
        return merged

    def queryKinoPoisk(self, mediaName, mediaYear):
        """ Searches for a movie on KinoPoisk.
          Returns title results as they are returned (no sorting is done here!).
          @param mediaName Title to search for.
          @param mediaYear Year passed on to the title scoring function.
          @return List of [kinoPoiskId, title, year, score] lists; empty when no page was returned.
        """
        results = []
        try:
            encodedName = urllib.quote(mediaName.encode(S.ENCODING_KINOPOISK_PAGE))
        except (UnicodeError, LookupError):
            # The name cannot be represented in the page encoding (or the codec
            # is unknown): fall back to UTF-8.
            encodedName = urllib.quote(mediaName.encode('utf-8'))
        page = self.httpUtils.requestAndParseHtmlPage(S.KINOPOISK_SEARCH_SIMPLE % encodedName)
        if page is None:
            self.log.Warn(' ### nothing was found on kinopoisk for media name "%s"' % mediaName)
            return results

        # The page was fetched; extract the list of all movie titles from it.
        self.log.Debug('**** Finding %s (%s) got a KinoPoisk...' % (mediaName, mediaYear))
        foundItems = self._MATCHER_SEARCH_ITEM.findall(page)

        # Inspect query results titles and score them.
        self.log.Debug('found %d results (div info tags)' % len(foundItems))
        for itemIndex, (itemKinoPoiskId, itemTitleitemYear, itemAltTitle) in enumerate(foundItems):
            # NOTE(review): the replaced token shows up as a single space in this copy
            # of the file; it may originally have been an HTML entity - confirm.
            itemAltTitle = itemAltTitle.replace(' ', '')
            titleAndYear = self._MATCHER_TITLE_AND_YEAR.match(itemTitleitemYear.strip())
            if titleAndYear:
                itemTitle, itemYear = titleAndYear.groups()
            else:
                # No trailing ", YYYY": use the whole text as the title.
                itemYear = None
                itemTitle = itemTitleitemYear
            itemScore = common.scoreMediaTitleMatch(
                mediaName, mediaYear, itemTitle, itemAltTitle, itemYear, itemIndex)
            results.append([itemKinoPoiskId, itemTitle, itemYear, itemScore])

        return results
|