2015-01-09 14:11:21 +03:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
#
|
|
|
|
|
# Russian metadata plugin for Plex, which uses http://www.kinopoisk.ru/ to get the tag data.
|
|
|
|
|
# Плагин для обновления информации о фильмах использующий КиноПоиск (http://www.kinopoisk.ru/).
|
|
|
|
|
# Copyright (C) 2013 Yevgeny Nyden
|
|
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or
|
|
|
|
|
# modify it under the terms of the GNU General Public License
|
|
|
|
|
# as published by the Free Software Foundation; either version 2
|
|
|
|
|
# of the License, or (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
|
# 02110-1301, USA.
|
|
|
|
|
#
|
|
|
|
|
# @author zhenya (Yevgeny Nyden)
|
|
|
|
|
# @version 1.52
|
|
|
|
|
# @revision 148
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
import urllib
|
|
|
|
|
import operator
|
|
|
|
|
|
|
|
|
|
import common
|
|
|
|
|
import pluginsettings as S
|
|
|
|
|
import translit
|
|
|
|
|
|
|
|
|
|
|
2015-06-23 23:00:27 +03:00
|
|
|
|
|
|
|
|
|
|
2015-08-02 22:58:45 +03:00
|
|
|
|
|
2015-01-09 14:11:21 +03:00
|
|
|
|
# MATCHER_MOVIE_DURATION = re.compile('\s*(\d+).*?', re.UNICODE | re.DOTALL)
|
2015-06-23 23:00:27 +03:00
|
|
|
|
# MATCHER_IMDB_RATING = re.compile('IMDb:\s*(\d+\.?\d*)\s*\(\s*([\s\d]+)\s*\)', re.UNICODE | re.DOTALL)
|
|
|
|
|
# MATCHER_IMDB_RATING = re.compile('IMDb:\s*(\d+\.?\d*)\s?\((.*)\)', re.UNICODE)
|
2015-01-09 14:11:21 +03:00
|
|
|
|
|
|
|
|
|
# User-Agent header value handed to common.HttpUtils by PageParser's default
# httpUtils argument.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) '
    'AppleWebKit/537.22 (KHTML, like Gecko) '
    'Chrome/25.0.1364.172 Safari/537.22'
)

# The triple-quoted string below is a bare expression statement: nothing in it
# is executed or bound to a name. It keeps the old thumbnail-size constants,
# style regexes and the Russian month table around in disabled form.
'''
MOVIE_THUMBNAIL_SMALL_WIDTH = 130
MOVIE_THUMBNAIL_SMALL_HEIGHT = 168
MOVIE_THUMBNAIL_BIG_WIDTH = 780
MOVIE_THUMBNAIL_BIG_HEIGHT = 1024

# Compiled regex matchers.
MATCHER_WIDTH_FROM_STYLE = re.compile('.*width\s*:\s*(\d+)px.*', re.UNICODE)
MATCHER_HEIGHT_FROM_STYLE = re.compile('.*height\s*:\s*(\d+)px.*', re.UNICODE)
MATCHER_LEADING_NONALPHA = re.compile('^[\s\d\.\(\)]*', re.UNICODE | re.MULTILINE)


# Русские месяца, пригодятся для определения дат.
RU_MONTH = {
  u'января': '01',
  u'февраля': '02',
  u'марта': '03',
  u'апреля': '04',
  u'мая': '05',
  u'июня': '06',
  u'июля': '07',
  u'августа': '08',
  u'сентября': '09',
  u'октября': '10',
  u'ноября': '11',
  u'декабря': '12'
}
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PageParser:
    """ Queries the KinoPoisk search page and turns the response into scored
        title candidates.
    """

    # One search hit in the mobile KinoPoisk markup:
    # group 1 - numeric movie id, group 2 - "title, year" text,
    # group 3 - alternative title text.
    _SEARCH_HIT_MATCHER = re.compile(
        r'<span><a href="http://m\.kinopoisk\.ru/movie/(\d+)/">(.+?)</a><br />(.+?)</span>')

    # Splits "Some title, 1999" into the title and a four-digit year.
    _TITLE_YEAR_MATCHER = re.compile(r'^(.+?), (\d\d\d\d)$')

    # NOTE(review): the httpUtils default is built once, when this "def" is
    # evaluated, so every parser created without an explicit httpUtils shares
    # that single HttpUtils object.
    def __init__(self, logger, httpUtils=common.HttpUtils(S.ENCODING_KINOPOISK_PAGE, USER_AGENT), isDebug=False):
        """ Stores the collaborators used by the query methods.
            @param logger Object providing Info, Warn and Debug methods.
            @param httpUtils Object providing requestAndParseHtmlPage(url).
            @param isDebug When true, the final result list is written to the debug log.
        """
        self.log = logger
        self.isDebug = isDebug
        self.httpUtils = httpUtils

    def fetchAndParseSearchResults(self, mediaName, mediaYear, mediaAltName=None):
        """ Searches for movie titles on KinoPoisk.
            @param mediaName Movie title parsed from a filename.
            @param mediaYear Movie year parsed from a filename.
            @param mediaAltName Optional alternative title; when truthy, it is
                   queried as well and its results are merged in.
            @return List of [kinoPoiskId, title, year, score] lists, ordered
                    by score from highest to lowest.
        """
        self.log.Info('Quering kinopoisk...')
        results = self.queryKinoPoisk(mediaName, mediaYear)

        # Check media name is all ASCII characters, and if it is,
        # issue another query to KinoPoisk using a translified media name;
        # lastly, merge the scored results.
        if common.isAsciiString(mediaName):
            translifiedMediaName = translit.detranslify(mediaName)
            moreResults = self.queryKinoPoisk(translifiedMediaName, mediaYear)
            results = self._mergeResults(results, moreResults)

        if mediaAltName:
            moreResults = self.queryKinoPoisk(mediaAltName, mediaYear)
            results = self._mergeResults(results, moreResults)

        # Sort all results based on their score. The ascending sort followed
        # by reverse() is kept on purpose: sort(reverse=True) would order
        # equal-score entries differently.
        results.sort(key=operator.itemgetter(3))
        results.reverse()

        if self.isDebug:
            self.log.Debug('Search produced %d results:' % len(results))
            for index, result in enumerate(results):
                self.log.Debug(' ... %d: id="%s", name="%s", year="%s", score="%d".' %
                               (index, result[0], result[1], str(result[2]), result[3]))
        return results

    def _mergeResults(self, baseResults, extraResults):
        """ Combines two result lists, keeping one entry per KinoPoisk id where
            both lists contain it (the higher score wins; the extraResults
            entry wins a tie).
            @param baseResults Results collected so far.
            @param extraResults Results of an additional query.
            @return New list: extraResults entries first (each replaced by the
                    baseResults entry with the same id when that one scores
                    higher), followed by the baseResults entries whose ids did
                    not occur in extraResults.
        """
        pending = {}
        for entry in baseResults:
            pending[entry[0]] = entry

        merged = []
        for candidate in extraResults:
            # pop() both looks the id up and marks it as consumed.
            known = pending.pop(candidate[0], None)
            if known is not None and known[3] > candidate[3]:
                merged.append(known)
            else:
                merged.append(candidate)

        merged.extend(pending.values())
        return merged

    def queryKinoPoisk(self, mediaName, mediaYear):
        """ Searches for a movie on KinoPoisk.
            Returns title results as they are returned (no sorting is done here!).
            @param mediaName Title to search for.
            @param mediaYear Year handed to the scoring function (it is not
                   part of the search URL).
            @return List of [kinoPoiskId, title, year, score] lists; empty when
                    no page was obtained. "year" is None when the hit text does
                    not end in ", YYYY".
        """
        results = []
        try:
            encodedName = urllib.quote(mediaName.encode(S.ENCODING_KINOPOISK_PAGE))
        except (UnicodeError, LookupError):
            # The name could not be converted with the page encoding; fall back to UTF-8.
            encodedName = urllib.quote(mediaName.encode('utf-8'))

        page = self.httpUtils.requestAndParseHtmlPage(S.KINOPOISK_SEARCH_SIMPLE % encodedName)
        if page is None:
            self.log.Warn(' ### nothing was found on kinopoisk for media name "%s"' % mediaName)
            return results

        # Page received; extract the list of all movie titles from it.
        self.log.Debug('**** Finding %s (%s) got a KinoPoisk...' % (mediaName, mediaYear))
        hits = self._SEARCH_HIT_MATCHER.findall(page)

        # Inspect query results titles and score them.
        self.log.Debug('found %d results (div info tags)' % len(hits))
        for itemIndex, (itemKinoPoiskId, itemTitleitemYear, itemAltTitle) in enumerate(hits):
            # NOTE(review): this drops every occurrence of the quoted character
            # from the alternative title - confirm whether '&nbsp;' was intended.
            itemAltTitle = itemAltTitle.replace(' ', '')

            titleYear = self._TITLE_YEAR_MATCHER.match(itemTitleitemYear.strip())
            if titleYear:
                itemTitle, itemYear = titleYear.groups()
            else:
                # No trailing ", YYYY": keep the raw text as the title.
                itemYear = None
                itemTitle = itemTitleitemYear

            itemScore = common.scoreMediaTitleMatch(mediaName, mediaYear, itemTitle, itemAltTitle, itemYear, itemIndex)
            results.append([itemKinoPoiskId, itemTitle, itemYear, itemScore])

        return results
|