2015-01-09 14:11:21 +03:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
#
|
|
|
|
|
# Russian metadata plugin for Plex, which uses http://www.kinopoisk.ru/ to get the tag data.
|
|
|
|
|
# Плагин для обновления информации о фильмах использующий КиноПоиск (http://www.kinopoisk.ru/).
|
|
|
|
|
# Copyright (C) 2012 Yevgeny Nyden
|
|
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or
|
|
|
|
|
# modify it under the terms of the GNU General Public License
|
|
|
|
|
# as published by the Free Software Foundation; either version 2
|
|
|
|
|
# of the License, or (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
|
# 02110-1301, USA.
|
|
|
|
|
#
|
|
|
|
|
# @author zhenya (Yevgeny Nyden)
|
|
|
|
|
# @revision 148
|
|
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
import math
|
|
|
|
|
import difflib
|
|
|
|
|
|
|
|
|
|
import translit
|
|
|
|
|
from HTTP import *
|
|
|
|
|
|
|
|
|
|
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.172 Safari/537.22'
|
|
|
|
|
# USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22'
|
|
|
|
|
ENCODING_PLEX = 'utf-8'
|
|
|
|
|
|
|
|
|
|
SCORE_PENALTY_ITEM_ORDER = 2
|
|
|
|
|
SCORE_PENALTY_YEAR = 30
|
|
|
|
|
SCORE_PENALTY_TITLE = 40
|
|
|
|
|
|
|
|
|
|
IMAGE_SCORE_MAX_NUMBER_OF_ITEMS = 5
|
|
|
|
|
IMAGE_SCORE_ITEM_ORDER_BONUS_MAX = 25
|
|
|
|
|
IMAGE_SCORE_RESOLUTION_BONUS_MAX = 25
|
|
|
|
|
IMAGE_SCORE_RATIO_BONUS_MAX = 45
|
|
|
|
|
IMAGE_SCORE_THUMB_BONUS = 5
|
|
|
|
|
POSTER_SCORE_MIN_RESOLUTION_PX = 60 * 1000
|
|
|
|
|
POSTER_SCORE_MAX_RESOLUTION_PX = 600 * 1000
|
|
|
|
|
POSTER_SCORE_BEST_RATIO = 0.7
|
|
|
|
|
ART_SCORE_BEST_RATIO = 1.5
|
|
|
|
|
ART_SCORE_MIN_RESOLUTION_PX = 200 * 1000
|
|
|
|
|
ART_SCORE_MAX_RESOLUTION_PX = 1000 * 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Preferences:
|
|
|
|
|
""" These instance variables are populated from plugin preferences.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def __init__(self,
|
|
|
|
|
(maxPostersName, maxPostersDefault),
|
|
|
|
|
(maxArtName, maxArtDefault),
|
|
|
|
|
(getAllActorsName, getAllActorsDefault),
|
|
|
|
|
(imdbSupportName, imdbSupportDefault),
|
|
|
|
|
(cacheTimeName, cacheTimeDefault),
|
|
|
|
|
(imdbRatingName, imdbRatingDefault),
|
|
|
|
|
(additionalRatingName, additionalRatingDefault)):
|
|
|
|
|
self.maxPostersName = maxPostersName
|
|
|
|
|
self.maxPosters = maxPostersDefault
|
|
|
|
|
self.maxArtName = maxArtName
|
|
|
|
|
self.maxArt = maxArtDefault
|
|
|
|
|
self.getAllActorsName = getAllActorsName
|
|
|
|
|
self.getAllActors = getAllActorsDefault
|
|
|
|
|
self.imdbSupportName = imdbSupportName
|
|
|
|
|
self.imdbSupport = imdbSupportDefault
|
|
|
|
|
self.cacheTimeName = cacheTimeName
|
|
|
|
|
self.cacheTime = cacheTimeDefault
|
|
|
|
|
self.cacheTimeDefault = cacheTimeDefault
|
|
|
|
|
self.imdbRatingName = imdbRatingName
|
|
|
|
|
self.imdbRating = imdbRatingDefault
|
|
|
|
|
self.additionalRatingName = additionalRatingName
|
|
|
|
|
self.additionalRating = additionalRatingDefault
|
|
|
|
|
|
|
|
|
|
def readPluginPreferences(self):
|
|
|
|
|
# Setting image (poster and funart) preferences.
|
|
|
|
|
if self.maxPostersName is not None:
|
|
|
|
|
self.maxPosters = int(Prefs[self.maxPostersName])
|
|
|
|
|
Log.Debug('PREF: Max poster results is set to %d.' % self.maxPosters)
|
|
|
|
|
if self.maxArtName is not None:
|
|
|
|
|
self.maxArt = int(Prefs[self.maxArtName])
|
|
|
|
|
Log.Debug('PREF: Max art results is set to %d.' % self.maxArt)
|
|
|
|
|
if self.getAllActorsName is not None:
|
|
|
|
|
self.getAllActors = Prefs[self.getAllActorsName]
|
|
|
|
|
Log.Debug('PREF: Parse all actors is set to %s.' % str(self.getAllActors))
|
|
|
|
|
|
|
|
|
|
# Setting IMDB support.
|
|
|
|
|
if self.imdbSupportName is not None:
|
|
|
|
|
self.imdbSupport = Prefs[self.imdbSupportName]
|
|
|
|
|
Log.Debug('PREF: IMDB support is set to %s.' % str(self.imdbSupport))
|
|
|
|
|
|
|
|
|
|
# Setting cache expiration time.
|
|
|
|
|
if self.cacheTimeName is not None:
|
|
|
|
|
self.cacheTime = parseAndSetCacheTimeFromPrefs(self.cacheTimeName, self.cacheTimeDefault)
|
|
|
|
|
|
|
|
|
|
# Setting IMDB rating.
|
|
|
|
|
if self.imdbRatingName is not None:
|
|
|
|
|
self.imdbRating = Prefs[self.imdbRatingName]
|
|
|
|
|
Log.Debug('PREF: IMDB rating is set to %s.' % str(self.imdbRating))
|
|
|
|
|
|
|
|
|
|
# Setting kinopoisk.ru rating.
|
|
|
|
|
if self.additionalRatingName is not None:
|
|
|
|
|
self.additionalRating = Prefs[self.additionalRatingName]
|
|
|
|
|
Log.Debug('PREF: kinopoisk rating is set to %s.' % str(self.additionalRating))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getElementFromHttpRequest(url, encoding, userAgent=USER_AGENT):
|
|
|
|
|
""" Fetches a given URL and returns it as an element.
|
|
|
|
|
Функция преобразования html-кода в xml-код.
|
|
|
|
|
"""
|
|
|
|
|
for i in range(3):
|
|
|
|
|
errorCount = 0
|
|
|
|
|
try:
|
|
|
|
|
req = HTTPRequest(url, headers={'User-agent': userAgent, 'Accept': 'text/html'})
|
|
|
|
|
response = HTTP().fetch(req)
|
|
|
|
|
return str(response.body).decode(encoding)
|
|
|
|
|
except:
|
|
|
|
|
errorCount += 1
|
|
|
|
|
Log.Debug('Error fetching URL: "%s".' % url)
|
|
|
|
|
time.sleep(1 + errorCount)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def requestImageJpeg(url, userAgent):
|
|
|
|
|
""" Requests an image given its URL and returns a request object.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
response = HTTP.Request(url, headers={
|
|
|
|
|
'User-agent': userAgent,
|
|
|
|
|
'Accept': 'image/jpeg'
|
|
|
|
|
})
|
|
|
|
|
return response
|
|
|
|
|
except:
|
|
|
|
|
Log.Debug('Error fetching URL: "%s".' % url)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getWin1252ResponseFromHttpRequest(url):
|
|
|
|
|
""" Requests an image given its URL and returns a request object.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
response = HTTP.Request(url, headers={
|
|
|
|
|
'User-agent': USER_AGENT,
|
|
|
|
|
'Accept-Charset': 'ISO-8859-1;q=0.7,*;q=0.3',
|
|
|
|
|
'Accept-Language': 'en-US,en;q=0.8'
|
|
|
|
|
})
|
|
|
|
|
return response
|
|
|
|
|
except:
|
|
|
|
|
Log.Error('Error fetching URL: "%s".' % url)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def printSearchArrayResults(results):
|
|
|
|
|
""" Sends a list of media array results to debug log.
|
|
|
|
|
"""
|
|
|
|
|
Log.Debug('Search produced %d results:' % len(results))
|
|
|
|
|
index = -1
|
|
|
|
|
for result in results:
|
|
|
|
|
index += 1
|
|
|
|
|
Log.Debug(' ... %d: id="%s", name="%s", year="%s", score="%d".' %
|
|
|
|
|
(index, result[0], result[1], str(result[2]), result[3]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def printSearchResults(results):
|
|
|
|
|
""" Sends a list of media results to debug log.
|
|
|
|
|
"""
|
|
|
|
|
Log.Debug('Search produced %d results:' % len(results))
|
|
|
|
|
index = 0
|
|
|
|
|
for result in results:
|
|
|
|
|
Log.Debug(' ... %d: id="%s", name="%s", year="%s", score="%d".' %
|
|
|
|
|
(index, result.id, result.name, str(result.year), result.score))
|
|
|
|
|
index += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def printImageSearchResults(thumbnailList):
|
|
|
|
|
Log.Debug('printing %d image results:' % len(thumbnailList))
|
|
|
|
|
index = 0
|
|
|
|
|
for result in thumbnailList:
|
|
|
|
|
Log.Debug(' ... %d: index=%s, score=%s, URL="%s".' %
|
|
|
|
|
(index, result.index, result.score, result.url))
|
|
|
|
|
index += 1
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def logException(msg):
|
|
|
|
|
excInfo = sys.exc_info()
|
|
|
|
|
Log.Exception('%s; exception: %s; cause: %s' % (msg, excInfo[0], excInfo[1]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year, itemIndex):
|
|
|
|
|
""" Compares page and media titles taking into consideration
|
|
|
|
|
media item's year and title values. Returns score [0, 100].
|
|
|
|
|
Search item scores 100 when:
|
|
|
|
|
- it's first on the list of results; AND
|
|
|
|
|
- it equals to the media title (ignoring case) OR all media title words are found in the search item; AND
|
|
|
|
|
- search item year equals to media year.
|
|
|
|
|
|
|
|
|
|
For now, our title scoring is pretty simple - we check if individual words
|
|
|
|
|
from media item's title are found in the title from search results.
|
|
|
|
|
We should also take into consideration order of words, so that "One Two" would not
|
|
|
|
|
have the same score as "Two One". Also, taking into consideration year difference.
|
|
|
|
|
"""
|
|
|
|
|
# TODO(zhenya): add logging that works in unit tests.
|
|
|
|
|
# Log.Debug('comparing item %d::: "%s-%s" with "%s-%s" (%s)...' %
|
|
|
|
|
# (itemIndex, str(mediaName), str(mediaYear), str(title), str(year), str(altTitle)))
|
|
|
|
|
# Max score is when both title and year match exactly.
|
|
|
|
|
score = 100
|
|
|
|
|
|
|
|
|
|
# Item order penalty (the lower it is on the list or results, the larger the penalty).
|
|
|
|
|
score = score - (itemIndex * SCORE_PENALTY_ITEM_ORDER)
|
|
|
|
|
|
|
|
|
|
# Compute year penalty: [equal, diff>=3] --> [0, MAX].
|
|
|
|
|
yearPenalty = SCORE_PENALTY_YEAR
|
|
|
|
|
mediaYear = toInteger(mediaYear)
|
|
|
|
|
year = toInteger(year)
|
|
|
|
|
if mediaYear is not None and year is not None:
|
|
|
|
|
yearDiff = abs(mediaYear - year)
|
|
|
|
|
if not yearDiff:
|
|
|
|
|
yearPenalty = 0
|
|
|
|
|
elif yearDiff == 1:
|
|
|
|
|
yearPenalty = 15
|
|
|
|
|
elif yearDiff == 2:
|
|
|
|
|
yearPenalty = 25
|
|
|
|
|
score = score - yearPenalty
|
2015-06-23 23:00:27 +03:00
|
|
|
|
# print str(mediaYear)+' '+str(year)+' '+str(yearPenalty)
|
2015-01-09 14:11:21 +03:00
|
|
|
|
|
|
|
|
|
# Compute title penalty.
|
|
|
|
|
titlePenalty = computeTitlePenalty(mediaName, title)
|
|
|
|
|
altTitlePenalty = 100
|
|
|
|
|
if altTitle is not None:
|
|
|
|
|
altTitlePenalty = computeTitlePenalty(mediaName, altTitle)
|
|
|
|
|
|
|
|
|
|
# Get detranlitirated media name (in case filename is in latin characters),
|
|
|
|
|
# compare it's score with the original, and pick the min.
|
|
|
|
|
try:
|
|
|
|
|
altMediaName = translit.detranslify(mediaName)
|
|
|
|
|
detranslifiedTitlePenalty = computeTitlePenalty(altMediaName, title)
|
|
|
|
|
titlePenalty = min(detranslifiedTitlePenalty, titlePenalty)
|
|
|
|
|
if altTitle is not None:
|
|
|
|
|
detranslifiedAltTitlePenalty = computeTitlePenalty(altMediaName, altTitle)
|
|
|
|
|
altTitlePenalty = min(detranslifiedAltTitlePenalty, altTitlePenalty)
|
|
|
|
|
except:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
titlePenalty = min(titlePenalty, altTitlePenalty)
|
|
|
|
|
score = score - titlePenalty
|
|
|
|
|
|
|
|
|
|
# If the score is not high enough, add a few points to the first result -
|
|
|
|
|
# let's give KinoPoisk some credit :-).
|
|
|
|
|
if itemIndex == 0 and score <= 80:
|
|
|
|
|
score = score + 5
|
|
|
|
|
|
|
|
|
|
# IMPORTANT: always return an int.
|
|
|
|
|
score = int(score)
|
|
|
|
|
# Log.Debug('***** title scored %d' % score)
|
|
|
|
|
return score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def scoreThumbnailResult(thumb, isPoster):
|
|
|
|
|
""" Given a Thumbnail object that represents an poster or a funart result,
|
|
|
|
|
scores it, and stores the score on the passed object (thumb.score).
|
|
|
|
|
"""
|
|
|
|
|
score = 0
|
|
|
|
|
if thumb.url is None:
|
|
|
|
|
thumb.score = 0
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
if thumb.index < IMAGE_SCORE_MAX_NUMBER_OF_ITEMS:
|
|
|
|
|
# Score bonus from index for items below 10 on the list.
|
|
|
|
|
bonus = IMAGE_SCORE_ITEM_ORDER_BONUS_MAX * \
|
|
|
|
|
((IMAGE_SCORE_MAX_NUMBER_OF_ITEMS - thumb.index) / float(IMAGE_SCORE_MAX_NUMBER_OF_ITEMS))
|
|
|
|
|
score += bonus
|
|
|
|
|
|
|
|
|
|
if thumb.width is not None and thumb.height is not None:
|
|
|
|
|
# Get a resolution bonus if width*height is more than a certain min value.
|
|
|
|
|
if isPoster:
|
|
|
|
|
minPx = POSTER_SCORE_MIN_RESOLUTION_PX
|
|
|
|
|
maxPx = POSTER_SCORE_MAX_RESOLUTION_PX
|
|
|
|
|
bestRatio = POSTER_SCORE_BEST_RATIO
|
|
|
|
|
else:
|
|
|
|
|
minPx = ART_SCORE_MIN_RESOLUTION_PX
|
|
|
|
|
maxPx = ART_SCORE_MAX_RESOLUTION_PX
|
|
|
|
|
bestRatio = ART_SCORE_BEST_RATIO
|
|
|
|
|
pixelsCount = thumb.width * thumb.height
|
|
|
|
|
if pixelsCount > minPx:
|
|
|
|
|
if pixelsCount > maxPx:
|
|
|
|
|
pixelsCount = maxPx
|
|
|
|
|
bonus = float(IMAGE_SCORE_RESOLUTION_BONUS_MAX) * \
|
|
|
|
|
float((pixelsCount - minPx)) / float((maxPx - minPx))
|
|
|
|
|
score += bonus
|
|
|
|
|
|
|
|
|
|
# Get an orientation (Portrait vs Landscape) bonus. (we prefer images that are have portrait orientation.
|
|
|
|
|
ratio = thumb.width / float(thumb.height)
|
|
|
|
|
ratioDiff = math.fabs(bestRatio - ratio)
|
|
|
|
|
if ratioDiff < 0.5:
|
|
|
|
|
bonus = IMAGE_SCORE_RATIO_BONUS_MAX * (0.5 - ratioDiff) * 2.0
|
|
|
|
|
score += bonus
|
|
|
|
|
|
|
|
|
|
# Get a bonus if image has a separate thumbnail URL.
|
|
|
|
|
if thumb.thumbUrl is not None and thumb.url != thumb.thumbUrl:
|
|
|
|
|
score += IMAGE_SCORE_THUMB_BONUS
|
|
|
|
|
|
|
|
|
|
thumb.score = int(score)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def isAsciiString(mediaName):
|
|
|
|
|
""" Returns True if all characters of the string are ASCII.
|
|
|
|
|
"""
|
|
|
|
|
for index, char in enumerate(mediaName):
|
|
|
|
|
if ord(char) >= 128:
|
|
|
|
|
return False
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def toInteger(maybeNumber):
|
|
|
|
|
""" Returns the argument converted to an integer if it represents a number
|
|
|
|
|
or None if the argument is None or does not represent a number.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
if maybeNumber is not None and str(maybeNumber).strip() != '':
|
|
|
|
|
return int(maybeNumber)
|
|
|
|
|
except:
|
|
|
|
|
pass
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def computeTitlePenalty(mediaName, title):
|
|
|
|
|
""" Given media name and a candidate title, returns the title result score penalty.
|
|
|
|
|
@param mediaName Movie title parsed from the file system.
|
|
|
|
|
@param title Movie title from the website.
|
|
|
|
|
"""
|
|
|
|
|
mediaName = mediaName.lower()
|
|
|
|
|
title = title.lower()
|
|
|
|
|
if mediaName != title:
|
|
|
|
|
# First approximate the whole strings.
|
|
|
|
|
diffRatio = difflib.SequenceMatcher(None, mediaName, title).ratio()
|
|
|
|
|
penalty = int(SCORE_PENALTY_TITLE * (1 - diffRatio))
|
|
|
|
|
|
|
|
|
|
# If the penalty is more than 1/2 of max title penalty, check to see if
|
|
|
|
|
# this title starts with media name. This means that media name does not
|
|
|
|
|
# have the whole name of the movie - very common case. For example, media name
|
|
|
|
|
# "Кавказская пленница" for a movie title "Кавказская пленница, или Новые приключения Шурика".
|
|
|
|
|
if penalty >= 15: # This is so that we don't have to do the 'split' every time.
|
|
|
|
|
# Compute the scores of the
|
|
|
|
|
# First, check if the title starts with media name.
|
|
|
|
|
mediaNameParts = mediaName.split()
|
|
|
|
|
titleParts = title.split()
|
|
|
|
|
if len(mediaNameParts) <= len(titleParts):
|
|
|
|
|
i = 0
|
|
|
|
|
# Start with some small penalty, value of which depends on how
|
|
|
|
|
# many words media name has relative to the title's word count.
|
|
|
|
|
penaltyAlt = max(5, int(round((1.0 - (float(len(mediaNameParts)) / len(titleParts))) * 15 - 5)))
|
|
|
|
|
penaltyPerPart = SCORE_PENALTY_TITLE / len(mediaNameParts)
|
|
|
|
|
for mediaNamePart in mediaNameParts:
|
|
|
|
|
partDiffRatio = difflib.SequenceMatcher(None, mediaNamePart, titleParts[i]).ratio()
|
|
|
|
|
penaltyAlt = penaltyAlt + int(penaltyPerPart * (1 - partDiffRatio))
|
|
|
|
|
i = i + 1
|
|
|
|
|
penalty = min(penalty, penaltyAlt)
|
2015-06-23 23:00:27 +03:00
|
|
|
|
# print '++++++ DIFF("%s", "%s") = %g --> %d' % (mediaName.encode('utf8'), title.encode('utf8'), diffRatio, penalty)
|
|
|
|
|
# Log.Debug('++++++ DIFF("%s", "%s") = %g --> %d' % (mediaName.encode('utf8'), title.encode('utf8'), diffRatio, penalty))
|
2015-01-09 14:11:21 +03:00
|
|
|
|
return penalty
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HttpUtils():
|
|
|
|
|
def __init__(self, encoding, userAgent):
|
|
|
|
|
self.encoding = encoding
|
|
|
|
|
self.userAgent = userAgent
|
|
|
|
|
|
|
|
|
|
def requestAndParseHtmlPage(self, url):
|
|
|
|
|
return getElementFromHttpRequest(url, self.encoding, self.userAgent)
|
|
|
|
|
|
|
|
|
|
def requestImageJpeg(self, url):
|
|
|
|
|
return requestImageJpeg(url, self.userAgent)
|