7sDream 2017-05-23 13:35:40 +08:00
commit 279c8fc54d
No known key found for this signature in database
GPG Key ID: 72A6D9FCEDDAB75D
9 changed files with 501 additions and 0 deletions

67
.gitignore vendored 100644
View File

@ -0,0 +1,67 @@
# Editor
.idea/
.vscode/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints

21
LICENSE 100644
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2017 7sDream
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

3
MANIFEST.in 100644
View File

@ -0,0 +1,3 @@
include README.md LICENSE changelog.md
include test.py
include test.torrent

50
README.md 100644
View File

@ -0,0 +1,50 @@
# Torrent file parser for Python
## Install
```
pip install torrent_parser
```
## Usage:
### CLI
```
$ pytp test.torrent
```
```
$ cat test.torrent | pytp
```
![][screenshots-help]
![][screenshots-normal]
![][screenshots-indent]
### As a module
```pycon
>>> import torrent_parser as tp
>>> data = tp.parse_torrent_file('test.torrent')
>>> print(data['announce'])
http://tracker.trackerfix.com:80/announce
```
## Test
```bash
python -m unittest test
```
## LICENSE
See [License][LICENSE].
[screenshots-help]: http://rikka-10066868.image.myqcloud.com/7c23f6d0-b23f-4c57-be93-d37fafe3292a.png
[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png
[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png
[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE

6
setup.cfg 100644
View File

@ -0,0 +1,6 @@
[metadata]
description-file = README.md
license-file = LICENSE
[bdist_wheel]
universal = 1

41
setup.py 100755
View File

@ -0,0 +1,41 @@
#!/usr/bin/env python
# coding=utf-8
"""Packaging script for the single-module ``torrent_parser`` distribution."""

try:
    from setuptools import setup
except ImportError:
    # Fall back to the standard library when setuptools is unavailable.
    from distutils.core import setup

# Imported so the package version is taken from the module itself.
import torrent_parser

# Trove classifiers describing the distribution.
CLASSIFIERS = [
    'Development Status :: 3 - Alpha',
    'Environment :: Console',
    'Intended Audience :: Developers',
    'Intended Audience :: End Users/Desktop',
    'License :: OSI Approved :: MIT License',
    'Natural Language :: English',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
    'Topic :: Multimedia',
    'Topic :: Software Development :: Libraries :: Python Modules',
    'Topic :: Utilities',
]

# The ``pytp`` command is bound to the module's ``__main`` function.
CONSOLE_SCRIPTS = ['pytp=torrent_parser:__main']

setup(
    name='torrent_parser',
    keywords=['file', 'torrent', 'JSON', 'parser'],
    version=torrent_parser.__version__,
    py_modules=['torrent_parser'],
    url='https://github.com/7sDream/torrent_parser',
    license='MIT',
    author='7sDream',
    author_email='7seconddream@gmail.com',
    description='A .torrent file parser for both Python 2 and 3',
    install_requires=[],
    entry_points={'console_scripts': CONSOLE_SCRIPTS},
    classifiers=CLASSIFIERS,
)

49
test.py 100644
View File

@ -0,0 +1,49 @@
from __future__ import unicode_literals
import unittest
import collections
from torrent_parser import TorrentFileParser, parse_torrent_file
class Test(unittest.TestCase):
    """Tests that parse the bundled sample torrent file."""

    TEST_FILENAME = 'test.torrent'

    def _assert_sample_values(self, parsed):
        """Check the fields of the sample torrent that the tests rely on."""
        self.assertIn(['udp://p4p.arenabg.ch:1337/announce'],
                      parsed['announce-list'])
        self.assertEqual(parsed['comment'],
                         'Torrent downloaded from https://rarbg.to')
        self.assertEqual(parsed['creation date'], 1472762993)

    def test_parse_torrent_file_use_shortcut(self):
        parse_torrent_file(self.TEST_FILENAME)

    def test_parse_torrent_file_use_class(self):
        with open(self.TEST_FILENAME, 'rb') as stream:
            TorrentFileParser(stream).parse()

    def test_parse_torrent_file_to_ordered_dict(self):
        # Via the shortcut function.
        parsed = parse_torrent_file(self.TEST_FILENAME, True)
        self.assertIsInstance(parsed, collections.OrderedDict)

        # Via the parser class directly.
        with open(self.TEST_FILENAME, 'rb') as stream:
            parsed = TorrentFileParser(stream, True).parse()
        self.assertIsInstance(parsed, collections.OrderedDict)

    def test_parse_correctness(self):
        self._assert_sample_values(parse_torrent_file(self.TEST_FILENAME))

    def test_parse_two_times(self):
        # The same parser instance must give the same result when reused.
        with open(self.TEST_FILENAME, 'rb') as stream:
            parser = TorrentFileParser(stream)
            for _ in range(2):
                self._assert_sample_values(parser.parse())

2
test.torrent 100644
View File

@ -0,0 +1,2 @@
d8:announce41:http://tracker.trackerfix.com:80/announce13:announce-listll41:http://tracker.trackerfix.com:80/announceel30:udp://9.rarbg.me:2710/announceel30:udp://9.rarbg.to:2710/announceel43:udp://tracker.coppersurfer.tk:6969/announceel34:udp://glotorrents.pw:6969/announceel40:udp://tracker.trackerfix.com:80/announceel40:udp://inferno.demonoid.ooo:3392/announceel34:udp://p4p.arenabg.ch:1337/announceel30:udp://9.rarbg.me:2710/announceel30:udp://9.rarbg.to:2710/announceel38:udp://torrent.gresille.org:80/announceel35:http://retracker.krs-ix.ru/announceel34:http://mgtracker.org:2710/announceel30:http://thetracker.org/announceel33:http://explodie.org:6969/announceee7:comment40:Torrent downloaded from https://rarbg.to10:created by13:uTorrent/221013:creation datei1472762993e8:encoding5:UTF-84:infod5:filesld6:lengthi505957e4:pathl83:Streaming, Sharing, Stealing - Big Data and the Future of Entertainment (2016).epubeed6:lengthi36e4:pathl16:Come Join Us.txteee4:name92:Streaming, Sharing, Stealing - Big Data and the Future of Entertainment (2016) (Epub) Gooner12:piece lengthi65536e6:pieces160:™d\6Zþ<1E>Š¸l0  èPZ™6HÿV'†",fÄ{3™üG2õ0½íOoj¹/v%YG†•VÞƒW&®YoÏ=-5ö<35>À¬-¡rCÆÉbñÂ,ìB,¹<C2B9>Äw¸Ç—f{PJ|—‰C/ä<>+
+ B¾+¹žÓ ò±!ЧæTåO0pSDšlüÇ9¸3Ý8ÅAee

262
torrent_parser.py 100644
View File

@ -0,0 +1,262 @@
#!/usr/bin/env python
# coding: utf-8
"""
A .torrent file parser for both Python 2 and 3
Usage:
data = parse_torrent_file(filename)
# or
with open(filename, 'rb') as f: # the binary mode 'b' is necessary
data = TorrentFileParser(f).parse()
"""
from __future__ import print_function, unicode_literals
import argparse
import collections
import io
import json
import sys
# Public names exported by ``from torrent_parser import *``.
__all__ = [
    'InvalidTorrentFileException',
    'parse_torrent_file',
    'TorrentFileParser',
]
# Module version; read by setup.py and printed by the CLI ``--version`` flag.
__version__ = '0.1.0'
class InvalidTorrentFileException(Exception):
    """Raised when the input is not a well-formed torrent file.

    :param pos: position used to build the default message
    :param msg: explicit message; when empty or None a default message
        mentioning ``pos`` is used instead
    """

    def __init__(self, pos, msg=None):
        if not msg:
            msg = "Invalid torrent format when reading at pos " + str(pos)
        super(InvalidTorrentFileException, self).__init__(msg)
class TorrentFileParser(object):
    """Parser for bencoded .torrent data read from a binary file object.

    The parser reads the stream element by element. An element is a list
    (``l...e``), a dict (``d...e``), an integer (``i<digits>e``) or a
    length-prefixed string (``<length>:<bytes>``). The outermost element
    must be a dict.
    """

    TYPE_LIST = 'list'
    TYPE_DICT = 'dict'
    TYPE_INT = 'int'
    TYPE_STRING = 'string'
    TYPE_END = 'end'

    LIST_INDICATOR = b'l'
    DICT_INDICATOR = b'd'
    INT_INDICATOR = b'i'
    END_INDICATOR = b'e'
    # Strings have no leading indicator byte; they start with their length.
    STRING_INDICATOR = b''

    TYPES = [
        (TYPE_LIST, LIST_INDICATOR),
        (TYPE_DICT, DICT_INDICATOR),
        (TYPE_INT, INT_INDICATOR),
        (TYPE_END, END_INDICATOR),
        (TYPE_STRING, STRING_INDICATOR),
    ]

    # Sentinel returned when an end marker is read, so that the end of a
    # container can be told apart from a genuine parse error.
    _END = object()

    def __init__(self, fp, use_ordered_dict=False, encoding='utf-8'):
        """
        :param fp: a **binary** file-like object to parse,
          which means need 'b' mode when use built-in open function
        :param encoding: file content encoding, default utf-8
        :param use_ordered_dict: Use collections.OrderedDict as dict container
          default False, which mean use built-in dict
        :raise ValueError: if ``fp`` has no ``read`` or ``seek`` attribute
        """
        # A default of None is required: without it getattr raises
        # AttributeError and the ValueError below is never reached.
        if getattr(fp, 'read', None) is None \
                or getattr(fp, 'seek', None) is None:
            raise ValueError('Argument fp needs a file like object')

        self._pos = 0
        self._encoding = encoding
        self._content = fp
        self._use_ordered_dict = use_ordered_dict

    def parse(self):
        """
        Parse the whole stream from its beginning.

        :return: the parse result
        :type: depends on ``use_ordered_dict`` option when init the parser
          see :any:`TorrentFileParser.__init__`
        :raise InvalidTorrentFileException: if the data is malformed, is
          followed by trailing bytes, or the outermost element is not a dict
        """
        self._restart()
        data = self._next_element()

        try:
            c = self._read_byte(1, True)
            raise InvalidTorrentFileException(
                0, 'Expect EOF, but get [{}] at pos {}'.format(c, self._pos)
            )
        except EOFError:  # expect EOF
            pass

        if isinstance(data, dict):
            return data

        raise InvalidTorrentFileException(
            self._pos, 'Outermost element is not a dict'
        )

    def _read_byte(self, count=1, raise_eof=False):
        """Read exactly ``count`` bytes and advance the tracked position.

        :param count: number of bytes to read, must not be negative
        :param raise_eof: raise ``EOFError`` instead of
          ``InvalidTorrentFileException`` when the stream is exhausted
        :return: the bytes read
        """
        assert count >= 0
        gotten = self._content.read(count)
        # A short read means the file is truncated, not only an empty one.
        if len(gotten) < count:
            if raise_eof:
                raise EOFError()
            raise InvalidTorrentFileException(
                self._pos,
                'Unexpected EOF when reading torrent file'
            )
        self._pos += count
        return gotten

    def _seek_back(self, count):
        """Move the stream back ``count`` bytes, keeping ``_pos`` in sync."""
        self._content.seek(-count, 1)
        self._pos -= count

    def _restart(self):
        """Rewind the stream to its beginning."""
        self._content.seek(0, 0)
        self._pos = 0

    def _dict_items_generator(self):
        """Yield ``(key, value)`` pairs until the dict's end marker is read."""
        while True:
            k = self._next_element(allow_end=True)
            if k is self._END:
                return
            if k == 'pieces':
                # 'pieces' holds raw hash bytes, not decodable text.
                v = self._pieces()
            else:
                v = self._next_element()
            if k == 'encoding':
                # Strings read after this key are decoded with this encoding.
                self._encoding = v
            yield k, v

    def _next_dict(self):
        """Read dict items into a dict or an OrderedDict."""
        data = collections.OrderedDict() if self._use_ordered_dict else dict()
        for key, element in self._dict_items_generator():
            data[key] = element
        return data

    def _list_items_generator(self):
        """Yield list elements until the list's end marker is read."""
        while True:
            element = self._next_element(allow_end=True)
            if element is self._END:
                return
            yield element

    def _next_list(self):
        """Read list elements into a list."""
        return list(self._list_items_generator())

    def _next_int(self, end=END_INDICATOR):
        """Read decimal digits up to the ``end`` byte and return the integer.

        A leading ``-`` is accepted and gives a negative value.

        :param end: the terminator byte, ``b'e'`` for integers and ``b':'``
          for string length prefixes
        :raise InvalidTorrentFileException: on a non-digit byte, or a ``-``
          not followed by any digit
        """
        value = 0
        has_digit = False
        char = self._read_byte(1)
        negative = char == b'-'
        if negative:
            char = self._read_byte(1)
        while char != end:
            # noinspection PyTypeChecker
            if not b'0' <= char <= b'9':
                raise InvalidTorrentFileException(self._pos)
            value = value * 10 + int(char)
            has_digit = True
            char = self._read_byte(1)
        if negative:
            if not has_digit:
                raise InvalidTorrentFileException(self._pos)
            return -value
        return value

    def _next_string(self, decode=True):
        """Read a length-prefixed string.

        :param decode: decode with the current encoding when true,
          otherwise return the raw bytes
        :raise InvalidTorrentFileException: if the length is negative
        """
        length = self._next_int(b':')
        if length < 0:
            raise InvalidTorrentFileException(self._pos)
        raw = self._read_byte(length)
        if decode:
            return raw.decode(self._encoding)
        return raw

    @staticmethod
    def __to_hex(v):
        """Return a byte as two lowercase hex digits.

        Iterating bytes yields ints on Python 3 and 1-char strings on
        Python 2, so both forms are accepted.
        """
        return '{:02x}'.format(ord(v) if isinstance(v, str) else v)

    def _pieces(self):
        """Read the ``pieces`` value as a list of 20-byte hashes in hex.

        :raise InvalidTorrentFileException: if the raw length is not a
          multiple of 20
        """
        raw = self._next_string(decode=False)
        if len(raw) % 20 != 0:
            raise InvalidTorrentFileException(self._pos)
        return [
            ''.join([self.__to_hex(c) for c in raw[x:x + 20]])
            for x in range(0, len(raw), 20)
        ]

    def _next_end(self):
        """Return the end-of-container sentinel."""
        return self._END

    def _next_type(self):
        """Identify the type of the next element from its first byte.

        The indicator byte is consumed for list, dict, int and end. Any
        other byte is taken as the start of a string length prefix and is
        pushed back.
        """
        char = self._read_byte(1)
        for (element_type, indicator) in self.TYPES:
            if indicator and indicator == char:
                return element_type
        self._seek_back(1)
        return self.TYPE_STRING

    def _type_to_func(self, t):
        """Return the ``_next_<type>`` reader method for a type name."""
        return getattr(self, '_next_' + t)

    def _next_element(self, allow_end=False):
        """Read and return the next element.

        :param allow_end: return the end sentinel when an end marker is
          read instead of raising; used while reading container items
        :raise InvalidTorrentFileException: on an end marker when
          ``allow_end`` is false
        """
        element_type = self._next_type()
        element = self._type_to_func(element_type)()
        if element is self._END and not allow_end:
            raise InvalidTorrentFileException(self._pos)
        return element
def parse_torrent_file(filename, use_ordered_dict=False, encoding='utf-8'):
    """
    Shortcut function for parse torrent object use TorrentFileParser

    :param string filename: torrent filename
    :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__`
    :param string encoding: file content encoding, default utf-8,
      see :any:`TorrentFileParser.__init__`
    :rtype: dict if ``use_ordered_dict`` is false,
      collections.OrderedDict otherwise
    """
    # Binary mode is required: the parser works on bytes.
    with open(filename, 'rb') as f:
        return TorrentFileParser(f, use_ordered_dict, encoding).parse()
def __main():
    """Command-line entry point: parse a torrent file and print it as JSON.

    Reads from the file given as argument, or from stdin when no file is
    given. Exits with status 1 if the file does not exist.
    """
    import errno

    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='',
                        help='input file, will read from stdin if empty')
    parser.add_argument('--dict', '-d', action='store_true', default=False,
                        help='use built-in dict, default will be OrderedDict')
    parser.add_argument('--sort', '-s', action='store_true', default=False,
                        help='sort output json item by key')
    parser.add_argument('--indent', '-i', type=int, default=None,
                        help='json output indent for every inner level')
    parser.add_argument('--ascii', '-a', action='store_true', default=False,
                        help='ensure output json use ascii char, '
                             'escape other char use \\u')
    parser.add_argument('--version', '-v', action='store_true', default=False,
                        help='print version and exit')
    args = parser.parse_args()

    if args.version:
        print(__version__)
        # sys.exit rather than the site-provided exit(), which may be absent.
        sys.exit(0)

    if args.file == '':
        # Python 3 exposes the binary stream as sys.stdin.buffer.
        target_file = io.BytesIO(
            getattr(sys.stdin, 'buffer', sys.stdin).read()
        )
    else:
        try:
            target_file = open(args.file, 'rb')
        except IOError as err:
            # FileNotFoundError only exists on Python 3; IOError with
            # ENOENT covers a missing file on both Python 2 and 3.
            if err.errno != errno.ENOENT:
                raise
            sys.stderr.write('Unable to find file {}\n'.format(args.file))
            sys.exit(1)

    try:
        data = TorrentFileParser(target_file, not args.dict).parse()
    finally:
        target_file.close()

    data = json.dumps(
        data, ensure_ascii=args.ascii,
        sort_keys=args.sort, indent=args.indent
    )
    print(data)


if __name__ == '__main__':
    __main()