commit 279c8fc54d31a2f86d0966c5a87632f50765b369 Author: 7sDream <7seconddream@gmail.com> Date: Tue May 23 13:35:40 2017 +0800 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f3bbb0a --- /dev/null +++ b/.gitignore @@ -0,0 +1,67 @@ + +# Editor +.idea/ +.vscode/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +#Ipython Notebook +.ipynb_checkpoints diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a4edad4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 7sDream + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..13a538d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README.md LICENSE changelog.md +include test.py +include test.torrent diff --git a/README.md b/README.md new file mode 100644 index 0000000..30694dc --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# Torrent file parser for Python + +## Install + +``` +pip install torrent_parser +``` + +## Usage: + +### CLI + +``` +$ pytp test.torrent +``` + +``` +$ cat test.torrent | pytp +``` + +![][screenshots-help] + +![][screenshots-normal] + +![][screenshots-indent] + + +### As a module + +```pycon +>>> import torrent_parser as tp +>>> data = tp.parse_torrent_file('test.torrent') +>>> print(data['announce']) +http://tracker.trackerfix.com:80/announce +``` + +## Test + +```bash +python -m unittest test +``` + +## LICENSE + +See [License][LICENSE]. 
+ +[screenshots-help]: http://rikka-10066868.image.myqcloud.com/7c23f6d0-b23f-4c57-be93-d37fafe3292a.png +[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png +[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png +[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..d5df43f --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[metadata] +description-file = README.md +license-file = LICENSE + +[bdist_wheel] +universal = 1 diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..0e625b7 --- /dev/null +++ b/setup.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# coding=utf-8 + +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +import torrent_parser + +setup( + name='torrent_parser', + keywords=['file', 'torrent', 'JSON', 'parser'], + version=torrent_parser.__version__, + py_modules=['torrent_parser'], + url='https://github.com/7sDream/torrent_parser', + license='MIT', + author='7sDream', + author_email='7seconddream@gmail.com', + description='A .torrent file parser for both Python 2 and 3', + install_requires=[], + entry_points={ + 'console_scripts': ['pytp=torrent_parser:__main'] + }, + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'Intended Audience :: End Users/Desktop', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Topic :: Multimedia', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Utilities', + ] +) diff --git a/test.py b/test.py new file mode 100644 index 0000000..9add257 --- /dev/null +++ 
from __future__ import unicode_literals

import unittest
import collections

from torrent_parser import TorrentFileParser, parse_torrent_file


class Test(unittest.TestCase):
    """Parse the bundled sample torrent through both public entry points."""

    TEST_FILENAME = 'test.torrent'

    def _check_known_fields(self, parsed):
        # Values known to be present in the bundled sample torrent.
        self.assertIn(['udp://p4p.arenabg.ch:1337/announce'],
                      parsed['announce-list'])
        self.assertEqual(parsed['comment'],
                         'Torrent downloaded from https://rarbg.to')
        self.assertEqual(parsed['creation date'], 1472762993)

    def test_parse_torrent_file_use_shortcut(self):
        # Only checks that the shortcut function does not raise.
        parse_torrent_file(self.TEST_FILENAME)

    def test_parse_torrent_file_use_class(self):
        # Only checks that the class-based API does not raise.
        with open(self.TEST_FILENAME, 'rb') as stream:
            TorrentFileParser(stream).parse()

    def test_parse_torrent_file_to_ordered_dict(self):
        via_shortcut = parse_torrent_file(self.TEST_FILENAME, True)
        self.assertIsInstance(via_shortcut, collections.OrderedDict)

        with open(self.TEST_FILENAME, 'rb') as stream:
            via_class = TorrentFileParser(stream, True).parse()
        self.assertIsInstance(via_class, collections.OrderedDict)

    def test_parse_correctness(self):
        self._check_known_fields(parse_torrent_file(self.TEST_FILENAME))

    def test_parse_two_times(self):
        # One parser instance must give the same result when run twice.
        with open(self.TEST_FILENAME, 'rb') as stream:
            parser = TorrentFileParser(stream)
            self._check_known_fields(parser.parse())
            self._check_known_fields(parser.parse())
#!/usr/bin/env python
# coding: utf-8

"""
A .torrent file parser for both Python 2 and 3

Usage:

    data = parse_torrent_file(filename)

    # or

    with open(filename, 'rb') as f:  # the binary mode 'b' is necessary
        data = TorrentFileParser(f).parse()
"""

from __future__ import print_function, unicode_literals

import argparse
import binascii
import collections
import errno
import io
import json
import sys


__all__ = [
    'InvalidTorrentFileException',
    'parse_torrent_file',
    'TorrentFileParser',
]

__version__ = '0.1.0'


class InvalidTorrentFileException(Exception):
    """
    Raised when the input is not a well-formed bencoded torrent file.

    :param pos: stream offset where the problem was detected, only used to
        build the default message
    :param msg: optional message that replaces the default one
    """

    def __init__(self, pos, msg=None):
        msg = msg or \
            "Invalid torrent format when reading at pos " + str(pos)
        super(InvalidTorrentFileException, self).__init__(msg)


class TorrentFileParser(object):
    """
    Recursive-descent parser for a bencoded stream.

    Lists become ``list``, dicts become ``dict`` (or ``OrderedDict``),
    integers become ``int`` and byte strings are decoded to text, except
    the value of a ``pieces`` key which is returned as a list of hex
    digests, one per 20-byte chunk.
    """

    TYPE_LIST = 'list'
    TYPE_DICT = 'dict'
    TYPE_INT = 'int'
    TYPE_STRING = 'string'
    TYPE_END = 'end'

    LIST_INDICATOR = b'l'
    DICT_INDICATOR = b'd'
    INT_INDICATOR = b'i'
    END_INDICATOR = b'e'
    STRING_INDICATOR = b''

    TYPES = [
        (TYPE_LIST, LIST_INDICATOR),
        (TYPE_DICT, DICT_INDICATOR),
        (TYPE_INT, INT_INDICATOR),
        (TYPE_END, END_INDICATOR),
        (TYPE_STRING, STRING_INDICATOR),
    ]

    # One-byte indicator -> element type. Any other byte starts a string,
    # whose length prefix is validated by _next_int.
    _INDICATOR_TYPES = {
        LIST_INDICATOR: TYPE_LIST,
        DICT_INDICATOR: TYPE_DICT,
        INT_INDICATOR: TYPE_INT,
        END_INDICATOR: TYPE_END,
    }

    def __init__(self, fp, use_ordered_dict=False, encoding='utf-8'):
        """
        :param fp: a **binary** file-like object to parse,
          which means need 'b' mode when use built-in open function
        :param encoding: file content encoding, default utf-8
        :param use_ordered_dict: Use collections.OrderedDict as dict container
          default False, which mean use built-in dict
        :raises ValueError: if ``fp`` has no ``read`` or ``seek`` attribute
        """
        # getattr needs an explicit default, otherwise a missing attribute
        # raises AttributeError and this check can never fire.
        if getattr(fp, 'read', None) is None \
                or getattr(fp, 'seek', None) is None:
            raise ValueError('Argument fp needs a file like object')

        self._pos = 0
        # Kept so that every parse() run starts from the caller's encoding
        # even if a previous run switched it via an 'encoding' key.
        self._initial_encoding = encoding
        self._encoding = encoding
        self._content = fp
        self._use_ordered_dict = use_ordered_dict

    def parse(self):
        """
        Parse the whole stream from its beginning; may be called repeatedly.

        :return: the parse result
        :type: depends on ``use_ordered_dict`` option when init the parser
          see :any:`TorrentFileParser.__init__`
        :raises InvalidTorrentFileException: on malformed or truncated
          input, trailing data, or a non-dict outermost element
        """
        self._restart()
        data = self._next_element()

        try:
            c = self._read_byte(1, True)
        except EOFError:  # expect EOF
            pass
        else:
            raise InvalidTorrentFileException(
                0, 'Expect EOF, but get [{}] at pos {}'.format(c, self._pos)
            )

        if isinstance(data, dict):
            return data

        # The message must go in the msg slot; the first argument is pos.
        raise InvalidTorrentFileException(
            0, 'Outermost element is not a dict'
        )

    def _read_byte(self, count=1, raise_eof=False):
        """
        Read exactly ``count`` bytes and advance the position counter.

        :raises EOFError: nothing could be read and ``raise_eof`` is true
        :raises InvalidTorrentFileException: fewer than ``count`` bytes
          were available
        """
        if count < 0:
            raise ValueError('count must not be negative')
        gotten = self._content.read(count)
        if len(gotten) < count:
            if raise_eof and len(gotten) == 0:
                raise EOFError()
            raise InvalidTorrentFileException(
                self._pos,
                'Unexpected EOF when reading torrent file'
            )
        self._pos += count
        return gotten

    def _seek_back(self, count):
        """Rewind the stream and the position counter by ``count`` bytes."""
        self._content.seek(-count, 1)
        self._pos -= count

    def _restart(self):
        """Reset stream, position and encoding to their initial state."""
        self._content.seek(0, 0)
        self._pos = 0
        self._encoding = self._initial_encoding

    def _dict_items_generator(self):
        """
        Yield ``(key, value)`` pairs until the dict's end marker is read.

        The end marker is detected explicitly, so errors and truncation
        inside the dict propagate instead of looking like its end.
        """
        while True:
            key_type = self._next_type()
            if key_type == self.TYPE_END:
                return
            if key_type != self.TYPE_STRING:
                # bencode dict keys are always strings
                raise InvalidTorrentFileException(self._pos)
            k = self._next_string()
            if k == 'pieces':
                v = self._pieces()
            else:
                v = self._next_element()
            if k == 'encoding':
                # strings read after this key are decoded with it
                self._encoding = v
            yield k, v

    def _next_dict(self):
        data = collections.OrderedDict() if self._use_ordered_dict else dict()
        for key, element in self._dict_items_generator():
            data[key] = element
        return data

    def _list_items_generator(self):
        """Yield list elements until the list's end marker is read."""
        while True:
            element_type = self._next_type()
            if element_type == self.TYPE_END:
                return
            yield self._type_to_func(element_type)()

    def _next_list(self):
        return list(self._list_items_generator())

    def _next_int(self, end=END_INDICATOR):
        """
        Read decimal digits up to and including the ``end`` byte.

        An optional leading ``-`` is accepted. Leading zeros and an empty
        digit run (read as 0) are tolerated.

        :raises InvalidTorrentFileException: on a non-digit byte or a
          minus sign without digits
        """
        value = 0
        sign = 1
        char = self._read_byte(1)
        if char == b'-':
            sign = -1
            char = self._read_byte(1)
            if char == end:
                raise InvalidTorrentFileException(self._pos)
        while char != end:
            # noinspection PyTypeChecker
            if not b'0' <= char <= b'9':
                raise InvalidTorrentFileException(self._pos)
            value = value * 10 + (ord(char) - ord(b'0'))
            char = self._read_byte(1)
        return sign * value

    def _next_string(self, decode=True):
        """
        Read a ``<length>:<bytes>`` string.

        :param decode: decode with the current encoding when true,
          return the raw bytes otherwise
        """
        length = self._next_int(b':')
        if length < 0:
            # a negative count would make read() consume the whole stream
            raise InvalidTorrentFileException(self._pos)
        raw = self._read_byte(length)
        if decode:
            return raw.decode(self._encoding)
        return raw

    def _pieces(self):
        """
        Read the ``pieces`` value as a list of 40-char lowercase hex
        strings, one per 20-byte chunk.

        :raises InvalidTorrentFileException: if the byte length is not a
          multiple of 20
        """
        raw = self._next_string(decode=False)
        if len(raw) % 20 != 0:
            raise InvalidTorrentFileException(self._pos)
        return [
            binascii.hexlify(raw[x:x + 20]).decode('ascii')
            for x in range(0, len(raw), 20)
        ]

    def _next_end(self):
        # An end marker where an element is required is always an error.
        raise InvalidTorrentFileException(self._pos)

    def _next_type(self):
        """
        Consume the next type indicator and return the element type.

        A byte that is not an indicator is pushed back and reported as the
        start of a string.
        """
        char = self._read_byte(1)
        element_type = self._INDICATOR_TYPES.get(char)
        if element_type is None:
            self._seek_back(1)
            return self.TYPE_STRING
        return element_type

    def _type_to_func(self, t):
        return getattr(self, '_next_' + t)

    def _next_element(self):
        element_type = self._next_type()
        element = self._type_to_func(element_type)()
        return element


def parse_torrent_file(filename, use_ordered_dict=False, encoding='utf-8'):
    """
    Shortcut function for parse torrent object use TorrentFileParser

    :param string filename: torrent filename
    :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__`
    :param string encoding: see :any:`TorrentFileParser.__init__`
    :rtype: dict if ``use_ordered_dict`` is false,
      collections.OrderedDict otherwise
    """
    with open(filename, 'rb') as f:
        return TorrentFileParser(f, use_ordered_dict, encoding).parse()


def __main():
    """Command line entry point: print a torrent file as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='',
                        help='input file, will read from stdin if empty')
    parser.add_argument('--dict', '-d', action='store_true', default=False,
                        help='use built-in dict, default will be OrderedDict')
    parser.add_argument('--sort', '-s', action='store_true', default=False,
                        help='sort output json item by key')
    parser.add_argument('--indent', '-i', type=int, default=None,
                        help='json output indent for every inner level')
    parser.add_argument('--ascii', '-a', action='store_true', default=False,
                        help='ensure output json use ascii char, '
                             'escape other char use \\u')
    parser.add_argument('--version', '-v', action='store_true', default=False,
                        help='print version and exit')
    args = parser.parse_args()

    if args.version:
        print(__version__)
        sys.exit(0)

    if args.file == '':
        target_file = io.BytesIO(
            getattr(sys.stdin, 'buffer', sys.stdin).read()
        )
    else:
        try:
            target_file = open(args.file, 'rb')
        except IOError as e:
            # FileNotFoundError does not exist on Python 2; IOError with
            # ENOENT covers both major versions.
            if e.errno != errno.ENOENT:
                raise
            sys.stderr.write('Unable to find file {}\n'.format(args.file))
            sys.exit(1)

    try:
        data = TorrentFileParser(target_file, not args.dict).parse()
    finally:
        target_file.close()

    data = json.dumps(
        data, ensure_ascii=args.ascii,
        sort_keys=args.sort, indent=args.indent
    )

    print(data)


if __name__ == '__main__':
    __main()