From 279c8fc54d31a2f86d0966c5a87632f50765b369 Mon Sep 17 00:00:00 2001 From: 7sDream <7seconddream@gmail.com> Date: Tue, 23 May 2017 13:35:40 +0800 Subject: [PATCH] init --- .gitignore | 67 ++++++++++++ LICENSE | 21 ++++ MANIFEST.in | 3 + README.md | 50 +++++++++ setup.cfg | 6 ++ setup.py | 41 ++++++++ test.py | 49 +++++++++ test.torrent | 2 + torrent_parser.py | 262 ++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 501 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100644 setup.cfg create mode 100755 setup.py create mode 100644 test.py create mode 100644 test.torrent create mode 100644 torrent_parser.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f3bbb0a --- /dev/null +++ b/.gitignore @@ -0,0 +1,67 @@ + +# Editor +.idea/ +.vscode/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +#Ipython Notebook +.ipynb_checkpoints diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a4edad4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 7sDream + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..13a538d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README.md LICENSE changelog.md +include test.py +include test.torrent diff --git a/README.md b/README.md new file mode 100644 index 0000000..30694dc --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# Torrent file parser for Python + +## Install + +``` +pip install torrent_parser +``` + +## Usage: + +### CLI + +``` +$ pytp test.torrent +``` + +``` +$ cat test.torrent | pytp +``` + +![][screenshots-help] + +![][screenshots-normal] + +![][screenshots-indent] + + +### As a module + +```pycon +>>> import torrent_parser as tp +>>> data = tp.parse_torrent_file('test.torrent') +>>> print(data['announce']) +http://tracker.trackerfix.com:80/announce +``` + +## Test + +```bash +python -m unittest test +``` + +## LICENSE + +See [License][LICENSE]. + +[screenshots-help]: http://rikka-10066868.image.myqcloud.com/7c23f6d0-b23f-4c57-be93-d37fafe3292a.png +[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png +[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png +[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..d5df43f --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[metadata] +description-file = README.md +license-file = LICENSE + +[bdist_wheel] +universal = 1 diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..0e625b7 --- /dev/null +++ b/setup.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# coding=utf-8 + +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +import torrent_parser + +setup( + name='torrent_parser', + keywords=['file', 'torrent', 'JSON', 'parser'], + version=torrent_parser.__version__, + py_modules=['torrent_parser'], + url='https://github.com/7sDream/torrent_parser', +
license='MIT', + author='7sDream', + author_email='7seconddream@gmail.com', + description='A .torrent file parser for both Python 2 and 3', + install_requires=[], + entry_points={ + 'console_scripts': ['pytp=torrent_parser:__main'] + }, + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'Intended Audience :: End Users/Desktop', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Topic :: Multimedia', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Utilities', + ] +) diff --git a/test.py b/test.py new file mode 100644 index 0000000..9add257 --- /dev/null +++ b/test.py @@ -0,0 +1,49 @@ +from __future__ import unicode_literals + +import unittest +import collections + +from torrent_parser import TorrentFileParser, parse_torrent_file + + +class Test(unittest.TestCase): + TEST_FILENAME = 'test.torrent' + + def test_parse_torrent_file_use_shortcut(self): + parse_torrent_file(self.TEST_FILENAME) + + def test_parse_torrent_file_use_class(self): + with open(self.TEST_FILENAME, 'rb') as fp: + TorrentFileParser(fp).parse() + + def test_parse_torrent_file_to_ordered_dict(self): + data = parse_torrent_file(self.TEST_FILENAME, True) + self.assertIsInstance(data, collections.OrderedDict) + + with open(self.TEST_FILENAME, 'rb') as fp: + data = TorrentFileParser(fp, True).parse() + self.assertIsInstance(data, collections.OrderedDict) + + def test_parse_correctness(self): + data = parse_torrent_file(self.TEST_FILENAME) + self.assertIn(['udp://p4p.arenabg.ch:1337/announce'], + data['announce-list']) + self.assertEqual(data['comment'], + 'Torrent downloaded from https://rarbg.to') + self.assertEqual(data['creation date'], 1472762993) + + def 
test_parse_two_times(self): + with open(self.TEST_FILENAME, 'rb') as fp: + parser = TorrentFileParser(fp) + data = parser.parse() + self.assertIn(['udp://p4p.arenabg.ch:1337/announce'], + data['announce-list']) + self.assertEqual(data['comment'], + 'Torrent downloaded from https://rarbg.to') + self.assertEqual(data['creation date'], 1472762993) + data = parser.parse() + self.assertIn(['udp://p4p.arenabg.ch:1337/announce'], + data['announce-list']) + self.assertEqual(data['comment'], + 'Torrent downloaded from https://rarbg.to') + self.assertEqual(data['creation date'], 1472762993) diff --git a/test.torrent b/test.torrent new file mode 100644 index 0000000..fc49690 --- /dev/null +++ b/test.torrent @@ -0,0 +1,2 @@ +d8:announce41:http://tracker.trackerfix.com:80/announce13:announce-listll41:http://tracker.trackerfix.com:80/announceel30:udp://9.rarbg.me:2710/announceel30:udp://9.rarbg.to:2710/announceel43:udp://tracker.coppersurfer.tk:6969/announceel34:udp://glotorrents.pw:6969/announceel40:udp://tracker.trackerfix.com:80/announceel40:udp://inferno.demonoid.ooo:3392/announceel34:udp://p4p.arenabg.ch:1337/announceel30:udp://9.rarbg.me:2710/announceel30:udp://9.rarbg.to:2710/announceel38:udp://torrent.gresille.org:80/announceel35:http://retracker.krs-ix.ru/announceel34:http://mgtracker.org:2710/announceel30:http://thetracker.org/announceel33:http://explodie.org:6969/announceee7:comment40:Torrent downloaded from https://rarbg.to10:created by13:uTorrent/221013:creation datei1472762993e8:encoding5:UTF-84:infod5:filesld6:lengthi505957e4:pathl83:Streaming, Sharing, Stealing - Big Data and the Future of Entertainment (2016).epubeed6:lengthi36e4:pathl16:Come Join Us.txteee4:name92:Streaming, Sharing, Stealing - Big Data and the Future of Entertainment (2016) (Epub) Gooner12:piece lengthi65536e6:pieces160:d\6Z)l0a PZ6HV'",f{93G20Ooj/v%YGVރW&Yo=-5-rCb,B,wǗf{PJ|C/+ ++ B+Ӡ!ЧTO0pSDl9238Aee \ No newline at end of file diff --git a/torrent_parser.py b/torrent_parser.py new file mode 
100644 index 0000000..9e41049 --- /dev/null +++ b/torrent_parser.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +A .torrent file parser for both Python 2 and 3 + +Usage: + + data = parse_torrent_file(filename) + + # or + + with open(filename, 'rb') as f: # the binary mode 'b' is necessary + data = TorrentFileParser(f).parse() +""" + +from __future__ import print_function, unicode_literals + +import argparse +import collections +import io +import json +import sys + + +__all__ = [ + 'InvalidTorrentFileException', + 'parse_torrent_file', + 'TorrentFileParser', +] + +__version__ = '0.1.0' + + +class InvalidTorrentFileException(Exception): + def __init__(self, pos, msg=None): + msg = msg or "Invalid torrent format when reading at pos " + str(pos) + super(InvalidTorrentFileException, self).__init__(msg) + + +class TorrentFileParser(object): + + TYPE_LIST = 'list' + TYPE_DICT = 'dict' + TYPE_INT = 'int' + TYPE_STRING = 'string' + TYPE_END = 'end' + + LIST_INDICATOR = b'l' + DICT_INDICATOR = b'd' + INT_INDICATOR = b'i' + END_INDICATOR = b'e' + STRING_INDICATOR = b'' + + TYPES = [ + (TYPE_LIST, LIST_INDICATOR), + (TYPE_DICT, DICT_INDICATOR), + (TYPE_INT, INT_INDICATOR), + (TYPE_END, END_INDICATOR), + (TYPE_STRING, STRING_INDICATOR), + ] + + def __init__(self, fp, use_ordered_dict=False, encoding='utf-8'): + """ + :param fp: a **binary** file-like object to parse, + which means need 'b' mode when use built-in open function + :param encoding: file content encoding, default utf-8 + :param use_ordered_dict: Use collections.OrderedDict as dict container + default False, which mean use built-in dict + """ + if getattr(fp, 'read', ) is None \ + or getattr(fp, 'seek') is None: + raise ValueError('Argument fp needs a file like object') + + self._pos = 0 + self._encoding = encoding + self._content = fp + self._use_ordered_dict = use_ordered_dict + + def parse(self): + """ + :return: the parse result + :type: depends on ``use_ordered_dict`` option when init the 
parser + see :any:`TorrentFileParser.__init__` + """ + self._restart() + data = self._next_element() + + try: + c = self._read_byte(1, True) + raise InvalidTorrentFileException( + 0, 'Expect EOF, but get [{}] at pos {}'.format(c, self._pos) + ) + except EOFError: # expect EOF + pass + + if isinstance(data, dict): + return data + + raise InvalidTorrentFileException('Outermost element is not a dict') + + def _read_byte(self, count=1, raise_eof=False): + assert count >= 0 + gotten = self._content.read(count) + if count != 0 and len(gotten) == 0: + if raise_eof: + raise EOFError() + raise InvalidTorrentFileException( + self._pos, + 'Unexpected EOF when reading torrent file' + ) + self._pos += count + return gotten + + def _seek_back(self, count): + self._content.seek(-count, 1) + + def _restart(self): + self._content.seek(0, 0) + self._pos = 0 + + def _dict_items_generator(self): + while True: + try: + k = self._next_element() + except InvalidTorrentFileException: + return + if k == 'pieces': + v = self._pieces() + else: + v = self._next_element() + if k == 'encoding': + self._encoding = v + yield k, v + + def _next_dict(self): + data = collections.OrderedDict() if self._use_ordered_dict else dict() + for key, element in self._dict_items_generator(): + data[key] = element + return data + + def _list_items_generator(self): + while True: + try: + element = self._next_element() + except InvalidTorrentFileException: + return + yield element + + def _next_list(self): + return [element for element in self._list_items_generator()] + + def _next_int(self, end=END_INDICATOR): + value = 0 + char = self._read_byte(1) + while char != end: + # noinspection PyTypeChecker + if not b'0' <= char <= b'9': + raise InvalidTorrentFileException(self._pos) + value = value * 10 + int(char) - int(b'0') + char = self._read_byte(1) + return value + + def _next_string(self, decode=True): + length = self._next_int(b':') + raw = self._read_byte(length) + if decode: + string = 
raw.decode(self._encoding) + return string + return raw + + @staticmethod + def __to_hex(v): + return hex(ord(v) if isinstance(v, str) else v)[2:].rjust(2, str(0)) + + def _pieces(self): + raw = self._next_string(decode=False) + if len(raw) % 20 != 0: + raise InvalidTorrentFileException(self._pos) + return [ + ''.join([self.__to_hex(c) for c in h]) + for h in (raw[x:x+20] for x in range(0, len(raw), 20)) + ] + + def _next_end(self): + raise InvalidTorrentFileException(self._pos) + + def _next_type(self): + for (element_type, indicator) in self.TYPES: + indicator_length = len(indicator) + char = self._read_byte(indicator_length) + if indicator == char: + return element_type + self._seek_back(indicator_length) + raise InvalidTorrentFileException(self._pos) + + def _type_to_func(self, t): + return getattr(self, '_next_' + t) + + def _next_element(self): + element_type = self._next_type() + element = self._type_to_func(element_type)() + return element + + +def parse_torrent_file(filename, use_ordered_dict=False): + """ + Shortcut function for parse torrent object use TorrentFileParser + + :param string filename: torrent filename + :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` + :rtype: dict if ``use_ordered_dict`` is false, + collections.OrderedDict otherwise + """ + with open(filename, 'rb') as f: + return TorrentFileParser(f, use_ordered_dict).parse() + + +def __main(): + parser = argparse.ArgumentParser() + parser.add_argument('file', nargs='?', default='', + help='input file, will read form stdin if empty') + parser.add_argument('--dict', '-d', action='store_true', default=False, + help='use built-in dict, default will be OrderedDict') + parser.add_argument('--sort', '-s', action='store_true', default=False, + help='sort output json item by key') + parser.add_argument('--indent', '-i', type=int, default=None, + help='json output indent for every inner level') + parser.add_argument('--ascii', '-a', action='store_true', default=False, + 
help='ensure output json use ascii char, ' + 'escape other char use \\u') + parser.add_argument('--version', '-v', action='store_true', default=False, + help='print version and exit') + args = parser.parse_args() + + if args.version: + print(__version__) + exit(0) + + try: + if args.file == '': + target_file = io.BytesIO( + getattr(sys.stdin, 'buffer', sys.stdin).read() + ) + else: + target_file = open(args.file, 'rb') + except FileNotFoundError: + sys.stderr.write('Unable to find file {}\n'.format(args.file)) + exit(1) + + # noinspection PyUnboundLocalVariable + data = TorrentFileParser(target_file, not args.dict).parse() + + data = json.dumps( + data, ensure_ascii=args.ascii, + sort_keys=args.sort, indent=args.indent + ) + + print(data) + +if __name__ == '__main__': + __main()