Many changes, see below:

- Added
  - BEncoder
  - BDecoder
  - encode
  - decode
  - errors argument for TorrentFileParser and parse_torrent_file (#4)
- Changed
  - TorrentFileCreator renamed to BEncoder
  - TorrentFileParser and BEncoder no longer require the outermost level to be a dict
- Updated
  - README
  - CHANGELOG
  - LICENSE

and tests for above changes.
dev
7sDream 2018-06-22 22:42:15 +08:00
parent 94b8f1eee0
commit ee3128b32b
12 changed files with 206 additions and 54 deletions

View File

@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased] ## [Unreleased]
### Added
- Add `errors` option in `TorrentFileParser` and `parse_torrent_file` to let user set the encoding error handler. (Thanks [@yasuotakei](https://github.com/yasuotakei))
- Add the `-e`/`--errors` CLI option to set the `errors` option of `parse_torrent_file`.
- `BDecoder` class and `decode` shortcut function to directly decode bytes.
- `encode` shortcut function to directly encode data to bytes.
### Changed
- **Breaking Change** `TorrentFileCreator` is renamed to `BEncoder`, as the original name did not describe its function.
- `TorrentFileParser` no longer requires the outermost level of the parsed data to be a `dict`.
- `BEncoder` no longer requires the outermost level of the encoded data to be a `dict`.
## [0.2.0] - 2018.5.25 ## [0.2.0] - 2018.5.25
### Change ### Change
@ -17,7 +30,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Added ### Added
- `TorrentFileCreator` class and `create_torrent_file` shortcut function for write back data to a torrent file - `TorrentFileCreator` class and `create_torrent_file` shortcut function for write back data to a torrent file.
## [0.1.4] - 2018-04-06 ## [0.1.4] - 2018-04-06

View File

@ -1,6 +1,6 @@
The MIT License (MIT) The MIT License (MIT)
Copyright (c) 2017 7sDream Copyright (c) 2017 - 2018 7sDream
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@ -4,6 +4,15 @@ A simple parser for `.torrent` file.
Can also edit and write back to torrent format after version 0.2.0. Can also edit and write back to torrent format after version 0.2.0.
## Features
- Decoder and encoder for torrent files
- Auto decode bytes fields to strings with a user-specified encoding and error handler
- Auto-detect the encoding when `auto` is used as the encoding (requires `chardet` to be installed)
- Auto decode hash value fields to hash blocks
- Uniform exception type
- CLI provided, with JSON output
## Install ## Install
``` ```
@ -34,12 +43,23 @@ $ cat test.torrent | pytp
```pycon ```pycon
>>> import torrent_parser as tp >>> import torrent_parser as tp
>>> data = tp.parse_torrent_file('test.torrent') >>> data = tp.parse_torrent_file('test.torrent')
>>> print(data['announce']) >>> data['announce']
http://tracker.trackerfix.com:80/announce http://tracker.trackerfix.com:80/announce
>>> data['announce'] = 'http://127.0.0.1:12345' >>> data['announce'] = 'http://127.0.0.1:12345'
>>> tp.create_torrent_file('new.torrent', data) >>> tp.create_torrent_file('new.torrent', data)
``` ```
Or, if you are not working with files but with raw bytes:
```pycon
>>> import torrent_parser as tp
>>> data = tp.decode(b'd3:negi-1ee')
>>> data['neg']
-1
>>> tp.encode(data)
b'd3:negi-1ee'
```
## Test ## Test
```bash ```bash
@ -58,4 +78,4 @@ See [License][LICENSE].
[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png [screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png
[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png [screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png
[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE [LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE
[CHANGELOG]: https://github.com/7sDream/torrent_parser/blob/master/CHANGELOG.md [CHANGELOG]: https://github.com/7sDream/torrent_parser/blob/master/CHANGELOG.md

View File

@ -1,2 +1,5 @@
from .test_create import * from .test_create import *
from .test_parse import * from .test_parse import *
from .test_encoding_error import *
from .test_encode import *
from .test_decode import *

View File

@ -6,7 +6,7 @@ import io
import os.path import os.path
import unittest import unittest
from torrent_parser import TorrentFileParser, TorrentFileCreator from torrent_parser import TorrentFileParser, BEncoder
class TestCreate(unittest.TestCase): class TestCreate(unittest.TestCase):
@ -17,15 +17,19 @@ class TestCreate(unittest.TestCase):
data = collections.OrderedDict() data = collections.OrderedDict()
data['a'] = 1 data['a'] = 1
data['b'] = 2 data['b'] = 2
self.assertEqual(TorrentFileCreator(data).encode(), b'd1:ai1e1:bi2ee') self.assertEqual(BEncoder(data).encode(), b'd1:ai1e1:bi2ee')
def test_same_output_if_no_edit(self): def test_same_output_if_no_edit(self):
with open(self.REAL_FILE, 'rb') as fp: with open(self.REAL_FILE, 'rb') as fp:
in_data = fp.read() in_data = fp.read()
data = TorrentFileParser(io.BytesIO(in_data), True).parse() data = TorrentFileParser(io.BytesIO(in_data), True).parse()
out_data = TorrentFileCreator(data).encode() out_data = BEncoder(data).encode()
m1 = hashlib.md5() m1 = hashlib.md5()
m1.update(in_data) m1.update(in_data)
m2 = hashlib.md5() m2 = hashlib.md5()
m2.update(out_data) m2.update(out_data)
self.assertEqual(m1.digest(), m2.digest()) self.assertEqual(m1.digest(), m2.digest())
def test_dont_need_dict_outmost(self):
    # The encoder accepts top-level data that is not a dict; here a bare
    # integer is encoded to its bencode form.
    encoded = BEncoder(123456).encode()
    self.assertEqual(encoded, b'i123456e')

View File

@ -0,0 +1,11 @@
from __future__ import unicode_literals
import unittest
from torrent_parser import decode
class TestDecode(unittest.TestCase):
    """Checks for the module-level ``decode`` shortcut function."""

    def test_decode(self):
        # A bencoded integer given as raw bytes decodes to the Python int.
        result = decode(b'i12345e')
        self.assertEqual(result, 12345)

View File

@ -0,0 +1,11 @@
from __future__ import unicode_literals
import unittest
from torrent_parser import encode
class TestEncode(unittest.TestCase):
    """Checks for the module-level ``encode`` shortcut function."""

    def test_encode(self):
        # A Python int encodes to its bencode byte representation.
        result = encode(12345)
        self.assertEqual(result, b'i12345e')

View File

@ -0,0 +1,25 @@
from __future__ import unicode_literals
import os.path
import unittest
from torrent_parser import (
TorrentFileParser, parse_torrent_file, InvalidTorrentDataException
)
class TestDecodingError(unittest.TestCase):
    """Exercise the ``errors`` option of the parser and its shortcut."""

    TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files')
    # Fixture expected (per these tests) to fail decoding with the default
    # encoding and error handler.
    FILE = os.path.join(TEST_FILES_DIR, 'utf8.encoding.error.torrent')

    def test_default_option_will_raise_exception(self):
        # Shortcut function called with default options.
        self.assertRaises(
            InvalidTorrentDataException, parse_torrent_file, self.FILE
        )
        # Parser class used directly with default options.
        with self.assertRaises(InvalidTorrentDataException), \
                open(self.FILE, 'rb') as stream:
            TorrentFileParser(stream).parse()

    def test_not_raise_exception_when_use_ignore(self):
        # With errors='ignore' neither entry point is expected to raise.
        parse_torrent_file(self.FILE, errors='ignore')
        with open(self.FILE, 'rb') as stream:
            parser = TorrentFileParser(stream, errors='ignore')
            parser.parse()

View File

@ -0,0 +1 @@
8:announce

Binary file not shown.

View File

@ -11,6 +11,7 @@ class TestParse(unittest.TestCase):
TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files') TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files')
REAL_FILE = os.path.join(TEST_FILES_DIR, 'real.torrent') REAL_FILE = os.path.join(TEST_FILES_DIR, 'real.torrent')
NEG_FILE = os.path.join(TEST_FILES_DIR, 'neg.torrent') NEG_FILE = os.path.join(TEST_FILES_DIR, 'neg.torrent')
STRING_FILE = os.path.join(TEST_FILES_DIR, 'outmost.string.torrent')
def test_parse_torrent_file_use_shortcut(self): def test_parse_torrent_file_use_shortcut(self):
parse_torrent_file(self.REAL_FILE) parse_torrent_file(self.REAL_FILE)
@ -53,6 +54,10 @@ class TestParse(unittest.TestCase):
data = parse_torrent_file(self.NEG_FILE) data = parse_torrent_file(self.NEG_FILE)
self.assertEqual(data['neg'], -1) self.assertEqual(data['neg'], -1)
def test_dont_need_dict_outmost(self):
    # A file whose outermost element is a string (not a dict) parses to
    # that string.
    parsed = parse_torrent_file(self.STRING_FILE)
    self.assertEqual(parsed, 'announce')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

153
torrent_parser.py 100644 → 100755
View File

@ -62,11 +62,14 @@ except NameError:
str_type = str str_type = str
__all__ = [ __all__ = [
'InvalidTorrentDataException',
'BEncoder',
'BDecoder',
'encode',
'decode',
'TorrentFileParser',
'create_torrent_file', 'create_torrent_file',
'parse_torrent_file', 'parse_torrent_file',
'InvalidTorrentDataException',
'TorrentFileCreator',
'TorrentFileParser',
] ]
__version__ = '0.2.0' __version__ = '0.2.0'
@ -105,7 +108,7 @@ class TorrentFileParser(object):
STRING_INDICATOR = b'' STRING_INDICATOR = b''
STRING_DELIMITER = b':' STRING_DELIMITER = b':'
RAW_FIELD_PARAMS = { HASH_FIELD_PARAMS = {
# field length need_list # field length need_list
'pieces': (20, True), 'pieces': (20, True),
'ed2k': (16, False), 'ed2k': (16, False),
@ -120,14 +123,18 @@ class TorrentFileParser(object):
(TYPE_STRING, STRING_INDICATOR), (TYPE_STRING, STRING_INDICATOR),
] ]
def __init__(self, fp, use_ordered_dict=False, encoding='utf-8'): def __init__(
self, fp, use_ordered_dict=False, encoding='utf-8', errors='strict'
):
""" """
:param fp: a **binary** file-like object to parse, :param fp: a **binary** file-like object to parse,
which means need 'b' mode when use built-in open function which means need 'b' mode when use built-in open function
:param encoding: file content encoding, default utf-8, use 'auto' to :param bool use_ordered_dict: Use collections.OrderedDict as dict
enable charset auto detection ('chardet' package should be installed) container default False, which mean use built-in dict
:param use_ordered_dict: Use collections.OrderedDict as dict container :param string encoding: file content encoding, default utf-8, use 'auto'
default False, which mean use built-in dict to enable charset auto detection (need 'chardet' package installed)
:param string errors: how to deal with encoding error when try to parse
string from content with ``encoding``
""" """
if getattr(fp, 'read', ) is None \ if getattr(fp, 'read', ) is None \
or getattr(fp, 'seek') is None: or getattr(fp, 'seek') is None:
@ -137,12 +144,14 @@ class TorrentFileParser(object):
self._encoding = encoding self._encoding = encoding
self._content = fp self._content = fp
self._use_ordered_dict = use_ordered_dict self._use_ordered_dict = use_ordered_dict
self._error_handler = errors
def parse(self): def parse(self):
""" """
:return: the parse result :return: the parse result
:type: depends on ``use_ordered_dict`` option when init the parser :rtype: dict|list|int|string
see :any:`TorrentFileParser.__init__` :raise: :any:`InvalidTorrentDataException` when parse failed or error
happened when decode string using specified encoding
""" """
self._restart() self._restart()
data = self._next_element() data = self._next_element()
@ -155,10 +164,7 @@ class TorrentFileParser(object):
except EOFError: # expect EOF except EOFError: # expect EOF
pass pass
if isinstance(data, dict): return data
return data
raise InvalidTorrentDataException('Outermost element is not a dict')
def _read_byte(self, count=1, raise_eof=False): def _read_byte(self, count=1, raise_eof=False):
assert count >= 0 assert count >= 0
@ -186,9 +192,8 @@ class TorrentFileParser(object):
k = self._next_element() k = self._next_element()
if k is _END: if k is _END:
return return
if k in self.RAW_FIELD_PARAMS: if k in self.HASH_FIELD_PARAMS:
length, need_list = self.RAW_FIELD_PARAMS[k] v = self._next_hash(*self.HASH_FIELD_PARAMS[k])
v = self._next_hash(length, need_list)
else: else:
v = self._next_element() v = self._next_element()
if k == 'encoding': if k == 'encoding':
@ -225,25 +230,26 @@ class TorrentFileParser(object):
char = self._read_byte(1) char = self._read_byte(1)
return -value if neg else value return -value if neg else value
def _next_string(self, decode=True): def _next_string(self, need_decode=True):
length = self._next_int(self.STRING_DELIMITER) length = self._next_int(self.STRING_DELIMITER)
raw = self._read_byte(length) raw = self._read_byte(length)
if decode: if need_decode:
encoding = self._encoding encoding = self._encoding
if encoding == 'auto': if encoding == 'auto':
encoding = detect(raw) self.encoding = encoding = detect(raw)
try: try:
string = raw.decode(encoding, "ignore") string = raw.decode(encoding, self._error_handler)
except UnicodeDecodeError as e: except UnicodeDecodeError as e:
raise InvalidTorrentDataException( raise InvalidTorrentDataException(
self._pos - length + e.start, self._pos - length + e.start,
"Fail to decode string at pos {pos} using " + e.encoding "Fail to decode string at pos {pos} using encoding " +
e.encoding
) )
return string return string
return raw return raw
def _next_hash(self, p_len, need_list): def _next_hash(self, p_len, need_list):
raw = self._next_string(decode=False) raw = self._next_string(need_decode=False)
if len(raw) % p_len != 0: if len(raw) % p_len != 0:
raise InvalidTorrentDataException( raise InvalidTorrentDataException(
self._pos - len(raw), "Hash bit length not match at pos {pos}" self._pos - len(raw), "Hash bit length not match at pos {pos}"
@ -280,7 +286,7 @@ class TorrentFileParser(object):
return element return element
class TorrentFileCreator(object): class BEncoder(object):
TYPES = { TYPES = {
(dict,): TorrentFileParser.TYPE_DICT, (dict,): TorrentFileParser.TYPE_DICT,
@ -291,27 +297,25 @@ class TorrentFileCreator(object):
def __init__(self, data, encoding='utf-8'): def __init__(self, data, encoding='utf-8'):
""" """
:param data: torrent data, must be a dict or OrderedDict :param dict|list|int|string data: data will be encoded
:param encoding: string field output encoding :param string encoding: string field output encoding
""" """
if not isinstance(data, dict):
raise InvalidTorrentDataException(
None,
"Top level structure should be a dict"
)
self._data = data self._data = data
self._encoding = encoding self._encoding = encoding
def encode(self): def encode(self):
""" """
Encode data to bytes that conform to torrent file format Encode to bytes
:rtype: bytes
""" """
return b''.join(self._output_element(self._data)) return b''.join(self._output_element(self._data))
def encode_to_readable(self): def encode_to_filelike(self):
""" """
Encode data to a file-like(BytesIO) object which contains the result of Encode to a file-like(BytesIO) object
`TorrentFileCreator.encode()`
:rtype: BytesIO
""" """
return io.BytesIO(self.encode()) return io.BytesIO(self.encode())
@ -364,7 +368,7 @@ class TorrentFileCreator(object):
) )
for x in self._output_element(k): for x in self._output_element(k):
yield x yield x
if k in TorrentFileParser.RAW_FIELD_PARAMS: if k in TorrentFileParser.HASH_FIELD_PARAMS:
for x in self._output_decode_hash(v): for x in self._output_decode_hash(v):
yield x yield x
else: else:
@ -393,29 +397,79 @@ class TorrentFileCreator(object):
) )
def parse_torrent_file(filename, use_ordered_dict=False): class BDecoder(object):
def __init__(
self, data, use_ordered_dict=False, encoding='utf-8', errors='strict'
):
"""
:param bytes data: raw data to be decoded
:param bool use_ordered_dict: see :any:`TorrentFileParser.__init__`
:param string encoding: see :any:`TorrentFileParser.__init__`
:param string errors: see :any:`TorrentFileParser.__init__`
"""
self._data = bytes(data)
self._use_ordered_dict = use_ordered_dict
self._encoding = encoding
self._errors = errors
def decode(self):
    """
    Decode the stored bytes and return the resulting python object.

    The data is wrapped in an in-memory binary stream and parsed by
    :any:`TorrentFileParser` with the options given at init.

    :rtype: dict|list|int|string
    """
    stream = io.BytesIO(self._data)
    parser = TorrentFileParser(
        stream, self._use_ordered_dict, self._encoding, self._errors,
    )
    return parser.parse()
def encode(data, encoding='utf-8'):
    """
    Shortcut function to encode a python object to the torrent file
    format (bencode) using :any:`BEncoder`

    :param dict|list|int|string data: data to be encoded
    :param string encoding: string field output encoding
    :rtype: bytes
    """
    encoder = BEncoder(data, encoding)
    return encoder.encode()
def decode(data, use_ordered_dict=False, encoding='utf-8', errors='strict'):
    """
    Shortcut function to decode bytes in the torrent file format (bencode)
    to a python object using :any:`BDecoder`

    :param bytes data: raw data to be decoded
    :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__`
    :param string encoding: see :any:`TorrentFileParser.__init__`
    :param string errors: see :any:`TorrentFileParser.__init__`
    :rtype: dict|list|int|string
    """
    decoder = BDecoder(data, use_ordered_dict, encoding, errors)
    return decoder.decode()
def parse_torrent_file(
filename, use_ordered_dict=False, encoding='utf-8', errors='strict',
):
""" """
Shortcut function for parse torrent object using TorrentFileParser Shortcut function for parse torrent object using TorrentFileParser
:param string filename: torrent filename :param string filename: torrent filename
:param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__`
:rtype: dict if ``use_ordered_dict`` is false, :param string encoding: see :any:`TorrentFileParser.__init__`
collections.OrderedDict otherwise :param string errors: see :any:`TorrentFileParser.__init__`
:rtype: dict|list|int|string
""" """
with open(filename, 'rb') as f: with open(filename, 'rb') as f:
return TorrentFileParser(f, use_ordered_dict).parse() return TorrentFileParser(f, use_ordered_dict, encoding, errors).parse()
def create_torrent_file(filename, data, encoding='utf-8'): def create_torrent_file(filename, data, encoding='utf-8'):
""" """
Shortcut function for create a torrent file using TorrentFileCreator Shortcut function for create a torrent file using TorrentFileCreator
:param filename: output torrent filename :param string filename: output torrent filename
:param data: torrent data, must be a dict or OrderedDict :param dict|list|int|string data: torrent data
:param encoding: string field output encoding :param string encoding: string field output encoding
""" """
with open(filename, 'wb') as f: with open(filename, 'wb') as f:
f.write(TorrentFileCreator(data, encoding).encode()) f.write(BEncoder(data, encoding).encode())
def __main(): def __main():
@ -432,7 +486,10 @@ def __main():
help='ensure output json use ascii char, ' help='ensure output json use ascii char, '
'escape other char use \\u') 'escape other char use \\u')
parser.add_argument('--coding', '-c', default='utf-8', parser.add_argument('--coding', '-c', default='utf-8',
help='string encoding, default utf-8') help='string encoding, default "utf-8"')
parser.add_argument('--errors', '-e', default='strict',
help='decoding error handler, default "strict", you can'
' use "ignore" or "replace" to avoid exception')
parser.add_argument('--version', '-v', action='store_true', default=False, parser.add_argument('--version', '-v', action='store_true', default=False,
help='print version and exit') help='print version and exit')
args = parser.parse_args() args = parser.parse_args()
@ -453,7 +510,9 @@ def __main():
exit(1) exit(1)
# noinspection PyUnboundLocalVariable # noinspection PyUnboundLocalVariable
data = TorrentFileParser(target_file, not args.dict, args.coding).parse() data = TorrentFileParser(
target_file, not args.dict, args.coding, args.errors
).parse()
data = json.dumps( data = json.dumps(
data, ensure_ascii=args.ascii, data, ensure_ascii=args.ascii,