Finish enhancement by issue #3

- Fixed: Support negative int
- Fixed: seek_back not move pos
- Added: Auto encoding
dev
7sDream 2018-04-06 13:41:51 +08:00
parent 817d816d7f
commit c2ce49250b
No known key found for this signature in database
GPG Key ID: 72A6D9FCEDDAB75D
7 changed files with 125 additions and 14 deletions

68
CHANGELOG.md 100644
View File

@ -0,0 +1,68 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- `encoding` option can be `auto`, which uses the `chardet` package to decide which encoding to use. If `chardet` is not installed, a warning is raised and the encoding falls back to 'utf-8'. (Thanks to [@ltfychrise])
- Add changelog.
### Changed
- Reorganize test code/files.
### Fixed
- Fix bug where integer fields could not be negative. (Thanks to [@ltfychrise])
- Fix bug where the `_seek_back` method did not move `_pos` back. (Thanks to [@ltfychrise])
## [0.1.3] - 2017-06-21
### Added
- Now `UnicodeDecodeError` is wrapped in `InvalidTorrentDataException`.
### Fixed
- Use `IOError` instead of `FileNotFoundError` in Python 2.
### Changed
- `InvalidTorrentFileException` renamed to `InvalidTorrentDataException`.
## [0.1.2] - 2017-06-21
### Changed
- Emm, I don't know, I just changed the version code...
## [0.1.1] - 2017-06-20
### Added
- CLI: add `--coding/-c` option to set the encoding of string fields in the file.
### Changed
- `ed2k` and `filehash` field now use same structure as 'pieces'.
## [0.1.0] - 2017-05-23
### Added
- Parse torrent from file and data into a dict.
- CLI provided.
- Simple tests.
- Available on pip.
[@ltfychrise]: https://github.com/ltfychrise
[Unreleased]: https://github.com/7sDream/torrent_parser/compare/v0.1.3...HEAD
[0.1.3]: https://github.com/7sDream/torrent_parser/compare/v0.1.2...v0.1.3
[0.1.2]: https://github.com/7sDream/torrent_parser/compare/v0.1.1...v0.1.2
[0.1.1]: https://github.com/7sDream/torrent_parser/compare/v0.1.0...v0.1.1
[0.1.0]: https://github.com/7sDream/torrent_parser/tree/v0.1.0

View File

@ -37,9 +37,13 @@ http://tracker.trackerfix.com:80/announce
## Test
```bash
python -m unittest test
python -m unittest tests
```
## Changelog
See [Changelog][CHANGELOG].
## LICENSE
See [License][LICENSE].
@ -48,3 +52,4 @@ See [License][LICENSE].
[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png
[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png
[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE
[CHANGELOG]: https://github.com/7sDream/torrent_parser/blob/master/CHANGELOG.md

View File

@ -0,0 +1 @@
from .test_all import *

View File

@ -7,31 +7,36 @@ from torrent_parser import TorrentFileParser, parse_torrent_file
class Test(unittest.TestCase):
TEST_FILENAME = 'test.torrent'
REAL_FILE = 'tests/testfiles/real.torrent'
NEG_FILE = 'tests/testfiles/neg.torrent'
def test_parse_torrent_file_use_shortcut(self):
parse_torrent_file(self.TEST_FILENAME)
parse_torrent_file(self.REAL_FILE)
def test_parse_torrent_file_use_class(self):
with open(self.TEST_FILENAME, 'rb') as fp:
with open(self.REAL_FILE, 'rb') as fp:
TorrentFileParser(fp).parse()
def test_encoding_auto(self):
with open(self.REAL_FILE, 'rb') as fp:
TorrentFileParser(fp, encoding='auto').parse()
def test_parse_torrent_file_to_ordered_dict(self):
data = parse_torrent_file(self.TEST_FILENAME, True)
data = parse_torrent_file(self.REAL_FILE, True)
self.assertIsInstance(data, collections.OrderedDict)
with open(self.TEST_FILENAME, 'rb') as fp:
with open(self.REAL_FILE, 'rb') as fp:
data = TorrentFileParser(fp, True).parse()
self.assertIsInstance(data, collections.OrderedDict)
def test_parse_correctness(self):
data = parse_torrent_file(self.TEST_FILENAME)
data = parse_torrent_file(self.REAL_FILE)
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
data['announce-list'])
self.assertEqual(data['creation date'], 1409254242)
def test_parse_two_times(self):
with open(self.TEST_FILENAME, 'rb') as fp:
with open(self.REAL_FILE, 'rb') as fp:
parser = TorrentFileParser(fp)
data = parser.parse()
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
@ -41,3 +46,11 @@ class Test(unittest.TestCase):
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
data['announce-list'])
self.assertEqual(data['creation date'], 1409254242)
def test_int_is_negative(self):
data = parse_torrent_file(self.NEG_FILE)
self.assertEqual(data['neg'], -1)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1 @@
d3:negi-1ee

View File

@ -21,6 +21,7 @@ import collections
import io
import json
import sys
import warnings
try:
FileNotFoundError
@ -29,6 +30,14 @@ except NameError:
# noinspection PyShadowingBuiltins
FileNotFoundError = IOError
# chardet is optional: use its detector when it can be imported, otherwise
# define a stand-in that returns a dict with the same keys.
try:
# noinspection PyPackageRequirements
from chardet import detect as _detect
except ImportError:
def _detect(_):
# Fallback when chardet cannot be imported: warn, then report utf-8.
warnings.warn("No chardet module installed, encoding will be utf-8")
return {'encoding': 'utf-8', 'confidence': 1}
__all__ = [
'InvalidTorrentDataException',
'parse_torrent_file',
@ -38,6 +47,10 @@ __all__ = [
__version__ = '0.1.3'
def detect(content):
    """
    Guess the text encoding of ``content``.

    :param content: raw bytes to inspect
    :return: the encoding name reported by the detector, or ``'utf-8'`` when
        the detector reports no encoding
    """
    # chardet reports ``{'encoding': None, ...}`` for input it cannot
    # classify (for example an empty byte string). Handing None to
    # ``bytes.decode`` raises TypeError, so fall back to utf-8 instead.
    return _detect(content)['encoding'] or 'utf-8'
class InvalidTorrentDataException(Exception):
def __init__(self, pos, msg=None):
msg = msg or "Invalid torrent format when read at pos {pos}"
@ -48,6 +61,7 @@ class InvalidTorrentDataException(Exception):
# Private marker type; its only instance in this view is bound to _END below.
class __EndCls(object):
pass
# Module-level marker object.
# NOTE(review): presumably the parser's "end of container" sentinel — confirm
# against its uses elsewhere in the file.
_END = __EndCls()
@ -77,7 +91,8 @@ class TorrentFileParser(object):
"""
:param fp: a **binary** file-like object to parse,
which means need 'b' mode when use built-in open function
:param encoding: file content encoding, default utf-8
:param encoding: file content encoding, default utf-8, use 'auto' to
enable charset auto detection ('chardet' package should be installed)
:param use_ordered_dict: Use collections.OrderedDict as dict container
default False, which mean use built-in dict
"""
@ -127,6 +142,7 @@ class TorrentFileParser(object):
def _seek_back(self, count):
    """
    Step the parser ``count`` bytes backwards.

    Rewinds the underlying content stream relative to its current position
    and moves the tracked ``_pos`` offset back by the same amount.

    :param count: number of bytes to move back
    """
    self._content.seek(-count, io.SEEK_CUR)
    self._pos -= count
def _restart(self):
self._content.seek(0, 0)
@ -168,20 +184,26 @@ class TorrentFileParser(object):
def _next_int(self, end=END_INDICATOR):
    """
    Read a decimal integer, consuming bytes up to and including ``end``.

    :param end: the byte that terminates the integer
    :return: the parsed integer, negative when it is prefixed with ``-``
    :raise InvalidTorrentDataException: on a byte that is neither a digit
        nor a leading ``-``, or when ``-`` is not followed by any digit
    """
    value = 0
    neg = False
    has_digit = False
    char = self._read_byte(1)
    # A sign is only meaningful in front of the digits; accepting it later
    # would silently turn input such as ``1-2`` into -12.
    if char == b'-':
        neg = True
        char = self._read_byte(1)
    while char != end:
        # noinspection PyTypeChecker
        if not b'0' <= char <= b'9':
            raise InvalidTorrentDataException(self._pos - 1)
        value = value * 10 + int(char)
        has_digit = True
        char = self._read_byte(1)
    if neg and not has_digit:
        # A bare ``-`` is not a number.
        raise InvalidTorrentDataException(self._pos - 1)
    return -value if neg else value
def _next_string(self, decode=True):
length = self._next_int(b':')
raw = self._read_byte(length)
if decode:
encoding = self._encoding
if encoding == 'auto':
encoding = detect(raw)
try:
string = raw.decode(self._encoding)
string = raw.decode(encoding)
except UnicodeDecodeError as e:
raise InvalidTorrentDataException(
self._pos - length + e.start,
@ -289,5 +311,6 @@ def __main():
print(data)
if __name__ == '__main__':
__main()