Finish enhancement by issue #3
- Fixed: Support negative int - Fixed: seek_back not move pos - Added: Auto encodingdev
parent
817d816d7f
commit
c2ce49250b
|
@ -0,0 +1,68 @@
|
|||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
||||
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- `encoding` option can be `auto`, which uses the `chardet` package to decide which encoding to use. If `chardet` is not installed, a warning is issued and the encoding falls back to 'utf-8'. (Thanks to [@ltfychrise])
|
||||
- Add changelog.
|
||||
|
||||
### Changed
|
||||
|
||||
- Reorganize test code and files.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix bug where integer fields could not be negative. (Thanks to [@ltfychrise])
|
||||
- Fix bug where the `_seek_back` method did not move `_pos` back. (Thanks to [@ltfychrise])
|
||||
|
||||
## [0.1.3] - 2017-06-21
|
||||
|
||||
### Added
|
||||
|
||||
- Now `UnicodeDecodeError` is wrapped in `InvalidTorrentDataException`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Use `IOError` instead of `FileNotFoundError` in Python 2.
|
||||
|
||||
### Changed
|
||||
|
||||
- Rename `InvalidTorrentFileException` to `InvalidTorrentDataException`.
|
||||
|
||||
## [0.1.2] - 2017-06-21
|
||||
|
||||
### Changed
|
||||
|
||||
- Emm, I don't know, I just changed the version code...
|
||||
|
||||
## [0.1.1] - 2017-06-20
|
||||
|
||||
### Added
|
||||
|
||||
- CLI: add `--coding/-c` option to set the encoding of string fields in the file.
|
||||
|
||||
### Changed
|
||||
|
||||
- `ed2k` and `filehash` fields now use the same structure as `pieces`.
|
||||
|
||||
## [0.1.0] - 2017-05-23
|
||||
|
||||
### Added
|
||||
|
||||
- Parse torrent from file and data into a dict.
|
||||
- CLI provided.
|
||||
- Simple tests.
|
||||
- Available on pip.
|
||||
|
||||
[@ltfychrise]: https://github.com/ltfychrise
|
||||
[Unreleased]: https://github.com/7sDream/torrent_parser/compare/v0.1.3...HEAD
|
||||
[0.1.3]: https://github.com/7sDream/torrent_parser/compare/v0.1.2...v0.1.3
|
||||
[0.1.2]: https://github.com/7sDream/torrent_parser/compare/v0.1.1...v0.1.2
|
||||
[0.1.1]: https://github.com/7sDream/torrent_parser/compare/v0.1.0...v0.1.1
|
||||
[0.1.0]: https://github.com/7sDream/torrent_parser/tree/v0.1.0
|
|
@ -37,9 +37,13 @@ http://tracker.trackerfix.com:80/announce
|
|||
## Test
|
||||
|
||||
```bash
|
||||
python -m unittest test
|
||||
python -m unittest tests
|
||||
```
|
||||
|
||||
## Changelog
|
||||
|
||||
See [Changelog][CHANGELOG].
|
||||
|
||||
## LICENSE
|
||||
|
||||
See [License][LICENSE].
|
||||
|
@ -48,3 +52,4 @@ See [License][LICENSE].
|
|||
[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png
|
||||
[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png
|
||||
[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE
|
||||
[CHANGELOG]: https://github.com/7sDream/torrent_parser/blob/master/CHANGELOG.md
|
|
@ -0,0 +1 @@
|
|||
from .test_all import *
|
|
@ -7,31 +7,36 @@ from torrent_parser import TorrentFileParser, parse_torrent_file
|
|||
|
||||
|
||||
class Test(unittest.TestCase):
|
||||
TEST_FILENAME = 'test.torrent'
|
||||
REAL_FILE = 'tests/testfiles/real.torrent'
|
||||
NEG_FILE = 'tests/testfiles/neg.torrent'
|
||||
|
||||
def test_parse_torrent_file_use_shortcut(self):
|
||||
parse_torrent_file(self.TEST_FILENAME)
|
||||
parse_torrent_file(self.REAL_FILE)
|
||||
|
||||
def test_parse_torrent_file_use_class(self):
|
||||
with open(self.TEST_FILENAME, 'rb') as fp:
|
||||
with open(self.REAL_FILE, 'rb') as fp:
|
||||
TorrentFileParser(fp).parse()
|
||||
|
||||
def test_encoding_auto(self):
|
||||
with open(self.REAL_FILE, 'rb') as fp:
|
||||
TorrentFileParser(fp, encoding='auto').parse()
|
||||
|
||||
def test_parse_torrent_file_to_ordered_dict(self):
|
||||
data = parse_torrent_file(self.TEST_FILENAME, True)
|
||||
data = parse_torrent_file(self.REAL_FILE, True)
|
||||
self.assertIsInstance(data, collections.OrderedDict)
|
||||
|
||||
with open(self.TEST_FILENAME, 'rb') as fp:
|
||||
with open(self.REAL_FILE, 'rb') as fp:
|
||||
data = TorrentFileParser(fp, True).parse()
|
||||
self.assertIsInstance(data, collections.OrderedDict)
|
||||
|
||||
def test_parse_correctness(self):
|
||||
data = parse_torrent_file(self.TEST_FILENAME)
|
||||
data = parse_torrent_file(self.REAL_FILE)
|
||||
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
||||
data['announce-list'])
|
||||
self.assertEqual(data['creation date'], 1409254242)
|
||||
|
||||
def test_parse_two_times(self):
|
||||
with open(self.TEST_FILENAME, 'rb') as fp:
|
||||
with open(self.REAL_FILE, 'rb') as fp:
|
||||
parser = TorrentFileParser(fp)
|
||||
data = parser.parse()
|
||||
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
||||
|
@ -41,3 +46,11 @@ class Test(unittest.TestCase):
|
|||
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
||||
data['announce-list'])
|
||||
self.assertEqual(data['creation date'], 1409254242)
|
||||
|
||||
def test_int_is_negative(self):
|
||||
data = parse_torrent_file(self.NEG_FILE)
|
||||
self.assertEqual(data['neg'], -1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -0,0 +1 @@
|
|||
d3:negi-1ee
|
|
@ -21,6 +21,7 @@ import collections
|
|||
import io
|
||||
import json
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
try:
|
||||
FileNotFoundError
|
||||
|
@ -29,6 +30,14 @@ except NameError:
|
|||
# noinspection PyShadowingBuiltins
|
||||
FileNotFoundError = IOError
|
||||
|
||||
try:
|
||||
# noinspection PyPackageRequirements
|
||||
from chardet import detect as _detect
|
||||
except ImportError:
|
||||
def _detect(_):
|
||||
warnings.warn("No chardet module installed, encoding will be utf-8")
|
||||
return {'encoding': 'utf-8', 'confidence': 1}
|
||||
|
||||
__all__ = [
|
||||
'InvalidTorrentDataException',
|
||||
'parse_torrent_file',
|
||||
|
@ -38,6 +47,10 @@ __all__ = [
|
|||
__version__ = '0.1.3'
|
||||
|
||||
|
||||
def detect(content):
|
||||
return _detect(content)['encoding']
|
||||
|
||||
|
||||
class InvalidTorrentDataException(Exception):
|
||||
def __init__(self, pos, msg=None):
|
||||
msg = msg or "Invalid torrent format when read at pos {pos}"
|
||||
|
@ -48,6 +61,7 @@ class InvalidTorrentDataException(Exception):
|
|||
class __EndCls(object):
|
||||
pass
|
||||
|
||||
|
||||
_END = __EndCls()
|
||||
|
||||
|
||||
|
@ -77,7 +91,8 @@ class TorrentFileParser(object):
|
|||
"""
|
||||
:param fp: a **binary** file-like object to parse,
|
||||
which means need 'b' mode when use built-in open function
|
||||
:param encoding: file content encoding, default utf-8
|
||||
:param encoding: file content encoding, default utf-8, use 'auto' to
|
||||
enable charset auto detection ('chardet' package should be installed)
|
||||
:param use_ordered_dict: Use collections.OrderedDict as dict container
|
||||
default False, which mean use built-in dict
|
||||
"""
|
||||
|
@ -127,6 +142,7 @@ class TorrentFileParser(object):
|
|||
|
||||
def _seek_back(self, count):
|
||||
self._content.seek(-count, 1)
|
||||
self._pos = self._pos - count
|
||||
|
||||
def _restart(self):
|
||||
self._content.seek(0, 0)
|
||||
|
@ -168,20 +184,26 @@ class TorrentFileParser(object):
|
|||
def _next_int(self, end=END_INDICATOR):
|
||||
value = 0
|
||||
char = self._read_byte(1)
|
||||
neg = False
|
||||
while char != end:
|
||||
# noinspection PyTypeChecker
|
||||
if not b'0' <= char <= b'9':
|
||||
if not neg and char == b'-':
|
||||
neg = True
|
||||
elif not b'0' <= char <= b'9':
|
||||
raise InvalidTorrentDataException(self._pos - 1)
|
||||
value = value * 10 + int(char) - int(b'0')
|
||||
else:
|
||||
value = value * 10 + int(char) - int(b'0')
|
||||
char = self._read_byte(1)
|
||||
return value
|
||||
return -value if neg else value
|
||||
|
||||
def _next_string(self, decode=True):
|
||||
length = self._next_int(b':')
|
||||
raw = self._read_byte(length)
|
||||
if decode:
|
||||
encoding = self._encoding
|
||||
if encoding == 'auto':
|
||||
encoding = detect(raw)
|
||||
try:
|
||||
string = raw.decode(self._encoding)
|
||||
string = raw.decode(encoding)
|
||||
except UnicodeDecodeError as e:
|
||||
raise InvalidTorrentDataException(
|
||||
self._pos - length + e.start,
|
||||
|
@ -289,5 +311,6 @@ def __main():
|
|||
|
||||
print(data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
__main()
|
||||
|
|
Loading…
Reference in New Issue