Finish enhancement by issue #3
- Fixed: Support negative int - Fixed: seek_back not move pos - Added: Auto encodingdev
parent
817d816d7f
commit
c2ce49250b
|
@ -0,0 +1,68 @@
|
||||||
|
# Changelog
|
||||||
|
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
||||||
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- The `encoding` option can be `auto`, which uses the `chardet` package to decide which encoding to use. If `chardet` is not installed, a warning is raised and the encoding falls back to 'utf-8'. (Thanks to [@ltfychrise])
|
||||||
|
- Add changelog.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Reorganize test codes/files.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Fix a bug where integer fields could not be negative. (Thanks to [@ltfychrise])
|
||||||
|
- Fix a bug where the `_seek_back` method did not move `_pos` back. (Thanks to [@ltfychrise])
|
||||||
|
|
||||||
|
## [0.1.3] - 2017-06-21
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Now `UnicodeDecodeError` is wrapped in `InvalidTorrentDataException`.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Use `IOError` instead of `FileNotFoundError` in Python 2.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- `InvalidTorrentFileException` renamed to `InvalidTorrentDataException`.
|
||||||
|
|
||||||
|
## [0.1.2] - 2017-06-21
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Emm, I don't know, I just changed the version code...
|
||||||
|
|
||||||
|
## [0.1.1] - 2017-06-20
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- CLI: add the `--coding/-c` option to set the encoding used for string fields in the file.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- `ed2k` and `filehash` field now use same structure as 'pieces'.
|
||||||
|
|
||||||
|
## [0.1.0] - 2017-05-23
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Parse torrent from file and data into a dict.
|
||||||
|
- CLI provided.
|
||||||
|
- Simple tests.
|
||||||
|
- Available on pip.
|
||||||
|
|
||||||
|
[@ltfychrise]: https://github.com/ltfychrise
|
||||||
|
[Unreleased]: https://github.com/7sDream/torrent_parser/compare/v0.1.3...HEAD
|
||||||
|
[0.1.3]: https://github.com/7sDream/torrent_parser/compare/v0.1.2...v0.1.3
|
||||||
|
[0.1.2]: https://github.com/7sDream/torrent_parser/compare/v0.1.1...v0.1.2
|
||||||
|
[0.1.1]: https://github.com/7sDream/torrent_parser/compare/v0.1.0...v0.1.1
|
||||||
|
[0.1.0]: https://github.com/7sDream/torrent_parser/tree/v0.1.0
|
|
@ -37,9 +37,13 @@ http://tracker.trackerfix.com:80/announce
|
||||||
## Test
|
## Test
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m unittest test
|
python -m unittest tests
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Changelog
|
||||||
|
|
||||||
|
See [Changelog][CHANGELOG].
|
||||||
|
|
||||||
## LICENSE
|
## LICENSE
|
||||||
|
|
||||||
See [License][LICENSE].
|
See [License][LICENSE].
|
||||||
|
@ -48,3 +52,4 @@ See [License][LICENSE].
|
||||||
[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png
|
[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png
|
||||||
[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png
|
[screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png
|
||||||
[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE
|
[LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE
|
||||||
|
[CHANGELOG]: https://github.com/7sDream/torrent_parser/blob/master/CHANGELOG.md
|
|
@ -0,0 +1 @@
|
||||||
|
from .test_all import *
|
|
@ -7,31 +7,36 @@ from torrent_parser import TorrentFileParser, parse_torrent_file
|
||||||
|
|
||||||
|
|
||||||
class Test(unittest.TestCase):
|
class Test(unittest.TestCase):
|
||||||
TEST_FILENAME = 'test.torrent'
|
REAL_FILE = 'tests/testfiles/real.torrent'
|
||||||
|
NEG_FILE = 'tests/testfiles/neg.torrent'
|
||||||
|
|
||||||
def test_parse_torrent_file_use_shortcut(self):
|
def test_parse_torrent_file_use_shortcut(self):
|
||||||
parse_torrent_file(self.TEST_FILENAME)
|
parse_torrent_file(self.REAL_FILE)
|
||||||
|
|
||||||
def test_parse_torrent_file_use_class(self):
|
def test_parse_torrent_file_use_class(self):
|
||||||
with open(self.TEST_FILENAME, 'rb') as fp:
|
with open(self.REAL_FILE, 'rb') as fp:
|
||||||
TorrentFileParser(fp).parse()
|
TorrentFileParser(fp).parse()
|
||||||
|
|
||||||
|
def test_encoding_auto(self):
|
||||||
|
with open(self.REAL_FILE, 'rb') as fp:
|
||||||
|
TorrentFileParser(fp, encoding='auto').parse()
|
||||||
|
|
||||||
def test_parse_torrent_file_to_ordered_dict(self):
|
def test_parse_torrent_file_to_ordered_dict(self):
|
||||||
data = parse_torrent_file(self.TEST_FILENAME, True)
|
data = parse_torrent_file(self.REAL_FILE, True)
|
||||||
self.assertIsInstance(data, collections.OrderedDict)
|
self.assertIsInstance(data, collections.OrderedDict)
|
||||||
|
|
||||||
with open(self.TEST_FILENAME, 'rb') as fp:
|
with open(self.REAL_FILE, 'rb') as fp:
|
||||||
data = TorrentFileParser(fp, True).parse()
|
data = TorrentFileParser(fp, True).parse()
|
||||||
self.assertIsInstance(data, collections.OrderedDict)
|
self.assertIsInstance(data, collections.OrderedDict)
|
||||||
|
|
||||||
def test_parse_correctness(self):
|
def test_parse_correctness(self):
|
||||||
data = parse_torrent_file(self.TEST_FILENAME)
|
data = parse_torrent_file(self.REAL_FILE)
|
||||||
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
||||||
data['announce-list'])
|
data['announce-list'])
|
||||||
self.assertEqual(data['creation date'], 1409254242)
|
self.assertEqual(data['creation date'], 1409254242)
|
||||||
|
|
||||||
def test_parse_two_times(self):
|
def test_parse_two_times(self):
|
||||||
with open(self.TEST_FILENAME, 'rb') as fp:
|
with open(self.REAL_FILE, 'rb') as fp:
|
||||||
parser = TorrentFileParser(fp)
|
parser = TorrentFileParser(fp)
|
||||||
data = parser.parse()
|
data = parser.parse()
|
||||||
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
||||||
|
@ -41,3 +46,11 @@ class Test(unittest.TestCase):
|
||||||
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
self.assertIn(['udp://tracker.publicbt.com:80/announce'],
|
||||||
data['announce-list'])
|
data['announce-list'])
|
||||||
self.assertEqual(data['creation date'], 1409254242)
|
self.assertEqual(data['creation date'], 1409254242)
|
||||||
|
|
||||||
|
def test_int_is_negative(self):
|
||||||
|
data = parse_torrent_file(self.NEG_FILE)
|
||||||
|
self.assertEqual(data['neg'], -1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
|
@ -0,0 +1 @@
|
||||||
|
d3:negi-1ee
|
|
@ -21,6 +21,7 @@ import collections
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
try:
|
try:
|
||||||
FileNotFoundError
|
FileNotFoundError
|
||||||
|
@ -29,6 +30,14 @@ except NameError:
|
||||||
# noinspection PyShadowingBuiltins
|
# noinspection PyShadowingBuiltins
|
||||||
FileNotFoundError = IOError
|
FileNotFoundError = IOError
|
||||||
|
|
||||||
|
try:
|
||||||
|
# noinspection PyPackageRequirements
|
||||||
|
from chardet import detect as _detect
|
||||||
|
except ImportError:
|
||||||
|
def _detect(_):
|
||||||
|
warnings.warn("No chardet module installed, encoding will be utf-8")
|
||||||
|
return {'encoding': 'utf-8', 'confidence': 1}
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'InvalidTorrentDataException',
|
'InvalidTorrentDataException',
|
||||||
'parse_torrent_file',
|
'parse_torrent_file',
|
||||||
|
@ -38,6 +47,10 @@ __all__ = [
|
||||||
__version__ = '0.1.3'
|
__version__ = '0.1.3'
|
||||||
|
|
||||||
|
|
||||||
|
def detect(content):
|
||||||
|
return _detect(content)['encoding']
|
||||||
|
|
||||||
|
|
||||||
class InvalidTorrentDataException(Exception):
|
class InvalidTorrentDataException(Exception):
|
||||||
def __init__(self, pos, msg=None):
|
def __init__(self, pos, msg=None):
|
||||||
msg = msg or "Invalid torrent format when read at pos {pos}"
|
msg = msg or "Invalid torrent format when read at pos {pos}"
|
||||||
|
@ -48,6 +61,7 @@ class InvalidTorrentDataException(Exception):
|
||||||
class __EndCls(object):
|
class __EndCls(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
_END = __EndCls()
|
_END = __EndCls()
|
||||||
|
|
||||||
|
|
||||||
|
@ -77,7 +91,8 @@ class TorrentFileParser(object):
|
||||||
"""
|
"""
|
||||||
:param fp: a **binary** file-like object to parse,
|
:param fp: a **binary** file-like object to parse,
|
||||||
which means need 'b' mode when use built-in open function
|
which means need 'b' mode when use built-in open function
|
||||||
:param encoding: file content encoding, default utf-8
|
:param encoding: file content encoding, default utf-8, use 'auto' to
|
||||||
|
enable charset auto detection ('chardet' package should be installed)
|
||||||
:param use_ordered_dict: Use collections.OrderedDict as dict container
|
:param use_ordered_dict: Use collections.OrderedDict as dict container
|
||||||
default False, which mean use built-in dict
|
default False, which mean use built-in dict
|
||||||
"""
|
"""
|
||||||
|
@ -127,6 +142,7 @@ class TorrentFileParser(object):
|
||||||
|
|
||||||
def _seek_back(self, count):
|
def _seek_back(self, count):
|
||||||
self._content.seek(-count, 1)
|
self._content.seek(-count, 1)
|
||||||
|
self._pos = self._pos - count
|
||||||
|
|
||||||
def _restart(self):
|
def _restart(self):
|
||||||
self._content.seek(0, 0)
|
self._content.seek(0, 0)
|
||||||
|
@ -168,20 +184,26 @@ class TorrentFileParser(object):
|
||||||
def _next_int(self, end=END_INDICATOR):
|
def _next_int(self, end=END_INDICATOR):
|
||||||
value = 0
|
value = 0
|
||||||
char = self._read_byte(1)
|
char = self._read_byte(1)
|
||||||
|
neg = False
|
||||||
while char != end:
|
while char != end:
|
||||||
# noinspection PyTypeChecker
|
if not neg and char == b'-':
|
||||||
if not b'0' <= char <= b'9':
|
neg = True
|
||||||
|
elif not b'0' <= char <= b'9':
|
||||||
raise InvalidTorrentDataException(self._pos - 1)
|
raise InvalidTorrentDataException(self._pos - 1)
|
||||||
value = value * 10 + int(char) - int(b'0')
|
else:
|
||||||
|
value = value * 10 + int(char) - int(b'0')
|
||||||
char = self._read_byte(1)
|
char = self._read_byte(1)
|
||||||
return value
|
return -value if neg else value
|
||||||
|
|
||||||
def _next_string(self, decode=True):
|
def _next_string(self, decode=True):
|
||||||
length = self._next_int(b':')
|
length = self._next_int(b':')
|
||||||
raw = self._read_byte(length)
|
raw = self._read_byte(length)
|
||||||
if decode:
|
if decode:
|
||||||
|
encoding = self._encoding
|
||||||
|
if encoding == 'auto':
|
||||||
|
encoding = detect(raw)
|
||||||
try:
|
try:
|
||||||
string = raw.decode(self._encoding)
|
string = raw.decode(encoding)
|
||||||
except UnicodeDecodeError as e:
|
except UnicodeDecodeError as e:
|
||||||
raise InvalidTorrentDataException(
|
raise InvalidTorrentDataException(
|
||||||
self._pos - length + e.start,
|
self._pos - length + e.start,
|
||||||
|
@ -289,5 +311,6 @@ def __main():
|
||||||
|
|
||||||
print(data)
|
print(data)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
__main()
|
__main()
|
||||||
|
|
Loading…
Reference in New Issue