From 4eeceb975b604bf2a0c2b4e2e2d4a7c25d673bb6 Mon Sep 17 00:00:00 2001 From: 7sDream <7seconddream@gmail.com> Date: Sun, 29 Apr 2018 00:20:08 +0800 Subject: [PATCH 1/5] Removed: unused test files in MANIFEST.in --- MANIFEST.in | 2 -- 1 file changed, 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index a51d5d3..e2e577a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1 @@ include README.md LICENSE CHANGELOG.md -include test.py -include test.torrent From c1212c0203302847107496748f77ea8131d66c07 Mon Sep 17 00:00:00 2001 From: Yasuo <40465782+yasuotakei@users.noreply.github.com> Date: Thu, 21 Jun 2018 13:56:46 +0000 Subject: [PATCH 2/5] Fix Issue #4 --- torrent_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torrent_parser.py b/torrent_parser.py index 4ab2c98..97777f5 100644 --- a/torrent_parser.py +++ b/torrent_parser.py @@ -233,7 +233,7 @@ class TorrentFileParser(object): if encoding == 'auto': encoding = detect(raw) try: - string = raw.decode(encoding) + string = raw.decode(encoding, "ignore") except UnicodeDecodeError as e: raise InvalidTorrentDataException( self._pos - length + e.start, From ee3128b32bc9542568c356ce91ecea6d7978eb57 Mon Sep 17 00:00:00 2001 From: 7sDream <7seconddream@gmail.com> Date: Fri, 22 Jun 2018 22:42:15 +0800 Subject: [PATCH 3/5] Many changes, see bellow: - Added - BEncoder - BDecoder - encode - decode - errors argument for TorrentFileParser and parse_torrent_file (#4) - Changed - TorrentFileCreator rename to BEncoder - TorrentFileParser, BEncoder don't need outmost level is dict - Updated - README - CHANGELOG - LICENSE and tests for above changes. 
--- CHANGELOG.md | 15 +- LICENSE | 2 +- README.md | 24 ++- tests/__init__.py | 3 + tests/test_create.py | 10 +- tests/test_decode.py | 11 ++ tests/test_encode.py | 11 ++ tests/test_encoding_error.py | 25 +++ tests/test_files/outmost.string.torrent | 1 + tests/test_files/utf8.encoding.error.torrent | Bin 0 -> 16457 bytes tests/test_parse.py | 5 + torrent_parser.py | 153 +++++++++++++------ 12 files changed, 206 insertions(+), 54 deletions(-) create mode 100644 tests/test_decode.py create mode 100644 tests/test_encode.py create mode 100644 tests/test_encoding_error.py create mode 100644 tests/test_files/outmost.string.torrent create mode 100644 tests/test_files/utf8.encoding.error.torrent mode change 100644 => 100755 torrent_parser.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e5f4ae..340c53b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Add `errors` option in `TorrentFileParser` and `parse_torrent_file` to let user set the encoding error handler. (Thanks [@yasuotakei](https://github.com/yasuotakei)) +- Add `-e`/`--error` to CLI option to set the `errors` option of `parse_torrent_file`. +- `BDecoder` class and `decode` shortcut function to directly decode bytes. +- `decode` shortcut function to directly encode data to bytes. + +### Changed + +- **BreakChange** `TorrentFileCreator` rename to `BEncoder` as the origin name don't describe its function. +- `TorrentFileParser` don't need the outmost level of parsed data to be a `dict` now +- `BEncoder` don't need the outmost level of encoded data to be a `dict` now + ## [0.2.0] - 2018.5.25 ### Change @@ -17,7 +30,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### Added -- `TorrentFileCreator` class and `create_torrent_file` shortcut function for write back data to a torrent file +- `TorrentFileCreator` class and `create_torrent_file` shortcut function for write back data to a torrent file. ## [0.1.4] - 2018-04-06 diff --git a/LICENSE b/LICENSE index a4edad4..70ebd9a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2017 7sDream +Copyright (c) 2017 - 2018 7sDream Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index a71a935..7fb6dc8 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,15 @@ A simple parser for `.torrent` file. Can also edit and write back to torrent format after version 0.2.0. +## Features + +- Decoder and encoder for torrent files +- Auto decode bytes field to string with used specified encoding and error handler +- Auto detect encoding when use `auto` as encoding(need `chardet` installed) +- Auto decode hash value filed to hash blocks +- Uniform exception type +- CLI provided, with JSON output + ## Install ``` @@ -34,12 +43,23 @@ $ cat test.torrent | pytp ```pycon >>> import torrent_parser as tp >>> data = tp.parse_torrent_file('test.torrent') ->>> print(data['announce']) +>>> data['announce'] http://tracker.trackerfix.com:80/announce >>> data['announce'] = 'http://127.0.0.1:12345' >>> tp.create_torrent_file('new.torrent', data) ``` +or you don't operate with file, just raw bytes: + +```pycon +>>> import torrent_parser as tp +>>> data = tp.decode(b'd3:negi-1ee') +>>> data['neg'] +-1 +>>> tp.encode(data) +b'd3:negi-1ee' +``` + ## Test ```bash @@ -58,4 +78,4 @@ See [License][LICENSE]. 
[screenshots-normal]: http://rikka-10066868.image.myqcloud.com/1492616d-9f14-4fe2-9146-9a3ac06c6868.png [screenshots-indent]: http://rikka-10066868.image.myqcloud.com/eadc4184-6deb-42eb-bfd4-239da8f50c08.png [LICENSE]: https://github.com/7sDream/torrent_parser/blob/master/LICENSE -[CHANGELOG]: https://github.com/7sDream/torrent_parser/blob/master/CHANGELOG.md \ No newline at end of file +[CHANGELOG]: https://github.com/7sDream/torrent_parser/blob/master/CHANGELOG.md diff --git a/tests/__init__.py b/tests/__init__.py index fb128c4..10492bb 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,2 +1,5 @@ from .test_create import * from .test_parse import * +from .test_encoding_error import * +from .test_encode import * +from .test_decode import * diff --git a/tests/test_create.py b/tests/test_create.py index 8826896..d051505 100644 --- a/tests/test_create.py +++ b/tests/test_create.py @@ -6,7 +6,7 @@ import io import os.path import unittest -from torrent_parser import TorrentFileParser, TorrentFileCreator +from torrent_parser import TorrentFileParser, BEncoder class TestCreate(unittest.TestCase): @@ -17,15 +17,19 @@ class TestCreate(unittest.TestCase): data = collections.OrderedDict() data['a'] = 1 data['b'] = 2 - self.assertEqual(TorrentFileCreator(data).encode(), b'd1:ai1e1:bi2ee') + self.assertEqual(BEncoder(data).encode(), b'd1:ai1e1:bi2ee') def test_same_output_if_no_edit(self): with open(self.REAL_FILE, 'rb') as fp: in_data = fp.read() data = TorrentFileParser(io.BytesIO(in_data), True).parse() - out_data = TorrentFileCreator(data).encode() + out_data = BEncoder(data).encode() m1 = hashlib.md5() m1.update(in_data) m2 = hashlib.md5() m2.update(out_data) self.assertEqual(m1.digest(), m2.digest()) + + def test_dont_need_dict_outmost(self): + data = 123456 + self.assertEqual(BEncoder(data).encode(), b'i123456e') diff --git a/tests/test_decode.py b/tests/test_decode.py new file mode 100644 index 0000000..4257277 --- /dev/null +++ b/tests/test_decode.py @@ -0,0 
+1,11 @@ +from __future__ import unicode_literals + +import unittest + +from torrent_parser import decode + + +class TestDecode(unittest.TestCase): + + def test_decode(self): + self.assertEqual(decode(b'i12345e'), 12345) diff --git a/tests/test_encode.py b/tests/test_encode.py new file mode 100644 index 0000000..7914bac --- /dev/null +++ b/tests/test_encode.py @@ -0,0 +1,11 @@ +from __future__ import unicode_literals + +import unittest + +from torrent_parser import encode + + +class TestEncode(unittest.TestCase): + + def test_encode(self): + self.assertEqual(encode(12345), b'i12345e') diff --git a/tests/test_encoding_error.py b/tests/test_encoding_error.py new file mode 100644 index 0000000..a95a950 --- /dev/null +++ b/tests/test_encoding_error.py @@ -0,0 +1,25 @@ +from __future__ import unicode_literals + +import os.path +import unittest + +from torrent_parser import ( + TorrentFileParser, parse_torrent_file, InvalidTorrentDataException +) + + +class TestDecodingError(unittest.TestCase): + TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files') + FILE = os.path.join(TEST_FILES_DIR, 'utf8.encoding.error.torrent') + + def test_default_option_will_raise_exception(self): + with self.assertRaises(InvalidTorrentDataException): + parse_torrent_file(self.FILE) + with self.assertRaises(InvalidTorrentDataException): + with open(self.FILE, 'rb') as f: + TorrentFileParser(f).parse() + + def test_not_raise_exception_when_use_ignore(self): + parse_torrent_file(self.FILE, errors='ignore') + with open(self.FILE, 'rb') as f: + TorrentFileParser(f, errors='ignore').parse() diff --git a/tests/test_files/outmost.string.torrent b/tests/test_files/outmost.string.torrent new file mode 100644 index 0000000..d15055f --- /dev/null +++ b/tests/test_files/outmost.string.torrent @@ -0,0 +1 @@ +8:announce \ No newline at end of file diff --git a/tests/test_files/utf8.encoding.error.torrent b/tests/test_files/utf8.encoding.error.torrent new file mode 100644 index 
0000000000000000000000000000000000000000..3f159d3b73c78daf0edcbac83cd4c3a23cb1b3f2 GIT binary patch literal 16457 zcma*OWl-GN_BD*V1b2tv?(XhxK^kbFgS)#!a1ZVpEVyfM3nXZ8cX!Ck%$>}CCim7; z@29TnUT2@R_gZ_OUv>8}ik6Z_AYzeKRH__g&P zw)tJE-;ML9#euIyb@q7m+wbE4&w?_7Y@LCQwm@elTkGE!u>aBxWc%twTf6_%)DC3! zhqKr@Sb6@znB!OR|C7;cQ`no@IodM*lm73{`6ckbEN%IxqyG_^?H3E6+3N)1_#Zqu zey_tH2KaMh$3MI6j~4kyz}E@(Z)14^9a+o(9)C2$Z%_`_-=MAlTadHIYi7V#b^ohg zziQxQ2{5yBbFy?`^7!Aw>30UdL~sK*I)dyRotRz)`JKyu%^emyCXmzb9R5pW6YJLj z?_vx3Lk%W(56}Nv@Oz3*f6UZBN7f%Ce$(u~Cnd<%gVELA=})u&2L79|DZuGf4l|J5 zpLzX;`Y-iNJ?vkHgT0Hbl@pVr%YQNWo$vp%Mc95-8sPMY$6tN>TMzxs@!w&75e&6xG&fb69^&d|CzfcCaul}{QwgZ>}%}C81?QBT@@&u_V;B|770-QVBECxx9EC*L0jxRbRR7oRoI*238m#LdRZ%FDvV$pz%#vj;d^TC;QWsoOgO z+&qlrUhN4m(g1i^+c^S^SlAd9olHTdAmHmf2DpNN!2b)v#`E`bHWofjdr~o7Qf)hH zSD+)v7Rbcp?C$(8G%NeB%~-klv_U{q;CoUjkhP7S+wV|TUY=i2HjZBv@E`?|D#=Qc zYUnD+D>Jz`zMfhCM}_tOg0r#mIoW`KPM)?vQctt@{pqG4z<=PcF>L`hKrS9WeSpjB zv&UauWp8aj`bW~F|H+wDorP7Bg^iSkjfItkmQ-IwLPN`dRA0vxB=P#9@aGc_sfvOD zD;uBvA4!n@RZ>J)qr z$I{$not{cz6d<;5x-eBoBOKw(uO-%#l-E*7-ypWeBu_aGc#}@@5nN$RZ%oZZrqU-t z=rZ%?LJx5vi>I6^ni_4$>Wv0W<;!e(+9G)Hanl+0=VZ20@OI)WagN0#-H#BU&qZYw zvUT!|Gc!8GpJZ&PZ8zAbnhFR?5~a;{)R7Vfxe%pgc`l+yXMb|Q<&H}zRcrQlxEVMj z^htF`Kt)m7WgHKZ)ZmCz=6<)|=5epd+$Zw<&O@V{biJF*8b?C-qgmVz>s0BynA2JP zeKPo#RC>*>z%IaH!f{ijZH?7=JgmEd`Q(2_8=iH2Do2?`Ur+zcjSD5o@y>ajM5O zdy8BCmjV1rogj#P53(aV-I7Zu$fy?cLCUVJQz+z@b01%ILdAzb3;K`{tRDfzqiCe= zS>g=x&+74MDQwf|1DqY<)J#{)ksGJ-3|jf8N`920@+p={U_Ht7fm7~g?=9rF7rZ|# zBZ0Zpz({_bh@QKmnO4jnyqlF2u(;dcFF9D1Ln(kpVkdD23CDdyv%Q+wWy&cH5N5=U z-)+$`Ikc^-z)EiF2rgt-m5GR3NK8KAKLL!*X;4$ z+6oQbefw@;HKY=`tpve5f)r5{J^8$*c526-_Y9R=l;-_%UwPZ?(ePm?zo;l4Db1h_V=y^plqVGpKmvSO-U zuBjo(6#^?GQ_f1Y58!$q-OzH-nP=(xfAZ(%2!72lv+3SMQ0Hbn_nYWk zko5W2JzRnqBU2A5st{YElP`1m#P;Ys_YHl|uu(d9;c_-QGvarx{7mSThNQ}bF?=3M z(sf=xdE|UyZ{J*aW~H-#1o5?cihaGJO@6rID%jMGn>tG}VSat5nq46d`W&i@jV|0C zW-Pi4`u;+P$&9S76ECjSz77_mUXU-s{bM4W&g}Sxa{YQPaXku9{)v`iGEfR-SlPaw zXV@y5uLOa!|hub5GNv$UjO8K0I_{ffn=Gbt;d 
z#IA#`qdHQR-+iabR9Oo`1n&gc(=_TG)1>%;Y%Yry`4X=&4yUK?a=9sFU!jRxxo~rq z29b>y83l(j$K8iq;&0BToqNj|tyKGcc~r|(m+em9 zWd{nTHP}MFA&MaMix_>Yin5kcZ$_o4=DOFNGc_#G4D;C?iOh^E?O2^N30oXPxVvvr zQiCd8-gwzhzFh3|bRWC3x^OjGggIWmzQMJjnA|_2(AfIOAm5IK9W7~W)ALcV{v0m) zqna|`xzzlj9p$?Cp78B8l;jg(FY>(N#0-(s{DFKoOsUVzoSV^1V-&%t9*QxycB(Zx z{_r=norI-OQbnEBUUV}|_S8T%BPduMF zv+222r;9l9UWA2;&nhxd>F9bT9@kn6_eE(^TW$*cP$EOP&hgNb&A=1Bz{eD}nF%jJ zaAmX>Zq)$;wUWoK7soeb8nf?B<}YN$dpBB_*Rs+#u>H$X4Ho)f*8@|V2xFg@OGBvJ zC&9Zm;nnz+SQ7nx9aYyk4vU#fwtGu;>|G*5HnVdHouxce&#dsuGkRn+<(S75-$EO< z)$BM?wS+)$?10xSTJF|Jk+pcr8q`!M&x>P4)w0yv_VMo5EPy_q6Jz=t{eXpTS29GS zX;iAdARCyW@&}uQ*f##A`lobi}zy(r2p8$^N1?yMsX7jE&4DR@*$tTm$h zzNFM5N=((BWxN3tL2S@>xCo>^#-y+GZE(~PU7nDH&%!pYPDjl^wC5;%d+2R6g7DS< z%Au`G?+93}+Nt30nPHi?xT#K<=Vz+d)W%VpC0{*Hhe7uqiaQeO8ny^2nsdeI62Ha01Us1h$I)*YV*pjkH5Kb$`Ds*2>$W{#m#Ag zmfI7h(FO{$TU_zh^u9i58b$*c{?+Rx-ntInw$7>;&jr17-j|hmG>5%~W0mkp^=&q1 z{txn%)){$?nHHoxBHZtH(@xa~EOUMi@|}mH&0I}Yb%aa1p{?Uz_YvI0#y-!nx24K# zl>Zi#_Jk27#z39^b=UlNS7SAO*)rYheV;>DGcL23{vnEU$DbO z!-R)wgl9`MMmDF1`2o{0dgFw{1rm`ys>%M&PS=!-?U%FQ>7FDjwc^g(`cP@YfAmvs+tS(M(8nK>C&|9+1HdyYkw0++Cvp!L z-B_7@9%+Nq5`0g|lM2J1yj=)CeE2DK)cX^GO!kWEgNv2>qiS5lB511}61VO$-7+SR z=)NTj5KN!D-E=Bn!XRelFMnGk*A4!@G!;$rj5uR``C{q%+&W{az2E!(!5gK&kr(M2 zxmw}xnEZ#S!3UeDw`!=iiui za1P!%e2^<>Gs3`VNwTmb)x!+2Yg0L6YZ%xpcaXG{Az0$!wcybrjQA#6t#}EYA31H0 z;?bO{54m8+^N5vYXc$UgCK*Uk#OW;m#uc}AFR)Goz6 zD)xE|oB$WjmLy<6WR56(wwtl~7zK5QW-OOWRdC(3F+Eg?Ffl*N`R!A3J;U>U5=^^bNI$`hv>VX!sz(si+ogHYwD@%^E?ENlP*{ z@^?U^z}*k)6VV3RtMWHCV7yl5kf%hH61RfmX#PwS}4VX{I{t?j;9 zGf$H;dtOsYaH-~Qy)ZkdKoy6SJ)77H3_jQKGzm>1QV!bPI;W^HuP;M$H&m&|3sr5% zTM$rr>y%*WMY)@;_46nEaDU$vY1pZqzi=x#&}IzcE!CN#s+0;Pn_}NU^?f-@pw$o< z1j_WKCzGn=d{%I(eb6@c7@?s;IDbFogBJW-qGJJUA~x;FcD@hx>W|{jJt%RJ$KnUr zI&MF#$}=mHbP&gB)5;KijF!*Z;TImP3!Y@m!sc&0td4M~-MhZUNBl%9V0=rWWISUW zhvtj%qQV1*CzZ=eY9-C~2sJ>dUHzm!Vt9$nkCl~?1J2ikr&ivx1q1mdPf~c&ZVr#- z%o=Z3GlBbH?_EAVpj0!eDVDk3PHzOivqh6oM`$a>5=@gMDz+?kMcMgGgbYE$u#{6p 
zOJc!uAm@qHcium!<^w^N&FqH&Q#Kt(&PtwE*`M7545x=XL_37c!)H(1q3qr{!Ub|) z8;`@m<1onIhNcKaB-?$5-nC3zH-M<^6&tIUcQmVfl)JWXoXn2uM);Zi<_G|@gA9W{ zNIB~V-c;K_6_vlkD8{MxF1U>)gJpUtQ-W&+suH=UxLvWv4|fJ%$;B zLi9e3#9;o}ndpKSmBL*|QjCz&1{wtAP2}>F;oEGe1{!w44WpER0LIV64s!J{xlCo4 zl9x>ocqa3hVr+vgqGixme^VeTm5b80LZpYDYX`C1&SL;sq6eJOno$WSZC)pjf5{aA z`NaDg&2Ig5L4A`W-DYQFYf7*2Z)$wP77;BcBk`sBbmm<~%lBS3ca3yO+5tUlAA$-r ziqTtreuAr*fOg8z`;qGGf;qI^LRcp@O8Dj=J%yGs=;!ev#LFx+u~9nE-drXJNH3y0 zJY|~|$TiZ5LJBfH#gMbKsx8LdQ0%lV9KPQPU>6-PDMGXKnKCN@&k%Da$FDz=(MuF@ z$=ozj!)C`HoMKNyT3HGcTN=_S+HhZ$lA*0ryV0vXo8_716ol$S9Cz#U7U^S&bhEVD z!pQN#`^;hDzv*aXeV*{umEV(EMU0^N!l|O-K1dX4Ky~uA!9hbL0;NreMS6GJLnG3J zBEn_%xYf#>HpI0jmc(<9)m)*1+^RN`DFYw1Bm9nIYjm@>*Lt-hg%Li}?55erU)PPx z1n`|nV$WA3jn4;LVdEk9#XV%P&g?pe_r*uhFaOZ{9bf(4lyy76&wLuE=o?|iO+FL| zN;96N^>*K@ERK!^hN`x_^6jvKrC# zv`{sZnE=y;A_l#q#VKNvADJP{tHtcC8lU?7= zc*PH`)t82lw8ONddi77B}hv7pvQP58!qD<~!s0TH?patC9xY@=ahSDt&d1MI0w3TdO)zBX zT3h-;O{+K$0ktP_dL4U1W0px;FmnwRv<;h<*e%X5nM-2yG`$VoWP(vw3LY^6zz}jF z{$SwYGMa&;u*zFE4g!hKBqygVf!c; z?c_7FHqWbkF_<($57FuJEieQrl^SK=Ssu---Zt> z6hmFvt5i1}^BRzs1tQ6J4x?q(f+AC^0}>!;&j1)#!NNJRH@#-0IQ4{7?63>nB4^8= zo69`|z7PmovJsmakyr(hH|+Y

l{kcymN!&N}nCaA^swo!D!SUsNk3i?BQ@vWHe6ia3_6 zN%Q;r3e~tNB;z%N!VkTv3HGF!0wl=Yty3Uc^S8XT`dIzE&EJ>D=nr9%C0Z`qmN8>O z^<|U!5FzbsC*5p*5GPyG=8eJ;PHPbEB;f9~0oz<%&V-qS<8IxU-B8kb9@ew%EyQ># zn~9hg#^|gw*eL{_J{8SUbtt?v=@;jXpHN2UKxbJrnR$+RbvBFb`=K!tBX~wRxNs%W zH8{?=YdT72%L#EQ>oOVCN`%0eMq!>s%n=ROo+^FV6Ov!NAo5&r6ipd_MorV=>jYD$ zC!!%5BIovpq1BX|-C#(|U3Hq-(ihKI=q23rp^L&N>f5FPX2@-3c{&RfU266t!x8Jj z5&b+Uad0p7>Ig}K(>x&`2z4oALzrDJ*lTsvcHO2ny+pZBQP|M8I-Patf`i)9T~>)VAy5o8m*vR! zt9j!&gNs1^Aa%G`T`uYNf;KZ)ynXvna)hV1JXeE?fTdn>rBerj%D@Lm9){}R)`D+% zSS>#a$EgbPFq%TN-JP;VK&exJBRwMI`3rWZ0A2*U*e=Lt9d-K*wzJ+Y)-LbIPS@Zj zh1yewxM(##gZp+aH+lXa;s(UpWfsW;Ht$dF@;5S6eMI#k`Vm~^UeeZ_W;$8*;luCV z1WFU$V#YJdW?)EF+=ik0!d?^n$Z+C521f;JYdU+3wn$EQr)9mOOmfc6-goAghCT2w z;&F&VqGMYNdZ`Ea>A$zRAKOxFj)~L_qLw}r1J;hp9DD3w;pk^Hzem0)<3feK*Kyxg zJ#SBvX9Mq)Q4%oJQ41Tj9Cf45msj@t+cM$e=Jw+nNnWmvMCT45&!=t+*`rYi9EZ>>< z>wqw4A#Zid4&{(2Y(Zb0VZoFm6j&UbYxK7G!W=b-okQ0kT(Astz4h(fp?5&JR*tja&l&YOP z>3WbL3@mqY(=k=m4oXy6|A-^d^(NXfM4*5FlWs_Dlc6#)QQMtpxg6Nh zjq{;vFbeN?tZqqzJvr7_GF>q*&CczH29U_=IhPz7BX2onD^k#M=#Oqzt8NZyvjexF zt!*I~<5u4;hTrh|%5jl`^{2%=FAa5#AzX!q-=zwv@(7QAS9@{3=D(xLSzl{ic4;vy zCuu@+_D4qVrPL3DV$EwCZZN9-sJ%>GH1Osr2)P0+{OAhvOXwuxcq~69QgUFT)*JpN*bBa;6Slg7F2Xij z`L!HW=wtV%PPJn+=z&LXxlpHbVe_8PTE?2SFyBN6wghyZ+E?;yWjs9sw*oeU~A2rc5> z_wG&V?e;2mZm2$WFC^Vg=@><=&4W>BQ4$+NX=A?0%WGJl8mAY zRCxn%eJR+_Xz++zKnbq|iF4@}AMh29;;9;CzF7DtcJ(E=HkubSv3ZWm!-T|+jm!6X z*5y_qZg5C%xt2tsh-8h6ja?^HIyN@Ad=^$T!6NWe@DQoCr zJ#4b-qeznJF;LT8H@hHNdW9oPRaaV)-pd>TpTr%w00T+(W>vDy5KqC;D6N~7iI#2k z(iK#NmD$$i(aPoFnZ0$gulFcoU65B4azBAF?MVE970`1OerPaC{fwgySvodi$KlS% ztAo$??L`l5NIHTVDY@inyT%mE)x}&biSNNbeJE>jc9|+;D)pOMo?NL4w6WG&)eRG` zu~fkj9+Ud0(v`QM`0UCz0Gq~FzKI8V2P1_ps8oM4qAFMGSx7i_pYH>=Un|!d*ETL3{!jC zK~jPD24nda@70|{`FB`j7(Cz}U$SQ)k4A4aQL1Nwe$G$=uA(IQTFU zeqp6?yGYzg^$I%hB3hbLv?OF|?VoZdH3iRLjOS<>#cjulQcq_6o}T_rtpr_gD_O|v z8J6!7PwAp0?!&^wJ2LH_?kPlvJL4PJ@A9ko^veyFAB~)D!nHNAVT%Izcvo#7j8&B0Px zBm!XdH{hiZ=|3HttbX9B*m{*mDvA3c89JJrM7usEGvC+q%ttz9k(3L>!|hmG&q{~g 
zpoJGAz;<$vWiVOxe>@61cD_JbnSo@z%|U_@Lep%(@NiooQGx5v8fKZ|!I`zj^r6z3 zY?LVR)2|&`T?A**rn~w%-mr(O`w?O2+$2O<`IAzi(OY4nuM<$d&6uHL9#~e6ACgn0 zyVUxA1TEB?%g#`+!LS4}O*r)t+fUC-cE%6n~n%URjfq^*p2ik zG<&41g}UlLG&XN0>o6}HyNBPUp&w?}L8o_45+FO_tQLACy2i`R7^ocT}> z7VRo$zg8Nric#oAO_9_QJWBfL!wbi3Y)zv_6csKi3ksA7 z^2Rn)!`$nV_>k%;2Afiei#7Y@dujEfcX|!F6;_uP6Q;YZNKs5j=-k=Qps~JM_QCw= zGThIIoL>{!HV$BV6W1BUT_F+AQ4hM03pBfmjjL1N-02zR94<_ofQY=@qD@5Ypfj=* zr*g+rbL`p6usH4q%=)XjzFd;WVl-GeXdtX89cIl@aa1~+DTLg8Z=EY*rc}tC>Z01^ zFt$EtN)a#ZcS&Q72RO!We;eeBD_vhdI?o{@Kxjr1FoC7#i@S#{mLaWAxghTgxg>sIE&dt(5Rf0 zsT1OlK?AQlZ%CrUiU`(2k8tEblxZlAe80i8fOi!Qu%@zEm+WSQa!7a7eAaAFJQ67U zTh?W+OgGb1^Wb~*9;JNpp@UqD?yskjT()fY8b-SOW_|RH z3Kk)mNxsnKgJXL=HuZY^5_ z+Rpo>351fvT;I8oPirRW90IKDI!JDC(m_q#!cvss#1_j@2zJJmW_y>o^o*X^p%!l( zICW7_tw|&U7~te^1$4}$n#8K(YN39R#yyzjmSofO|6sq>qq|ZEeVX5Pa}cS&Y_~Ow zG)haq6C~+JQ~!FpSp~mJd!rS;E0AE^*?>^e+|>a$jHjAat8>WHsYW-vw}+R4gy9wm zaVT=%`4NlbX+8s9Co7XzO4;Rep%Ky^2Gi~ygY+7n@nPFSk3*;dYd;g8^k==1d_SaG z$#dA1ZC2Quv#rU~ce8Q#Wt*66eSTJ&-yoE*#Cxf2DTXo75Z2B84)r)DOHt@tv?C3IL3aFJkZAC){I; zfb#3yZiPB}<3)pCNYB{|S2{g@5JJ@(l-8J#EkUVYaM~Cg6F>k!Gl<0;II2j2BNL%2 z2Qv&<*o*eMR|2OQqBfpOcP7NOn<4RpLS;g>zz)|${tc?zigSSliK&~7=xm9(d2bwe#epu&L)@f)NR4GuwCOQ zA8>qHoE=+TmX>;9-feuf9wC%ZDZm-i)FOe!56)_0C$jcAuk_-Kp)ntqo)*ru935)f z?{j}xh@~2KS)g0YG3ExFoz;ZgLZwz_Uc#DEFHq{VXVxeunrPs8TwA!Zs2xc>O>HDb zUuXdqlp??HK4Chm?L%=^Hr5vXPNq_7V@FF7zL&*JenQEQ5})rzeHytrwB(By{!Put z^eNYnOB@S*cWq~`d4D|@!BB4ql&nNXtIuw2Py6j#9YOR}aQ)iuwA-`&R(z7UH*z9sor!88 zV$K|U+&)s4=X{JyBCKiuDEBcVS(ENd&B1oHiF({vX}PHSk5Tb_>C%09b$>##&r-5o zUl6b`_v`IXaSyW9B}|7~K`4P!ARM3b7V|_#p~xZy;vtPL8VJ>~HR$v}Ej${UDxUq8 z4=BoUK1Sw?(UWQ1-%D$kPeL44CRhjB-*9xrFs9t4Z`O~RH8vp{P{c5cI&+VH<(E93 zS~xF2luii6sT_MQBosa%{lql$qt8eo25(}d#~j&+3q_|7zTJxRVmY1M9DsaWts2RV zC|vpQ#5Hay!o75&a_YMYcIb_5{|nZ|!)+Y?V)3 z>^vx(HM;>=2=^(^0_V(;lmt^>Vn15HY`}U=HXQm+b!Qx_2d=of=%Q8Aa)-MSfvLP< z9+{G-G8GD`!cD}P>R2qrOX#t(^7#2o3|X>zY(QSgAiiU~oHGo}*-OWcl`GeR`J1>PAfe(G0I`t47T&q8$)j20Ejycf(-q{H^<`zXU?4`t{U%evb?VtyO(x$rVidan7oXlX&b!=fh{e14Qn 
zEHijDRuhmya;CWLBJ=|fa$HD2CDG-)W)R*}7H9N8Fho_OK8}&vzGRKd-GcZ1Z5rfP zW~wpo7V)BhF&*Qe)bq#$2jinQ-0bXY1io7ch~#W_w(8lpG2fx{gnB#{M^4c$&@2kT zxbwWG4vA@U43EN{n{-Pg$riDP(Bt13pRcC7+-UR653YAAMsn6p7<{y--~YHD>1&&A2fXHoS1iPxD)%eHNh?tlPK z=0ZP#N-PZH!*NLvpOl=pJcCVGcb;dwLL@DC!#?Jm2W^ji)~DR6c)R&672z+XHT7EV z4o=yf+r9|>Kz;!6j~7{u>z5l&4pzkvJT6Kg_Mg|dZaM75FHRnMRqG;I~u+jJOzpftILofQx4QX+3nBS_7tf6 z&X-hrzRaS-&D02@;5)`94RW3$WGu&hTmO;g7klNdA~>uo zf_V+|wVP;NdWq7%W3R3vus|6%0)w)Y?W5@yEjFG^%U$g{HKm`Lp`j7N*qoD&LBlC0 z$5spbc3_>{0A{zoN)T^DEO+yHf0imzP+zyX3Sp~)x~axz{3iMARCok>bMM*}-NW<= z9#<3zb@YA1Pn4cQs4XPGccdO&2((I%q$Z4+4o*XA=c=wZ2g~WM0!Dh&ZaS8sHOaA^ zmF&ceQp6oyyW>P=?=f8p%|K8=m#y+|3_f3*9PE30qAi0cI$+HwD2FSa1anC?u)F&v z6KZ0?vgIz+;k^?>8kyf2)DkqiCB_V;O|g7)RR4slL2j(#Z*LM8s=<$|w5FUhohmVE zyr&o?>TXcc6J}?U!4C{eJCKqmvcChUS^^VQTDByRttD5hNY0caPXb)~^oWa~&y>wG zls@fxNbH?#`@p)p9E9t`O^z_UJmlZ+|6t*cEKaGbZYB$X-3&EXd}jFAdhAih%WmVn zPz0gC4X+ix_O%+v)jMTM;`u zKfU*-nF>hYhorhkx3Y(JI3as5izvIZX8eE(-*TsstScZr5H&ug0X1wu8cUV{C zCV44okGTxu*mwEzqNLxUjFc2A;_grcER1j5p$rhATOa*I^{d`f72uG6!_JVNT@R=3 zTlAFU2Pc<+3*gvcO=5oGD$Oep-%kJVNdpbWG@(iO7EdIN`XiN!z*np&b~$oW-#2fU zN=y!Sj{RMHVF)^)ow>osx{NeRy~y^wIWx#98TZqJpEUXdtZb1SNvb1stET4k6F~_b zMHeoMwC0mkHie!2iDXf;WM3S%=k=U@7ZOw1Ni)Je3s4?7X?pm$CuZ8d{mSG(pQD_{ z6W6VhA&m8Xx)?KnbaD=Q3cNqI(;bNJp|celV&=wv^^8m+aE>81pL?a6CqS!4)iaQXI*m~!b|Oit1T zK{3IMtS(_q-5YVIdEqfs=gvIxjfaDavK-KrZ^|`pvFD=Jrvu17=9OX@YhKbBx)1ju zGJ7&74K1_XXhXX_069;6H{?)~d?pG~J2_%XRJ14-3IMJ;$#?Kt2z_AmE@29e z5_;5A%9C;X6LbB(T0e_Lu%*^QravK`$2XRIO0~(fj}(Cu`m{jpAmfIYPJ%5@cl=aX zf~r$Is860cS&94Ripq?*kISRJ1ZRF{`fMlui!n84jCxNY(c!XBi)9H5`~w$9c887LtVdzXS;LKFV_e6Ms9+pl-Ws`DG9@k2LgBN#6FMzlkbSUaLl!w6RuH3R&Dm{l zNd)T`HBc_l_IA&wyAfhKy4JX%dcgwmIm-;EOJ``OXih2WS?T5%UPUks6}j{7e$tup z74984nTw}(B&?HIfqTl*m^DnWgk%DP0PUPACBE;kqsIor?e&k*KA!2b5J}*oV=ta_ z_|hG|n?XoQ)gzN-dSQtSmf@GqYM(s4rOjFqP1IsNv|ZD@7b#VrnqBextVJ_IrgjmgggFob*T z!BCuzmH41fpq$1DsmN#77}mu?>6SIHV`q@C4&C55d$o9HX4sSt3lzkT?>7?qhpu^n 
zSWfx9{?Bc7RMf=B2xE1*`3?(2e0|PZIV=ce=XjPX$5Ny=w@IZs&mM`Hgq80)xzrP_(=J33lH1UvtElSnQ#bpdU8f`gNJTF5Q-X6 zESJZj-R$G=soKn^&iNt}9Xz~fwIUFX!XFyY0tN=wtbfJxc6O;yFx^Q6rcjJxVccch95K6u^*B+XH0Ill z)-URHY?H8bAu&_?i;y_~bHW0+>z`liR_Tw~yhQ|X%EAuv< zz>nopD7l@HsklJj5!UR*1#@7S?c>#^H^KwO;P|#B2QPGRYWRo6yUBL(&96hp;YNcg zbzD825H-Ginm+_6wz1Q6&^y0=_fDK%4tiV&^mU-{0sj#zJ9cGw zG)%}Ec&0SvQGnSB&F0%^(8@KoZzXj#;^VX;Za>`7)FfE6k*I~zkOBT>0@SL33iRY! z3sw~HI?v;lsP-Hr^&CL-F{}+Z>I}K)9&%&kT@#Mx~CFaJcKI? z3SGA?pkb(0$`$nzr0MbPkgoOt*zCGd2d+ImRP@F)STLc!Q*rR`s){_(@i%cYrK6<~ z($0kf!0I2c1Ij9OO2E4UWHJYdTNO;CASg^#+p2UTY6Ivm`bnZ|1 zpAEkmZ7+S$72NUcdA2gLL_wVgz5`}f85a)FIwcHVQizB1HowQnfCm$0sL{N0q*+@* z-sIqppkH$*bdP}4ueVtk{}`j=GpC_r?yz*;VLjH2(W17G*`?fAaZ|oXE_`|I{#ZWw z2|`Zeovjeg3ABz)It&O_Nkiafc0^zBa`EJCFWBd=y5{#44Vd_xg{)wz(=I7}PU-Fz2j(5!8Wsj1AO+)LQf z3PSqk0t5v7pL=nM<)?^y=eahtGXY-a?wfox6qSM{tH9E#%(kUoF4(bl^Vz2WX?7;5 z=~#ves~mZV!j_C$> zP^d@1AUG$nJ|#1&$=6^MQ0G=-&04AD1&n;=v9wfmyAF4UpGnNa+=&36jT+sdAPm!b16QlOay2N#V7dxB|(}bV(!9JPMR^FVJ zPO5W4l{#)%RSsLT*MjQ<2=xpKywxRZx}8^Qnl`n}MW`5F4uoy`=S_LB&he)g1%6Ti!UL zaOuNdHR<@;OIG4hw>E;!u}W|HSs{DWMF&&abnYAtCw{6>FVluAaBd2I?cRY=cXyBo zz%Q{*O`dHKsz@}!6&oXm)M{oUDZ=zUm3pJja@=ZLSoEmC69g?K>U*-GeOF<0(gW4} zfU&NXQgkybo2MJgqu>+{vI4xDK?xhM84JgX@Sv4E)9&=MEt2+m(^7$E!Ag%nA1*CJ z+pivSO+h49z3n4***SG0N6tBXh@9ElA%eq`l}yj@g!_=fgj5#|7uKYHA`B-^*bUu$J@S&5+~gQ zb1CE*(Lpai-=G-V&UNrfT38B97?(qQ`~W30Sy2r9)8G+58a4>9dd#lID{{(#`uKwN>?=YbPvuqN6#B)7I&Xd}(hTOM`o#5Q`l-h9%l92dwKb(9eSf>k?y@s->C;V`b+viRlk~q){R<+Sp>|r zMXRZYsBQBAGT&27hkdbguwW~6+~n*s{RXgIayLl#iG4-3A;`8pPjP{Fj`0!|G=gTG z?GNMoMB&129~^>qHH}U%7Dwzp^+Csna(N_tNS4ltp754x3L2=Nm1zXp-PA2-#du5LBOMenyN3{$azl7fgs?&mm6Vl(eVoXbTY&A;a7+&z`ChuqO?@xC2Xw1_564E(ehQKOXV2dCawSL$-`_2V#?;p2fjGAIHdiCj6hiQvzrDVl??c?q!Zo^1$ojcd*U ztn1TEY-pzSbDA^sq(8m|Jh&#N46~cGJ6t}yG47KU7)c3_o8N}Y4|u!KmX==~6$E@C z4Tbn*a^ z`_}f>01w7L?xwMF^09KTzTReI;bh}t;^gAx<^A&tpOGcN$@2A*9oI8VLo!+h5AxOf Qx{KTfk6I^%QXugE14ckl82|tP literal 0 HcmV?d00001 diff --git a/tests/test_parse.py b/tests/test_parse.py index 
c660b39..80962b5 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -11,6 +11,7 @@ class TestParse(unittest.TestCase): TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files') REAL_FILE = os.path.join(TEST_FILES_DIR, 'real.torrent') NEG_FILE = os.path.join(TEST_FILES_DIR, 'neg.torrent') + STRING_FILE = os.path.join(TEST_FILES_DIR, 'outmost.string.torrent') def test_parse_torrent_file_use_shortcut(self): parse_torrent_file(self.REAL_FILE) @@ -53,6 +54,10 @@ class TestParse(unittest.TestCase): data = parse_torrent_file(self.NEG_FILE) self.assertEqual(data['neg'], -1) + def test_dont_need_dict_outmost(self): + data = parse_torrent_file(self.STRING_FILE) + self.assertEqual(data, 'announce') + if __name__ == '__main__': unittest.main() diff --git a/torrent_parser.py b/torrent_parser.py old mode 100644 new mode 100755 index 97777f5..b3909da --- a/torrent_parser.py +++ b/torrent_parser.py @@ -62,11 +62,14 @@ except NameError: str_type = str __all__ = [ + 'InvalidTorrentDataException', + 'BEncoder', + 'BDecoder', + 'encode', + 'decode', + 'TorrentFileParser', 'create_torrent_file', 'parse_torrent_file', - 'InvalidTorrentDataException', - 'TorrentFileCreator', - 'TorrentFileParser', ] __version__ = '0.2.0' @@ -105,7 +108,7 @@ class TorrentFileParser(object): STRING_INDICATOR = b'' STRING_DELIMITER = b':' - RAW_FIELD_PARAMS = { + HASH_FIELD_PARAMS = { # field length need_list 'pieces': (20, True), 'ed2k': (16, False), @@ -120,14 +123,18 @@ class TorrentFileParser(object): (TYPE_STRING, STRING_INDICATOR), ] - def __init__(self, fp, use_ordered_dict=False, encoding='utf-8'): + def __init__( + self, fp, use_ordered_dict=False, encoding='utf-8', errors='strict' + ): """ :param fp: a **binary** file-like object to parse, which means need 'b' mode when use built-in open function - :param encoding: file content encoding, default utf-8, use 'auto' to - enable charset auto detection ('chardet' package should be installed) - :param use_ordered_dict: Use 
collections.OrderedDict as dict container - default False, which mean use built-in dict + :param bool use_ordered_dict: Use collections.OrderedDict as dict + container default False, which mean use built-in dict + :param string encoding: file content encoding, default utf-8, use 'auto' + to enable charset auto detection (need 'chardet' package installed) + :param string errors: how to deal with encoding error when try to parse + string from content with ``encoding`` """ if getattr(fp, 'read', ) is None \ or getattr(fp, 'seek') is None: @@ -137,12 +144,14 @@ class TorrentFileParser(object): self._encoding = encoding self._content = fp self._use_ordered_dict = use_ordered_dict + self._error_handler = errors def parse(self): """ :return: the parse result - :type: depends on ``use_ordered_dict`` option when init the parser - see :any:`TorrentFileParser.__init__` + :rtype: dict|list|int|string + :raise: :any:`InvalidTorrentDataException` when parse failed or error + happened when decode string using specified encoding """ self._restart() data = self._next_element() @@ -155,10 +164,7 @@ class TorrentFileParser(object): except EOFError: # expect EOF pass - if isinstance(data, dict): - return data - - raise InvalidTorrentDataException('Outermost element is not a dict') + return data def _read_byte(self, count=1, raise_eof=False): assert count >= 0 @@ -186,9 +192,8 @@ class TorrentFileParser(object): k = self._next_element() if k is _END: return - if k in self.RAW_FIELD_PARAMS: - length, need_list = self.RAW_FIELD_PARAMS[k] - v = self._next_hash(length, need_list) + if k in self.HASH_FIELD_PARAMS: + v = self._next_hash(*self.HASH_FIELD_PARAMS[k]) else: v = self._next_element() if k == 'encoding': @@ -225,25 +230,26 @@ class TorrentFileParser(object): char = self._read_byte(1) return -value if neg else value - def _next_string(self, decode=True): + def _next_string(self, need_decode=True): length = self._next_int(self.STRING_DELIMITER) raw = self._read_byte(length) - if 
decode: + if need_decode: encoding = self._encoding if encoding == 'auto': - encoding = detect(raw) + self.encoding = encoding = detect(raw) try: - string = raw.decode(encoding, "ignore") + string = raw.decode(encoding, self._error_handler) except UnicodeDecodeError as e: raise InvalidTorrentDataException( self._pos - length + e.start, - "Fail to decode string at pos {pos} using " + e.encoding + "Fail to decode string at pos {pos} using encoding " + + e.encoding ) return string return raw def _next_hash(self, p_len, need_list): - raw = self._next_string(decode=False) + raw = self._next_string(need_decode=False) if len(raw) % p_len != 0: raise InvalidTorrentDataException( self._pos - len(raw), "Hash bit length not match at pos {pos}" @@ -280,7 +286,7 @@ class TorrentFileParser(object): return element -class TorrentFileCreator(object): +class BEncoder(object): TYPES = { (dict,): TorrentFileParser.TYPE_DICT, @@ -291,27 +297,25 @@ class TorrentFileCreator(object): def __init__(self, data, encoding='utf-8'): """ - :param data: torrent data, must be a dict or OrderedDict - :param encoding: string field output encoding + :param dict|list|int|string data: data will be encoded + :param string encoding: string field output encoding """ - if not isinstance(data, dict): - raise InvalidTorrentDataException( - None, - "Top level structure should be a dict" - ) self._data = data self._encoding = encoding def encode(self): """ - Encode data to bytes that conform to torrent file format + Encode to bytes + + :rtype: bytes """ return b''.join(self._output_element(self._data)) - def encode_to_readable(self): + def encode_to_filelike(self): """ - Encode data to a file-like(BytesIO) object which contains the result of - `TorrentFileCreator.encode()` + Encode to a file-like(BytesIO) object + + :rtype: BytesIO """ return io.BytesIO(self.encode()) @@ -364,7 +368,7 @@ class TorrentFileCreator(object): ) for x in self._output_element(k): yield x - if k in TorrentFileParser.RAW_FIELD_PARAMS: 
+ if k in TorrentFileParser.HASH_FIELD_PARAMS: for x in self._output_decode_hash(v): yield x else: @@ -393,29 +397,79 @@ class TorrentFileCreator(object): ) -def parse_torrent_file(filename, use_ordered_dict=False): +class BDecoder(object): + def __init__( + self, data, use_ordered_dict=False, encoding='utf-8', errors='strict' + ): + """ + :param bytes data: raw data to be decoded + :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` + :param string encoding: see :any:`TorrentFileParser.__init__` + :param string errors: see :any:`TorrentFileParser.__init__` + """ + self._data = bytes(data) + self._use_ordered_dict = use_ordered_dict + self._encoding = encoding + self._errors = errors + + def decode(self): + return TorrentFileParser( + io.BytesIO(self._data), self._use_ordered_dict, self._encoding, + self._errors, + ).parse() + + +def encode(data, encoding='utf-8'): + """ + Shortcut function for encode python object to torrent file format(bencode) + + :param dict|list|int|string data: data to be encoded + :param string encoding: see :any:`TorrentFileParser.__init__` + :rtype: bytes + """ + return BEncoder(data, encoding).encode() + + +def decode(data, use_ordered_dict=False, encoding='utf-8', errors='strict'): + """ + Shortcut function for decode bytes as torrent file format(bencode) to python + object + + :param bytes data: raw data to be decoded + :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` + :param string encoding: see :any:`TorrentFileParser.__init__` + :param string errors: see :any:`TorrentFileParser.__init__` + :rtype: dict|list|int|string + """ + return BDecoder(data, use_ordered_dict, encoding, errors).decode() + + +def parse_torrent_file( + filename, use_ordered_dict=False, encoding='utf-8', errors='strict', +): """ Shortcut function for parse torrent object using TorrentFileParser :param string filename: torrent filename :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` - :rtype: dict if 
``use_ordered_dict`` is false, - collections.OrderedDict otherwise + :param string encoding: see :any:`TorrentFileParser.__init__` + :param string errors: see :any:`TorrentFileParser.__init__` + :rtype: dict|list|int|string """ with open(filename, 'rb') as f: - return TorrentFileParser(f, use_ordered_dict).parse() + return TorrentFileParser(f, use_ordered_dict, encoding, errors).parse() def create_torrent_file(filename, data, encoding='utf-8'): """ Shortcut function for create a torrent file using TorrentFileCreator - :param filename: output torrent filename - :param data: torrent data, must be a dict or OrderedDict - :param encoding: string field output encoding + :param string filename: output torrent filename + :param dict|list|int|string data: torrent data + :param string encoding: string field output encoding """ with open(filename, 'wb') as f: - f.write(TorrentFileCreator(data, encoding).encode()) + f.write(BEncoder(data, encoding).encode()) def __main(): @@ -432,7 +486,10 @@ def __main(): help='ensure output json use ascii char, ' 'escape other char use \\u') parser.add_argument('--coding', '-c', default='utf-8', - help='string encoding, default utf-8') + help='string encoding, default "utf-8"') + parser.add_argument('--errors', '-e', default='strict', + help='decoding error handler, default "strict", you can' + ' use "ignore" or "replace" to avoid exception') parser.add_argument('--version', '-v', action='store_true', default=False, help='print version and exit') args = parser.parse_args() @@ -453,7 +510,9 @@ def __main(): exit(1) # noinspection PyUnboundLocalVariable - data = TorrentFileParser(target_file, not args.dict, args.coding).parse() + data = TorrentFileParser( + target_file, not args.dict, args.coding, args.errors + ).parse() data = json.dumps( data, ensure_ascii=args.ascii, From 177d1c7de9cc9275084ea43658933ab58df6f414 Mon Sep 17 00:00:00 2001 From: 7sDream <7seconddream@gmail.com> Date: Sat, 23 Jun 2018 12:31:39 +0800 Subject: [PATCH 4/5] 
hash_fields(finish #4) and hash raw: - hash fields method and parameter allow user customize hash field list - hash raw parameter allow the output of hash field to be raw bytes - BEncoder now support encode raw bytes --- CHANGELOG.md | 7 +- README.md | 3 +- tests/__init__.py | 2 + tests/test_hash_field.py | 21 ++++ tests/test_hash_raw.py | 23 ++++ torrent_parser.py | 225 ++++++++++++++++++++++++++++++--------- 6 files changed, 224 insertions(+), 57 deletions(-) create mode 100644 tests/test_hash_field.py create mode 100644 tests/test_hash_raw.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 340c53b..26ed202 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Add `-e`/`--error` to CLI option to set the `errors` option of `parse_torrent_file`. - `BDecoder` class and `decode` shortcut function to directly decode bytes. - `decode` shortcut function to directly encode data to bytes. +- Added `hash_fields` parameter and `hash_field` method to customize the hash field list. +- Added `hash_raw` parameter to let all hash fields be parsed as raw bytes. ### Changed - **BreakChange** `TorrentFileCreator` rename to `BEncoder` as the origin name don't describe its function. -- `TorrentFileParser` don't need the outmost level of parsed data to be a `dict` now -- `BEncoder` don't need the outmost level of encoded data to be a `dict` now +- `TorrentFileParser` doesn't need the outermost level of parsed data to be a `dict` now. +- `BEncoder` doesn't need the outermost level of encoded data to be a `dict` now. +- `BEncoder` now supports encoding raw bytes. ## [0.2.0] - 2018.5.25 diff --git a/README.md b/README.md index 7fb6dc8..8a751d6 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,7 @@ Can also edit and write back to torrent format after version 0.2.0. 
- Decoder and encoder for torrent files - Auto decode bytes field to string with used specified encoding and error handler - Auto detect encoding when use `auto` as encoding(need `chardet` installed) -- Auto decode hash value filed to hash blocks -- Uniform exception type +- Auto decode hash value field to hash blocks, also customizable - CLI provided, with JSON output ## Install diff --git a/tests/__init__.py b/tests/__init__.py index 10492bb..4e7bb8c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -3,3 +3,5 @@ from .test_parse import * from .test_encoding_error import * from .test_encode import * from .test_decode import * +from .test_hash_field import * +from .test_hash_raw import * \ No newline at end of file diff --git a/tests/test_hash_field.py b/tests/test_hash_field.py new file mode 100644 index 0000000..a7c1e17 --- /dev/null +++ b/tests/test_hash_field.py @@ -0,0 +1,21 @@ +from __future__ import unicode_literals + +import os.path +import unittest + +from torrent_parser import ( + TorrentFileParser, parse_torrent_file, decode +) + + +class TestHashField(unittest.TestCase): + TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files') + FILE = os.path.join(TEST_FILES_DIR, 'utf8.encoding.error.torrent') + + def test_not_raise_exception_when_add_hash_fields(self): + parse_torrent_file(self.FILE, hash_fields={'info_hash': (20, False)}) + with open(self.FILE, 'rb') as f: + TorrentFileParser(f).hash_field('info_hash').parse() + with open(self.FILE, 'rb') as f: + data = f.read() + decode(data, hash_fields={'info_hash': (20, False)}) diff --git a/tests/test_hash_raw.py b/tests/test_hash_raw.py new file mode 100644 index 0000000..f6b19b9 --- /dev/null +++ b/tests/test_hash_raw.py @@ -0,0 +1,23 @@ +from __future__ import unicode_literals + +import os.path +import unittest + +from torrent_parser import decode, encode + + +class TestHashRaw(unittest.TestCase): + TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files') + FILE = 
os.path.join(TEST_FILES_DIR, 'utf8.encoding.error.torrent') + + def test_hash_raw_decode(self): + data = b'd4:hash4:\xAA\xBB\xCC\xDDe' + res = decode(data, hash_fields={'hash': (4, False)}, hash_raw=False) + self.assertEqual(res['hash'], 'aabbccdd') + res = decode(data, hash_fields={'hash': (4, False)}, hash_raw=True) + self.assertEqual(res['hash'], b'\xAA\xBB\xCC\xDD') + + def test_raw_bytes_encode(self): + res = {'hash': b'\xAA\xBB\xCC\xDD'} + data = encode(res) + self.assertEqual(data, b'd4:hash4:\xAA\xBB\xCC\xDDe') diff --git a/torrent_parser.py b/torrent_parser.py index b3909da..942266d 100755 --- a/torrent_parser.py +++ b/torrent_parser.py @@ -26,6 +26,11 @@ Usage: with open('new.torrent', 'wb') as f: f.write(TorrentFileCreator(data).encode()) + # or you don't deal with file, just object in memory + + data = decode(b'i12345e') # data = 12345 + content = encode(data) # content = b'i12345e' + """ from __future__ import print_function, unicode_literals @@ -93,6 +98,12 @@ class __EndCls(object): _END = __EndCls() +def _check_hash_field_params(name, value): + return isinstance(name, str_type) \ + and isinstance(value, tuple) and len(value) == 2 \ + and isinstance(value[0], int) and isinstance(value[1], bool) + + class TorrentFileParser(object): TYPE_LIST = 'list' @@ -124,32 +135,66 @@ class TorrentFileParser(object): ] def __init__( - self, fp, use_ordered_dict=False, encoding='utf-8', errors='strict' + self, fp, use_ordered_dict=False, encoding='utf-8', errors='strict', + hash_fields=None, hash_raw=False, ): """ :param fp: a **binary** file-like object to parse, which means need 'b' mode when use built-in open function :param bool use_ordered_dict: Use collections.OrderedDict as dict container default False, which mean use built-in dict - :param string encoding: file content encoding, default utf-8, use 'auto' + :param str encoding: file content encoding, default utf-8, use 'auto' to enable charset auto detection (need 'chardet' package installed) - :param string 
errors: how to deal with encoding error when try to parse + :param str errors: how to deal with encoding error when try to parse string from content with ``encoding`` + :param Dict[str, Tuple[int, bool]] hash_fields: extra fields should + be treated as hash value. dict key is the field name, value is a + two-element tuple of (hash_block_length, as_a_list). + See :any:`hash_field` for detail """ if getattr(fp, 'read', ) is None \ or getattr(fp, 'seek') is None: - raise ValueError('Argument fp needs a file like object') + raise ValueError('Parameter fp needs a file like object') self._pos = 0 self._encoding = encoding self._content = fp self._use_ordered_dict = use_ordered_dict self._error_handler = errors + self._hash_fields = dict(TorrentFileParser.HASH_FIELD_PARAMS) + if hash_fields is not None: + for k, v in hash_fields.items(): + if _check_hash_field_params(k, v): + self._hash_fields[k] = v + else: + raise ValueError( + "Invalid hash field parameter, it should be type of " + "Dict[str, Tuple[int, bool]]" + ) + self._hash_raw = bool(hash_raw) + + def hash_field(self, name, block_length=20, need_list=False): + """ + Let field with the `name` to be treated as hash value, don't decode it + as a string. 
+ + :param str name: field name + :param int block_length: hash block length for split + :param bool need_list: if True, when the field only has one block( + or even empty) its parse result will be a one-element list( + or empty list); If False, will be a string in 0 or 1 block condition + :return: return self, so you can chained call + """ + v = (block_length, need_list) + if _check_hash_field_params(name, v): + self._hash_fields[name] = v + else: + raise ValueError("Invalid hash field parameter") + return self def parse(self): """ - :return: the parse result - :rtype: dict|list|int|string + :rtype: dict|list|int|str|bytes :raise: :any:`InvalidTorrentDataException` when parse failed or error happened when decode string using specified encoding """ @@ -192,10 +237,14 @@ class TorrentFileParser(object): k = self._next_element() if k is _END: return - if k in self.HASH_FIELD_PARAMS: - v = self._next_hash(*self.HASH_FIELD_PARAMS[k]) + if not isinstance(k, str_type): + raise InvalidTorrentDataException( + self._pos, "Type of dict key can't be " + type(k).__name__ + ) + if k in self._hash_fields: + v = self._next_hash(*self._hash_fields[k]) else: - v = self._next_element() + v = self._next_element(k) if k == 'encoding': self._encoding = v yield k, v @@ -230,7 +279,7 @@ class TorrentFileParser(object): char = self._read_byte(1) return -value if neg else value - def _next_string(self, need_decode=True): + def _next_string(self, need_decode=True, field=None): length = self._next_int(self.STRING_DELIMITER) raw = self._read_byte(length) if need_decode: @@ -240,10 +289,21 @@ class TorrentFileParser(object): try: string = raw.decode(encoding, self._error_handler) except UnicodeDecodeError as e: + msg = [ + "Fail to decode string at pos {pos} using encoding ", + e.encoding + ] + if field: + msg.extend([ + ' when parser field "', field, '"' + ', maybe it is an hash field. 
', + 'You can use self.hash_field("', field, '") ', + 'to let it be treated as hash value, ', + 'so this error may disappear' + ]) raise InvalidTorrentDataException( self._pos - length + e.start, - "Fail to decode string at pos {pos} using encoding " + - e.encoding + ''.join(msg) ) return string return raw @@ -254,6 +314,8 @@ class TorrentFileParser(object): raise InvalidTorrentDataException( self._pos - len(raw), "Hash bit length not match at pos {pos}" ) + if self._hash_raw: + return raw res = [ binascii.hexlify(chunk).decode('ascii') for chunk in (raw[x:x+p_len] for x in range(0, len(raw), p_len)) @@ -280,9 +342,12 @@ class TorrentFileParser(object): def _type_to_func(self, t): return getattr(self, '_next_' + t) - def _next_element(self): + def _next_element(self, field=None): element_type = self._next_type() - element = self._type_to_func(element_type)() + if element_type is TorrentFileParser.TYPE_STRING and field is not None: + element = self._type_to_func(element_type)(field=field) + else: + element = self._type_to_func(element_type)() return element @@ -292,16 +357,30 @@ class BEncoder(object): (dict,): TorrentFileParser.TYPE_DICT, (list,): TorrentFileParser.TYPE_LIST, (int,): TorrentFileParser.TYPE_INT, - (str_type,): TorrentFileParser.TYPE_STRING, + (str_type, bytes): TorrentFileParser.TYPE_STRING, } - def __init__(self, data, encoding='utf-8'): + def __init__(self, data, encoding='utf-8', hash_fields=None): """ - :param dict|list|int|string data: data will be encoded - :param string encoding: string field output encoding + :param dict|list|int|str data: data will be encoded + :param str encoding: string field output encoding + :param List[str] hash_fields: see + :any:`TorrentFileParser.__init__` """ self._data = data self._encoding = encoding + self._hash_fields = list(TorrentFileParser.HASH_FIELD_PARAMS.keys()) + if hash_fields is not None: + self._hash_fields.extend(str_type(hash_fields)) + + def hash_fields(self, name): + """ + see 
:any:`TorrentFileParser.hash_field` + + :param str name: + :return: return self, so you can chained call + """ + return self._hash_fields.append(str_type(name)) def encode(self): """ @@ -368,7 +447,7 @@ class BEncoder(object): ) for x in self._output_element(k): yield x - if k in TorrentFileParser.HASH_FIELD_PARAMS: + if k in self._hash_fields: for x in self._output_decode_hash(v): yield x else: @@ -399,77 +478,117 @@ class BEncoder(object): class BDecoder(object): def __init__( - self, data, use_ordered_dict=False, encoding='utf-8', errors='strict' + self, data, use_ordered_dict=False, encoding='utf-8', errors='strict', + hash_fields=None, hash_raw=False, ): """ + See :any:`TorrentFileParser.__init__` for parameter description. + :param bytes data: raw data to be decoded - :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` - :param string encoding: see :any:`TorrentFileParser.__init__` - :param string errors: see :any:`TorrentFileParser.__init__` + :param bool use_ordered_dict: + :param str encoding: + :param str errors: + :param Dict[str, Tuple[int, bool]] hash_fields: + :param bool hash_raw: """ - self._data = bytes(data) - self._use_ordered_dict = use_ordered_dict - self._encoding = encoding - self._errors = errors + self._parser = TorrentFileParser( + io.BytesIO(bytes(data)), + use_ordered_dict, + encoding, + errors, + hash_fields, + hash_raw, + ) + + def hash_field(self, name, block_length=20, need_dict=False): + """ + See :any:`TorrentFileParser.hash_field` for parameter description + + :param name: + :param block_length: + :param need_dict: + :return: return self, so you can chained call + """ + self._parser.hash_field(name, block_length, need_dict) + return self def decode(self): - return TorrentFileParser( - io.BytesIO(self._data), self._use_ordered_dict, self._encoding, - self._errors, - ).parse() + return self._parser.parse() -def encode(data, encoding='utf-8'): +def encode(data, encoding='utf-8', hash_fields=None): """ Shortcut 
function for encode python object to torrent file format(bencode) - :param dict|list|int|string data: data to be encoded - :param string encoding: see :any:`TorrentFileParser.__init__` + See :any:`BEncoder.__init__` for parameter description + + :param dict|list|int|str|bytes data: data to be encoded + :param str encoding: + :param List[str] hash_fields: :rtype: bytes """ - return BEncoder(data, encoding).encode() + return BEncoder(data, encoding, hash_fields).encode() -def decode(data, use_ordered_dict=False, encoding='utf-8', errors='strict'): +def decode( + data, use_ordered_dict=False, encoding='utf-8', errors='strict', + hash_fields=None, hash_raw=False, +): """ Shortcut function for decode bytes as torrent file format(bencode) to python object + See :any:`BDecoder.__init__` for parameter description + :param bytes data: raw data to be decoded - :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` - :param string encoding: see :any:`TorrentFileParser.__init__` - :param string errors: see :any:`TorrentFileParser.__init__` - :rtype: dict|list|int|string + :param bool use_ordered_dict: + :param str encoding: + :param str errors: + :param Dict[str, Tuple[int, bool]] hash_fields: + :param bool hash_raw: + :rtype: dict|list|int|str|bytes|bytes """ - return BDecoder(data, use_ordered_dict, encoding, errors).decode() + return BDecoder( + data, use_ordered_dict, encoding, errors, hash_fields, hash_raw, + ).decode() def parse_torrent_file( filename, use_ordered_dict=False, encoding='utf-8', errors='strict', + hash_fields=None, hash_raw=False, ): """ Shortcut function for parse torrent object using TorrentFileParser - :param string filename: torrent filename - :param bool use_ordered_dict: see :any:`TorrentFileParser.__init__` - :param string encoding: see :any:`TorrentFileParser.__init__` - :param string errors: see :any:`TorrentFileParser.__init__` - :rtype: dict|list|int|string + See :any:`TorrentFileParser.__init__` for parameter description + + :param 
str filename: torrent filename + :param bool use_ordered_dict: + :param str encoding: + :param str errors: + :param Dict[str, Tuple[int, bool]] hash_fields: + :param bool hash_raw: + :rtype: dict|list|int|str|bytes """ with open(filename, 'rb') as f: - return TorrentFileParser(f, use_ordered_dict, encoding, errors).parse() + return TorrentFileParser( + f, use_ordered_dict, encoding, errors, hash_fields, hash_raw, + ).parse() -def create_torrent_file(filename, data, encoding='utf-8'): +def create_torrent_file(filename, data, encoding='utf-8', hash_fields=None): """ - Shortcut function for create a torrent file using TorrentFileCreator + Shortcut function for create a torrent file using BEncoder - :param string filename: output torrent filename - :param dict|list|int|string data: torrent data - :param string encoding: string field output encoding + see :any:`BDecoder.__init__` for parameter description + + :param str filename: output torrent filename + :param dict|list|int|str|bytes data: + :param str encoding: + :param List[str] hash_fields: """ with open(filename, 'wb') as f: - f.write(BEncoder(data, encoding).encode()) + f.write(BEncoder(data, encoding, hash_fields).encode()) def __main(): From 58b29be0a6012e6704cd1eb3e4905a73bf93eab5 Mon Sep 17 00:00:00 2001 From: 7sDream <7seconddream@gmail.com> Date: Sat, 23 Jun 2018 13:01:08 +0800 Subject: [PATCH 5/5] fix typo hash_fields -> hash_field --- torrent_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torrent_parser.py b/torrent_parser.py index 942266d..99436c2 100755 --- a/torrent_parser.py +++ b/torrent_parser.py @@ -373,7 +373,7 @@ class BEncoder(object): if hash_fields is not None: self._hash_fields.extend(str_type(hash_fields)) - def hash_fields(self, name): + def hash_field(self, name): """ see :any:`TorrentFileParser.hash_field`