From aafaebd5debce46b6fe085a5660e0fb5d4774373 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Wed, 21 Aug 2024 15:17:28 +0200 Subject: [PATCH] crates: Use looseversion.LooseVersion2 to parse crate versions packaging.version.parse is dedicated to parsing Python package version numbers but crate versions do not necessarily respect Python version number conventions and thus some crate versions cannot be parsed. Prefer to use looseversion.LooseVersion2 instead, which is a drop-in replacement for the deprecated distutils.version.LooseVersion and enables parsing all kinds of version numbers. --- .pre-commit-config.yaml | 1 + requirements.txt | 1 + swh/lister/crates/lister.py | 4 ++-- .../tests/data/fake_crates_repository_init.sh | 1 + .../https_static.crates.io/db-dump.tar.gz | Bin 1358 -> 1420 bytes .../db-dump.tar.gz_visit1 | Bin 1534 -> 1591 bytes swh/lister/crates/tests/test_lister.py | 13 +++++++++++++ 7 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ce247f18..5e130010 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,6 +32,7 @@ repos: stages: [commit] - id: codespell name: Check commit message spelling + args: [-L crate] stages: [commit-msg] - repo: local diff --git a/requirements.txt b/requirements.txt index e8bf14cc..dead79e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ dateparser dulwich iso8601 launchpadlib +looseversion lxml psycopg2 pyreadr diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py index a17ce7d3..e31756c5 100644 --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -15,7 +15,7 @@ from typing import Any, Dict, Iterator, List, Optional from urllib.parse import urlparse import iso8601 -from packaging.version import parse as parse_version +from looseversion import LooseVersion2 from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import 
ListedOrigin @@ -201,7 +201,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]): for name, item in dataset.items(): page = [] # sort crate versions - versions: list = sorted(item["versions"].keys(), key=parse_version) + versions = sorted(item["versions"].keys(), key=LooseVersion2) for version in versions: v = item["versions"][version] diff --git a/swh/lister/crates/tests/data/fake_crates_repository_init.sh b/swh/lister/crates/tests/data/fake_crates_repository_init.sh index b58d195d..8078fadc 100755 --- a/swh/lister/crates/tests/data/fake_crates_repository_init.sh +++ b/swh/lister/crates/tests/data/fake_crates_repository_init.sh @@ -17,6 +17,7 @@ echo -e '''created_at,description,documentation,downloads,homepage,id,max_upload ''' > data/crates.csv echo -e '''checksum,crate_id,crate_size,created_at,downloads,features,id,license,links,num,published_by,updated_at,yanked +d879626d5babe4ca6c4ec953d712e28d939672b325a4f9352f28ca3c82568a15,1339,,2014-12-18 06:56:46.88489,845,{},1321,MIT/Apache-2.0,,0.1.3-experimental,,2017-11-30 05:24:37.146115,f 398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944,2233,,2015-05-27 23:19:16.848643,1961,{},10855,MIT/Apache-2.0,,0.1.0,,2017-11-30 03:37:17.449539,f 343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9,545,,2014-12-18 06:56:46.88489,845,{},1321,MIT/Apache-2.0,,0.1.2,,2017-11-30 02:29:20.01125,f 6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7,1339,,2015-02-03 11:15:19.001762,8211,{},4371,MIT/Apache-2.0,,0.1.2,,2017-11-30 03:14:27.545115,f diff --git a/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz index bd74c75b0fffe577720c990446d756eed21118f8..12b3082d29757e19fbe127261bb86bec5cce6b1c 100644 GIT binary patch literal 1420 zcmV;71#|iziwFP!000001MOH#Z`(!?_F2Dz@R_1_X7<Tjk3IBK<kX9?Pn(TI3L@1e zE&AU(q?|NyWW+(721WT<@KWUL?(8>Ds=2xP<`4{{)I5Sw;=}PBu0U#yLJ87%B>)W2 
zRi(bYYFAgMaq`Eiy7K+b??XOLPsn}l`F|A64GP4qu*(O9K=a}r@#Xd>C2+O>zky5k z-{;9EKl$d@ap+%QkJ-Rl$|vm)+B~wq=D({-yujX{#sA#?e{QPkdg}I>Y1kjG+v@rs ze|ROZ6{1DcXsN&f|NrH>o+rekpYnLy4YzUF@4IQ9M_CmSWI|6CeH2cCb+#l_rf5fo zWcDd%yx#n^*?jL~uXw<v_@86{C5yn9=YJ{J`v33XN&Eknk7GCVV-v^UzH*Px<bTv` z?2qkl#9aT^{QvC{?{j=Tp7!;U(r>%;a4~j&WWLONoo@Y9r{VX0H~2Kxh3}n?d8}D> z*TvkAnaBS1Sogfl;S_e=c+Zo<+xm1!@8-Yt{p*}I$r`6oCl!3iGI||lj!scBfEjIa z<g~#+sJtwW6c)76Q?#hG^`L4Xa&?WIBDR2Au$9oQ(yi2uwbHuUN>%^)7mFd(KmGXX z=7)plBW|$~buAibXq(?0@{tQ(Kl$CVlGy@mNmN3$C|fcO`y%jGWkb$7g&vEJ2@+z6 zV2mtDYqG*-!dOWAlmuGArA@_@tn{{&f+;vDYeb@D549C)i>5+q!L`8JN~<N+;501y z3aivbg~i7@5-Zq(X{2P<+^nOdkXQh?v=T|Jgib*h8M#`cN?9j^MPwVJA}W@A_RbW_ zMp_#~akXN`zJgHvSU+shwI~{Xhr^jdoe^D5<j_hi-cf{Pv;dpHnP4&mBT)%O7<>t2 zg2<2n!8=Sc7W0q6nL^G}Q~|W0n8jw!A{uLpuuv$;JWE5ifSh|8KKI$mDVKmKq7;ff z5`t4D+aRLzIva~Ad2MBckc!n#rh*O(F^|?0lbGodgRTP27H%aphK#`)q_bxVHC^_} zPbg#)xCV*0GpwMrUaBB<L~kQm;_oQ{XUhN-DXJWJw#*`mlB2>Ht<A0kNEu{QGJVP+ z+Zfo5WCPkcR*{2}&KxqKmsEsLypWdZR<rN3iba$H9L^FFF>x44NG+4+5!mN!a1c}E zNOy^Qdv^Mex!A;%y+);?dc+}Bh%JZ~m>hJ{$(m5eg+vhIIdOumg4xQ*R#m_ndSOB? 
z&gqcIdP9+YjEyH%!U$|&8zgvdI2%cqvE)=rX2&f?#>9@PA8Dv66fHq3ZKDhyQP-A( zWl_-B=TXQX4iu0Cf>^=t(-S{g|IaP{3snEl)_+hEAHV-C{oh*uzdb%I@p+8L?l5uJ z$K^hrmJ<2!?OpM|ANKj+@A6#y_x{c8=`i0vcMHAW=bD#G%WxRRZW@jsHUXQ98Auvq zslNW%_i5Ny+(R(Q)m`p0JK%5}E52V1(>>ojE+Ty%$EuwAIIHG&-SoD(uJ5PmFt#^0 ztoPU)$D3J{)1Gt0NtfO~KC-p9=Vh9^Zn{5(rW<a;FuZ=qk;}aI;twMHj{D)v^Wx{Q z;Y=VaVdtVOO(RG-Yd06um>}Xhb^SwMb^F6EzbCgEN_EV4ryYACw<g?JEp2yIhGX?J zZ`xF^?z{2vq|=yLr0csWSN=2&dk)!!sk@VB^`^{^xhc!M==^eD@$-1!^>-D|^}Aiw z^@r0mULr2v<u|oe9E_YRpJZa^`@09(B`=;YpCf+e&2T?dft`O|AZobzvsP^`_DSS8 zqcU$7NU9G0IOgM1r`V2fnJzwzL@shhK1;i$Vf$$MmmbP+?Vh_xBnxELpE_2oSg~To aiWMtXtXQ#P#flZ*8-D{Z0kY%(C;$L6Ro}({ literal 1358 zcmV-U1+n@ciwFP!000001MOJLa@#f(_1a&-(Hf$0FW%f`*+n;*th(`lcu1@zQbST{ z(oFunmz3)|j$Aua$4#d^qY)$$mpC~01*yEc`0n5YqtrBlQT+b+CoVt=;943k<OK(= zxVb3Q_osGoaU4e97sW;HeH(5bpZC=L|0v2U<cMo-=NEF0=EWo8+x?Gd^y2^5z&Za% zKl+ypf^YY~z$O1*2aozc^nOf38HV@&c#X-yTFS@#4@%+#|Et;m`~~*+B>w0A4}Bs( z$7?^bn1;|dhq38e7Q1lVr*`zW-+pU%osR?CcKdYjo5Y&P_Wr~5@i1M#9-7}1YyCbk zdR&}`LpL;I*MDTkL%dz#63Md^WNR)8t}8GV$P%P6R<PAu-^OlVw8wp&`eKvXL;=|K zLqXSzZrswvexA~%VJPyk4HGrLYsQb|ifza7FjQAp#Cs_F;cA-7KsYCa7UJ%9Pu^ai z9#d|baeJ)Grn{=U?uI4{$JlIlH~n@>==S$5PY*w(4X!~J+*ZO=(v(~%XYI62xdyr= z3d9O{1yBku1OQvTX^Uom*rhvhi!K*^+8lQjh3Uhgr`^$2+F{ohZ)wr8c(-kahepRC z(Uhib##H#@*zGB0XQehrPxQt_w|7mO=%Hh?PX*l%+os(VWb1dkqG=DuaX2Sj+N2N6 zDvjJZ{iqT<-)?T_Dyc}f`Qvk@pL8>8t_rXf=amqp0kl@#hKE|CDNt14D$u1B4#1FZ zXGAIvz8_Nm*e=$?N0N&+Bhfiy<g?fzMGO1F{989=D&;61N)#4^xxD<hGyL!IX#Ibm z`Z*9^paVQn|4DN7u>LEv)c;pO*rsqZ9QSPQ_t#X`Pao!re!n@~wSY|5j+B8@3)nRw zwL_w@y%`wQZgx1<yJpyuQGLJkvyZ;LN%2xR>lAv-Iz)(wbpXoD9JLmf+axe#p?!=T zEsrUw5Tlje=A1FsPRiQ^JzFWH)lx8U8rY}L47gQ_{ruCrt2YNvyIf<*8RI2P1KFE3 zz*_Kvi>Y{nDW!CZT0p)OQq++qKuWZj)Qam^>nwwo3RO%x8q7S|5R?$v3ZJ|)StKK^ z4Rv;>ig?!q?9}U&t|U<;j*jY)nh6Sov;6Q&epsQake3_)717fP(WNLHQen&9i2%`P z4mN@_wMkGLiHa+Mwa>LMHBS%$!8?pHWOLi;?)74T3KXeWPI;-3S%g_aDa^ANk_Tjk zrlfrXOcZ0vH6jm^D;Yf0PUU249-P<7Sd7tYD+AOqTkT}b=)efl(Rx85Ch<T)R{)(B zNI@u#Fa{Y&XHODpah9O?N<wmhqmoR^86qgHm#UUJptnI-LEmEy$W;v@h@etU)-v(H 
za|#MWur|3|LrfrplJRQ||2ofTYBtD>BWkmTB<MMc@KqFw+1zT1eWI9o&Jeto5CsW` zL2#*M^fUs+oUI*%7=lwSYFbl6ov3_=n7l@%f_k9g{i6sv=_I4ySaKE-IDbx_V5MO4 zGLlyn5JS&Roid#g>L{!?B2bKx^Q4LxfE;XV3ErC&Xt{(r#heobZZ<MR3QYZgp(>CL z15im@Dnnb;%u=$<1dV;3g!H0BnOlh<f<Jic{ULC!|L@c2r!zqL>(I3?u*MVppMy3J z`oEn1E?D~iE8x>*QLLt)2qeS)u&RpHU-9sca-I(8l=Ia2zyZDgvSO3L>Cb`drn{!o z@V*%*JCRi8wT$S6MIX3RV4ckomC4!(BP9Jaq?Wq6{Jdo5f&~i}ELgB$!GZ+~7A#n> QV8N^54*=fb>i{SK01MuucK`qY diff --git a/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 index 0b7dd384a90ce172fa758391869b2d998bfd8b5c..7164da399f78a0a27ead124833eaf7ec7b30d344 100644 GIT binary patch literal 1591 zcmV-72FUpziwFP!000001MOH#kK0BPp0j=hF=vX?Rn<@4a@<1@!0>4=gJwT8%t)j_ zQdxV0{P!(VV|zT-#A9UQAXyd!h$8#(^;fTts&20SbQ5ewsreg>5|6(>*cC{vQ7Azg zuLOVrx+>J45AEveG){gfiYwpk{h>+2^vt-gJ^!0pb%O$NE8JG7tw15rym~}@yZ=e? zt=0eE!7lm#kR~7f<g1^@zI%l|W(R92pY=az^Thuel({OzEA0JM`#<;pubZN{p4vlV z8V<+ny14!n4{rt5Leyv~Z4EmKg#Gt*IZp_KpVD~S_P3!w9NKA~7fCDh(Lxg~f5>Ev zC?%#Oth6#E6=mZCU2lHdZ2szFul8V<@_)|#*K7jcUjL;8SnL1yuxI`MOB%+u@5U;O zzx=^HzEb}Q1U;?)#?V^-zlVjp6z<2<p<GJ(Z5tmh#`eF7FB4zKTR)Yt|4+B?eH_co z_fA6^OCGy#L+ZxF-`)LKcD&5-)a={wjwdzm%hNG_nE&2)_bG0oHBO_Bs_{*d!RsJX zaEhV<OlYGcrwulQ%FFCXVMZG~1&c~s52^$rSC+^nVhgwhTL@h%T}xeA?lP{lQkB2{ z#={WGAOH3C=7*!_BW|%0WhpACXq(<0)4&a{oBV!R$!r0(BnqKwlr@=(V-a|(ydh^D z_cY8pM2Lt@0Apm1T9Xwv5r$0K$0*Q>$PF&UXr;F~bL(rIlqDk3vWJ?}3r&GkgR6nj zN~<N6;500Fg;nYzVDULdVt_T6N=jzU%@`#$kq1CZyqVNW=-B8igO&<aOgb9OBH0iW zQMTlhcP3LZ(%R5uS1M-g4-kr<<HH(VjiTarIGice3DKoU4z0xO9R-L+3$PKKX-tB~ zNK`@*Ha<6G8j&CZf_E5Y$mWy7=kl}_Q~=bVn8j+YA{tAJuuv$;ya*wCK+c+m&wciC zOu0c6K?=nY35`=Z*+vBCbut!X^xDe6vL;*YWX$Nm5c6m~F^P$uFz5=<?BQBMWylyT zK{|V;P}Aj@{DMMufvb>sJHr5_^-?ub2lO_OCH@^7;A+_b1qv!PJX<CaL{33r2-YT- z8;A*HP%?hWA^R9OjARAcIflqdNoNk3&`S{EizuXJy44)}3^9wGfzw$+Bqk0638`gd zxyP<1ZyN_82F`RBHSejRN!)BgOkSf>K|SG+3d9;j4NML?>10VL<U%3{@sd2jTEXmP zWUne<gkG2?W#@Dg$$CS9V~m|ARm1@7VB1LW-f%UNE@4hF=fr`VjSP_kQ$Ind3KTU# 
zEp4R?A5oW<lVt&D?8_+R4<`!9x&JZ1jgQD7CtRbO5VgrEIiW1qMa9X^>42bpc9E5o zQ6>nLB1X;@<r$(*A#f2fat5krm!8|Yosk073TH00L_p4XD~QAlmc8fr#wV!7Pj3na zUzSX01z7SpQopxWUbXz++Md;a=U2Q}P=8<5e+&Sg>OU!P)qmf^9!tD5hM_%9tom8* zh0`LD9)5ige|P;M9sMrN;_u+!-JXu~{j*x^{2`URTwK16{n$?Z@K^+HE_NVkjHUAW zjqhTAC|E->$;B>pi36}7#)9t`{dC7Shh<2Y#<9q!F3iyUzMbAz*X7+b9mo3ShH;P8 zFy72Vx$QYe+;r*V<AJ@sJug%3+Uf4pRBeCL^!@!qk6h+`9R4sQ^oPeY&x@bahC6{Q zgq=m1G!@rMXYJ-<8{?A8I_3I@u4oU(efmgl(dS}FyVIVdkfkXL>*ckp$o){f;Z3XJ z?Oi)Qopc%#4{5u0N`*g7{ee?<VQP2s3~x&Om_>QwMdv?v3Vt5$+HP0yT)*EJZFf9P z<0azKF1;(Q;$-Ar`79HA-|Zg8E`9NQ`I6yh-VB$U0_^<v5>dtC-&(b~sFTTYM`hkF zkW?J~Fs9+TQ*6igOcx(UB8wa&UxaQE?4M2l)<YTAZu46Nk_9rG`BYq7f%|7M>sF`* zR7wlsa6Z*iW*29VKBvWnY1s8$e`rH-Y`g7o=zmU2tn)cbX-SbGTYhNv<)bkdp-#TN pZ@1~@@wn&v`P$7_TWz(~R$Fbg)mB?=wblNi_CIYdJ;nei008fJ7i$0j literal 1534 zcmV<a1p)dWiwFP!000001MOH#Z{tQ1_F2E8;d6`bs($m9;~s(lgE{TRX!b*9ERh09 zB{K=~-?vCP9>*i^c(9Hai{%Fal*n%OSC6i*TGh?f-wr{rTF+OoT0Fh}i7QYNgi%&V zbtOP(VXv6}`>9=BoyI8)j9rx>bn*V>ao>9WKZ@!G1>#ott<YP6LZE%~jQIZeBU-h_ z|2=Ri{wYl1EtBB;<1bO@HU95`7vmp?Fy*m|<1hd58gqb+QZL1y2*9)WYp~{ui8rwC zoA^JEe;hK!Io*bdr#!}?Jx*=k@zlrDA$L=F`1NbI??W2-U4O{Ou*<wn{186fo{sbV z+p+yE^DZ1Rr`M(Ba_q-;>W9z#bW9H`Y>*;LNwyV|2~&fuK~<ovb&_9y2wmzAtUDc= zJg{BvG9_R?jEwHHe!8QZ!!o4H<H*XXi!-&bZ>P`IHNTst<5=I^5bv=X#+!L42kE_( zM#{&>14Vm#eoeJ&r@K>Awf#-g_xChddd`=5kHa5^#QyMf=K1C4ZG&4-Ot@OwTG>iS z?VX#=R9m1&k|EY0YJf@#DFOKP4;^a{$9;ZuF6#>$^6s>!EX<#d16fCR$-;hMA865v zeY|VO=aWujrXg+DPML+%)E{WeE}q(*0@0f?J>Iunrk76bAv1a&@7iw1Xl~f=S=${? 
z(|E~od6z$Nrwt14&5NGchi>;URx3?@TfV+#_}Ook$_)co3sFm1SwQFXLwe{fT86R) zUxTTf^Z=IpcEL&P7=|$qFIll2Ka;!27=_+jt6s$pDO>norhl(dw$`5Vp+f0E+Rbbg z6<62_$3Rmm)B-APgzz|9wUpT<*kdSp31J>~eb*n_$c}BdJr4cPdC7IMlPf2+w9;`7 z!YXMve=_DG)hV?1?Ka;$9e1Ydwm@`ct#M9JqY%(<HvjE{wBlXyqW=Gqhoum`K?m?H z{Wqwe>%X>k)&KWEyvy-^JRSH_h;B*N&tDcrf7%>8wg83hou~rOEx^8wxf?TGyZe!o zcJt$@*|*~zO=_M>KmQ!M`<yn?JFhWdF)=|xY$8x)F3A|_gv%0RktU=h(210@jwv}6 zTqy-p<CS_S&=-|LIin;8Z-D>)2h~TXHUIG+A8&p*2C{OCmEc@dFkfiC-2!Z-U_#E~ z4YpFsYia??Mk?7P8UQ)dV#;V?Qe%pWMrqV3n`E(w>|)eX7AHdv-WHjyaxON-pB3@3 z32<|-qfM=dB60MjM|vSB5iat>zw*NxeT||L0H}$c$%sBD>5+u31TP~bYXrCi-ZnNv zV-;$lB{rcn(l#PP0)*f(saWhor}Ly2BTbr>$yf8fG{_yo;zA|us~A!QRE@SGy8&j3 zDVGLOL?yI}0UEDMc8!QWm~0)U6pT|5npB+eDi!o#CHc_>Nlwi2nFl=s>K4g^P+4g$ za!}r#U1;P*g7PaDQV2XrGA(C`pmIU!Mwy7gMd>8{P7TnZvjG`p)VZPADvKyejv8Zh zF8k6z%AlfF>1!W;>1Pc!8_2CEYKsrap_eGaS6L{R>CRB@6U8D*ff$T}B*}3YrBFtt zKv$rgbB%|XV)WW4Lu(pnGL`R`a-edmqkiVYr>!V@<rSwhwpyGBLcC_5V69<}GE!7E z5W^sBlM8i;O_DBH87apocuFUXKmm4*f)H$uv|PrLQz@Aew^$VuC8l}CP&1@v5NhQr zZOKH#9c`9{pmncvA$@40EK(wg;3g!bloP%&O-#m?oW0NvFl*ZEv>gykC_Y(|8*QV| zIbovRq61MhImSPmEN$kHzT7o&?WnyMGg*qBryF0WvY$RMB2NulMbwbfIH~{LyUM%v zV3+j&kf$*B0M*Z9-@U;a-_U;n#y-=3Wwl)O|2^>ghOz7Uh)8ZY9ItD3{Uskh(w?US z8tr-GECq=q^`F;#o^bwi=(g=|>4<S?r+HqIhLk-yD(j9u(voDOQkb)#TA)bGlWM4Z kU2pzaPZ=v#tXQ#P#flXxR;*aDV#T}SU$VCX?EokM0KFIWZ~y=R diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py index 387ca266..ebc9220c 100644 --- a/swh/lister/crates/tests/test_lister.py +++ b/swh/lister/crates/tests/test_lister.py @@ -26,6 +26,14 @@ expected_origins = [ "filename": "rand-0.1.2.crate", "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", }, + { + "version": "0.1.3-experimental", + "checksums": { + "sha256": "d879626d5babe4ca6c4ec953d712e28d939672b325a4f9352f28ca3c82568a15", # noqa: B950 + }, + "filename": "rand-0.1.3-experimental.crate", + "url": "https://static.crates.io/crates/rand/rand-0.1.3-experimental.crate", + }, ], "crates_metadata": [ { @@ -38,6 
+46,11 @@ expected_origins = [ "last_update": "2017-11-30 03:14:27.545115", "yanked": False, }, + { + "version": "0.1.3-experimental", + "last_update": "2017-11-30 05:24:37.146115", + "yanked": False, + }, ], }, { -- GitLab