# HG changeset patch # User David Schleimer # Date 1337289314 25200 # Node ID 312f36a425f01328c9f36b831019c30b6f4957fc # Parent e42a05915edfccf818dcd19d83c83b07c7abe9ca Ignore invalid utf8 in commit messages Old svn allowed users to include invalid utf8 in their commits. Since there are real repos with said invalid utf8, we need to be able to import them, even if svn won't. diff --git a/hgsubversion/replay.py b/hgsubversion/replay.py --- a/hgsubversion/replay.py +++ b/hgsubversion/replay.py @@ -52,6 +52,17 @@ def updateexternals(ui, meta, current): else: current.delete(path) + +def _safe_message(msg): + if msg: + try: + msg.decode('utf-8') + except UnicodeDecodeError: + # ancient svn failed to enforce utf8 encoding + return msg.decode('iso-8859-1').encode('utf-8') + return msg + + def convert_rev(ui, meta, svn, r, tbdelta, firstrun): editor = meta.editor @@ -164,10 +175,11 @@ def convert_rev(ui, meta, svn, r, tbdelt islink=is_link, isexec=is_exec, copied=copied) + message = _safe_message(rev.message) meta.mapbranch(extra) current_ctx = context.memctx(meta.repo, parents, - rev.message or util.default_commit_msg(ui), + message or util.default_commit_msg(ui), files.keys(), filectxfn, meta.authors[rev.author], @@ -203,7 +215,7 @@ def convert_rev(ui, meta, svn, r, tbdelt current_ctx = context.memctx(meta.repo, (ha, node.nullid), - rev.message or ' ', + _safe_message(rev.message) or ' ', [], del_all_files, meta.authors[rev.author], diff --git a/tests/fixtures/invalid_utf8.sh b/tests/fixtures/invalid_utf8.sh new file mode 100755 --- /dev/null +++ b/tests/fixtures/invalid_utf8.sh @@ -0,0 +1,34 @@ +#!/bin/bash +#-*- coding: utf-8 -*- +# +# Generate invalid_utf8.svndump +# + +#check svnadmin version, must be >= 1.7 +SVNVERSION=$(svnadmin --version | head -n 1 | cut -d \ -f 3) +if [[ "$SVNVERSION" < '1.7' ]] ; then + echo "You MUST have svn 1.7 or above to use this script" + exit 1 +fi + +set -x + +TMPDIR=$(mktemp -d) +WD=$(pwd) + +cd $TMPDIR + +svnadmin create failrepo +svn 
co file://$PWD/failrepo fail +( + cd fail + touch A + svn add A + svn ci -m blabargrod +) +svnadmin --pre-1.6-compatible create invalid_utf8 +svnadmin dump failrepo | \ + sed "s/blabargrod/$(echo blåbærgrød | iconv -f utf-8 -t latin1)/g" | \ + svnadmin load --bypass-prop-validation invalid_utf8 + +tar cz -C invalid_utf8 -f "$WD"/invalid_utf8.tar.gz . diff --git a/tests/fixtures/invalid_utf8.tar.gz b/tests/fixtures/invalid_utf8.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..537021f18693380b7f1133bc0d8b2558a9966706 GIT binary patch literal 8227 zc$@(vAl%;{iwFQ*K(tQ)1MFRUbK5wQ&)@nKI5{=4Q=;T&a_5xy*5haZ_~f_da|(rZ>(>xZ*0N+jg3usUF_U{)1ExZOezy0o&?6fh*UUG$yM*E z#4Msgi@jRy)6V{@qfRURl-^ezIKa<&yyuSp*7nZS_-~@Pi1qtwQ2Bb~<9~h;8Ug=F zah7#Q(8W;4B2hza!c-@dmje1N(hDUTW)#a{5XKFWY8}D2SB*HhF7Xawz=^)zV47EqYWX)Dp(ub{D+D1BJE$c(!ntL#&uky|F@s7Z%&Q>4m@A#|3zFY|I+S; zaob$fYAfRHX~%nWa`20IeRA?kZ3XU}gA#V4Ay87#R)q0LU#dWKRS#9IjKLE4QD(xC z67*tdps1fHnW_LxL)o%qu>s8vM2J{-!o*PDOQ=Zq6FCr2c-8nxI82*BS@~HiyAhZZ zXsApka4+>+Ya*5dXhXxbt|3-Y`HwF`{{je59cXpfxTOlj8k)1#6yq?xAmh^0QKZLM zwN3)COo5O|KO3kxHM`V#AwFz;6sMiHCuiQ<{qxswV-*VyB~s8_M`5DYxX9*5VPEE} zC=$j~Moq}B&Qc`RWzMO<650AF-ko($t3*%)nXS$~Vi{Xv`=dBGIXdbbpO;GbdVu{A zx;Op$&PQ=}-r0Y{;|;HGh>%~RTA*WTGNEd;FsYJJP5}5H0VY^qBOV=cAsS6Le$c03 zmIB-339tzH8v~61Za3*=eh#-1d^irH2ncnlW*7sL*93K6s+ep#v6}Abp-R}S3#=Sr zgb%eNO3_FmAA4X^mVngY8v~f-qMyk`#;IaFL$r^CX<3Poq;yqD@1T-$^J3MkfrimeW^Kcuw$*ep z41F0z#k+)51J-!b056eUR2@CVihu<5me4VA#*5TQ0Z&vi2n}gWgvKub1yYAJWZq#e2HR0E0y@sj}er?rjR;9ro z+w~eLEcm%oukjR+ESn(1hSW`fP0?c%LKrj&ld;!>27&?vh8i7%XQJZcEd=^IOy2XL7rqj$R;#f_^1ON^CtbyQhInRczjsH5q2JDIkyzmr=L+9G|R=eH!yXEFsfi_HG zT7r}r)M>p$gHqxq(v(e9!{Bo^1I)#evLia z6^~f2^}<@A&UloF9O_bV-@vge>_LMEH@QpIPf@0>tp5Z%Hx3P$i?M3JjO?Q;8*_C6 zrXn>3Rkl@gPFe2O6j&co?<9!^ZqhJ7>p&v~6Kl~?6rcW2(VYKn)y2l2e%P#03pO6u zId%9SQE=)ZUI1_Y8~Xs);QyU1!2DDA|HbC>CH`N;Rl)yf=liGU9_rk~^QsR}7LP0a zfHKfeKgA`?LAKIi9WrKsNCVBV-wNFq`~);cb3KKdx(R6U%3UZs3s<@dh#T(TSD1zQ 
zGD8)W0-QitN?<{z*s9dxaqdImDIDy--T(3M&EffB=S-|3LnApmyasrwtAGz{IeywD>8LaSYaO6ApzYr+RkCaJKzT&XE2^vW7?BiI@cdfg6SztcZhDT-RgQwRpJR)0;#2{fMpQD8 z04oiMbOKcZdNdi1zJ@sgU<_z!?oDMtvDPMnYD)^pAYCeKajs)_qM{JyO3r4$oZzq& zokZdjf~G~Mu5xgkD{rS{!(8d`03o9n;qX{Bc|;ZcwGgkUsxcpnXH-E#xp!2#dt(nt*vV57+kzp1R)(@BVGM&9GBFj8 z!bHb5ZiBo8aRog^ZVX#b&>lZQqpZi(v^Er20?%Pw2G|N4e!!?6nxit+kxW7wzm#(} z=m04SLt#~!$v|*MGRjmQqky3?SvME5o^@~{nZ;m0ZAQnzfl(-h>>3?qDce7*qJjAh zkZBlK)FW98(;YMk6tJzflTuc=)wG@PvuK!D0me{K%*|ZY1kVjnT$|fRV@KiZ991Op zlS?hx^6YS;*F1)XrC?wPNjJ$>!i+N*`mqt7xw$p4ZYbt9pEar^vgob0G`&)ytDVA( zxol(h%@At$xbdcqEx8LR)R0NotrDwMoO0`mgHv8(;rGb*0+tLT@Qh2n$(>QpqiO71 zuHSzb*3&UC-l2><1UMcv&%bO9C)2Nc{FrC^R;e^}a33%NmBR^P3z+{lY7d^n(Se{S4q=e?-9ad9^20%L6IB~hc0Sn>~qQQZEN(mfBG zNcd6t(k=0D11wD*!h%~vT+0!VvGZkY@dDHil$*9*(zm`2lxuyWRnd@0xqR>;H|o7<5)xU+V)ML-uqVIw!Hr5p5Whg z`0x47_RRX9t>+s{{I`f}X8q4x_?HXEEn@$uTbyye{UV@SuK}8cgb%$6==?9o-tqoX zrvwSnFj|O&Oe!z`nSz8s;_Up@;qga-nM3dVjzCWE&#<3zEsms(!;aElVc?DwsXoU!YIV_(kR}IG^#!NiBUJ5thIzCLS)?Tfbqoymxq9$(t<+F zN)}7>W#G}=oEd}`DUfZ0KO{K*7Y0Lo>kPqEs6UjgdWNhtFuSN=6B{5ZHba2|FS1 z7K^6SVMwp9407)^O#KnAQAxu71+JzUg9->yN{B>JH*yx?E}9w}$)w4NpcneYfdY~| zYan1Q_p)g2+O-+GZN;)oUvWQkP_j8OWZag33D zRbW6|*8ouXQ`)+EL1Jm0VPAOYl0r@O!ZWoC4RN)isDF^&=Y8+5>)yY8tcyR1cA!S> zIE$ic^5y^&H$~If-mn7Au?EE+NmHQmBy$SN1l6z|Df^mT|G>M-aGV{8!i5lBMT-Z~ zM1kS#QZ=j)$7uct z@YyxPC9mH4Ld3q@!xyU(84KF8vb1iq$!;<_gKk_bnIW!M%N@(6!$#;(MKZRI4!(<; z%r%3XC{f}11UKfPF$>nf>^n;f8WcE;frH7~gFC+E3>OP=DZkZK&|Eh}f7BG0Z60ao zuS#$&a6?ToOCOC-l3UPfYU_4;YQ7!}?_RZjXGNS`TWeDQSgcr(2irP2et@(C; z%~yVyng3ClNHPBn_uPpPohR3&qI%+8Q%^FUc3KeCjavS3M}#8BCeVI z&uQm9h6=rd*ZapmcV_cIRUx@LACxb6!M9bCPEflWJRbxNy7SIZP4a0WH_j2&R_?F? 
zgu_GCwxcDy0fhWgt_a(5ZwrWW4zfd=j+MN@2Jv!^$?m74k}h8BhNIXvj>BEeQ@M9+ z&5W`t%b1E?o$oFr0E2iwh!KZuRw_e#tU(KDU)i=3oc#@1k1?^AV6DNk%ipp>BVG@ zC*1R2_fL-xkAL39h+mlFI~(>Y<`N|lh%g}uM~qV|NDhN2Hs(wn3wW!2&ec3FN<=Cl zeT0U%lc%c)h5>L0O9jczfr)5fzOV;^T+KlfJExf5 z=)3}*qGZfcPLVt5hPszU!_Iy>IX;xD$W)u8Wy^jaH&%&QdVqxmpGQqG+Fg%U~L%vL{IFrHQ z1wNF4?Y}*h!S221d$jNVs4hH-=M&gMbknAu^X>R{GyY3~yYYEL{Q*uil#_X?pOqkp zWt?ZR5e_8!BJ74BQFZZ$*3d!h-)_f|L*vT z3;;_9t4%iJQyBn$VNp2%uI)R?0ubWVlbh!O951iU1fUZu%~xguTy%X-CLnLfYg<`_e zqkjsJde z0nn_JTzMYQyahlQZlf^UotFVQbaD4{VCVGVMnUAlW4PBK@)yDL92A7mBDyy^95yoWhfv>I5x>q3&{ zfk(@gB#&t&Ns+H8)cEGi$wKm ziU_0{Ulb z;@#<+;)4!_r$(j2BmtLVB;&NTR--$2|7L9M1AiJW`dDKgVUkP7_H+QU*mO{}{af4b zGguB*`mTsV$y{TNSzP_j$Es{ywq%PcnGAFWn3zP4Ei$rZ=CdFMgN1mgHZEk35W8ZJ zWfa=A=)6Y0P6x`=o1%{XPn{8S9coc^E^(&P{DXq*UO}t*g?vpVxb5IG&|yF1c>@U~ z3%_3y4W`WlX-a1TNt<_}I=Lb4CFh3;%N-fC>4hrms@U8PgkjiaxZ#FBN|K*IW>Zi+ zMs((Q&$_QUZ=JXTd&yOpZt!g7O^gmiQy*uUM<(aZ7dujsr9CQGTd4@t9uvv?jQ%e8 zUV#^Mt*}*RYvht@&f&N369GIL3BMddvjt|~yW$(njzM-AAARS5Kg&o6y4@rKlqZF< z4O3J^Q#z6;e6Ej0wy%76+lo!YRuu|0g&yKDb^B*;kf4eYYQtedm%Gf&Mg01b-2-EEG9gT=f;c?(ttiG2>xCKHrq%e4z$ zcvP5{4@%DWJ^oqC&6FJoZe?f_D5Xisj**N8#a^kZ{=kIHSQodE2A{HJo$a~Qi0PC3 z%Q`D7Q)P0oax;xp-YE3>gfeO4M6TOJlx_{Bj|fpO${|dQ^3>^@6Z|gyocoqDmDYHz z^0%^v^kvt)r_y*K2MBP@1<`@lT42z03rBcl%92W3+Cn@ckN1YnaDGoM3#|Jt)fb!cxi&&i|2xs z2-b3m2svQX%|cwRfy#yIW=MeQ*ubFZBz;M;gXbMJ>D-q_PQmjm*BT`38056k51jM4 zS*IPKMZc!!7lu+s;Gm=8Z()oQSm7ZHfuU)Vc)s`ST_GZQ}^Q zffegWCVjUvWrKv=*9qxm3l^+uR=pmhHgO4BCE|u7>xzo`W29nZ^IvVV;Fs6M0}+8t z@!DF8G=Z&uJ`h5I9fYx0BEBcq@uvJ#y}7ybd>J5lRG0PtNqgT{9oP8(o9i#8<3C%Q z%l$u#xvKtuHLN|0%}7V(u_DG(DIr{Nn<*<6ux7VQgCw1hI?%yyBw)YcHwO_e~No?!RIA%Imj6*5{Y``yk22gOm(OgiQyO9}N?DnN&| zCHAMvV@&RyYvxZloa;sSmFb{WInpa z6~*DLD3n(fqi1SfIlusOg9pVSzKy4q7aF`yG^fCxv2&j17IzP)i^_O{S=I8(&l-qJ z0$(QVzwyBHiYQ`dmllaC_tnfHus8gapEsP5ABbP*{=Y^`oDvT8t+&!o(6W;ozS>9{ zYkB;E9DMf5;la4{1f(Z+$v_sJ27To_=O@_7e`rK^?Ss9){Iz{{zV`}Vh1Rv-?48AU z=H>wag7%61<0Ool;!PT~+9&V!P64&Z1iEzyC^}5DRJ9L|_x=OO?L+@$AJL*6ZpvCF 
z$>cW>;ol4;_4fEqlm5z|(_i!!H>>nc^9Q(v3pqZtds?@<72s(8qFy)WvpE z*FmJDyF5u?#21p{a43pAXXa-#!!O;v8mSm~eBL$|9;0C0fP@fz^L~h>ffRM4+0U-d z$>5msdb1?Hs`))DB^;_ZQy;!IZ+~f{eMU^C+JZyYfj<5({;wVZcF%u<(8hlo1>s#9 z%oayuG8O5t><{#Rb{zkM!g9g#KSluBzka{+vP}uZKnz#=nd>;T{@b^I5Xa2U|6`!F z|GF${2zXb0{@LJo{|`bMS^pmcXn*7TL)CHE|997aKJ)GV?B!`X-cKXxCrNr2SGTG?G$l$f&OnqNdqkX5XZ0o z{E*q_zfrJle=^7j_yqe$k#Frk26pXF2dLxF{x1!8^S}9VVD%rP;F(K_)8+qleuV#b zec?QF6Z}~4TB;9O>@c-F^GN8?_>zzl;-C1l35jMwlDt*ljvW(`S(lV9{!ITWugbrL zbAEAV7ZP6`cKv?{pF4Q|6YV|!$M*Su9Q+smPLzKWnnZ7{58pYU|Ie#=HHh+4&LHqku`uqv6RaU)Re+QvIo*B_9*+v(`dixns1_rOKVdy z3=LU>pkjfww8Axr2qa}A;=H-JnFzkR;%dMyr)`NJmT|gHF{tjFD9>a*pvql zmgRvfS6z1UJ^huDszJ7^tajhLd3t8TVnEn=q-kQPQPWNs7dmnun5*2DMlB(Ecq0+S z(1$A>@P=m@5|n|KHx5SF@LxaOJ)3dGQDv5HQZ#YF`TLOyAQQS{&{G008- z8r7TSNBS2e8a=&f-bBPHH&si}1>eG-8Hf4_G|yc3fM-M{u7p5+Zjogh7?@h!rHbu^ z3DKiLjS4Lt!JtRV&GXd;BZn7PS6b0)ipm@L431SGgsow1Smvs_h3?;;xa&J84r_oy zaw8-{rS^1cF7A8nbW8rbHgd0RD~Xk03(XvWtpIj$PecgWrID}oEe4G7s=8P=>L3UB zs~!wg)dDl~fb7d?l&Up{Yz2*EQqNjAyc>o%7~x%!gMtXJLJ!0Z#F|x?3}Z_tEYoao z)bZ(j^%AooaJO3Ec*}~a#yBnN=O$@A$4m)oQdsi5RbKK=jSCbx?O{8$w1j5m43HJA zO?+Rg8&qYqgm`Igg+Xxih!S!wS1`cc&DtdFs~V}_o*dEyk=iU%WdikSk^UyLy4vF4 zag*%2boL)+?;r39_KzbQ|7jHT z?LWlcKj6^*Ur6`#A0)8n|D)jS{Q2c6glzxj{Nn7@`Aa7kCob`*dO9uXl*UedyOt-o zc6Lxbp+0qL`Ch$Jm$-rYCju&xOw5uwAt@t)oF#~-9n0AT0bkxeZ`G~-8OfzZQOv*yn zXYiLt^Cn zq|*2Oe<*)Dc>Wjp`~FX?|BnIff**1-(s@64*^cuQbpF6)g3aPs&}Ma2A%Vxo4Ax%HqloFv;Xgbp@*vD@bll^{htQ*{BIP%(U0}W`-@%u&k%JS_W!;3 zuOzVg&rx9c|Mwf(|4Y8AbT+#o_5qLI{|Wri@_$ExUjGdn0^lJ3i|z40Nl5MdKL&hf z!EYfDCLA~kDby1hA@-hdZ!np{vEPuVKcAHU!Hbse-PJ49Pj@=p8s3_A0F2KEi5c7EG#T6 VEG#T6EG+Im{sF;`WR(E$0018sFo*yE diff --git a/tests/test_fetch_command.py b/tests/test_fetch_command.py --- a/tests/test_fetch_command.py +++ b/tests/test_fetch_command.py @@ -175,7 +175,8 @@ class TestBasicRepoLayout(test_util.Test self.assertEqual(repo[r].hex(), repo2[r].hex()) def test_path_quoting_stupid(self): - self.test_path_quoting(True) + repo = self.test_path_quoting(True) + def 
test_identical_fixtures(self): '''ensure that the non_ascii_path_N fixtures are identical''' @@ -186,6 +187,12 @@ class TestBasicRepoLayout(test_util.Test self.assertMultiLineEqual(open(fixturepaths[0]).read(), open(fixturepaths[1]).read()) + def test_invalid_message(self): + repo = self._load_fixture_and_fetch('invalid_utf8.tar.gz') + self.assertEqual(repo[0].description().decode('utf8'), + u'bl\xe5b\xe6rgr\xf8d') + + class TestStupidPull(test_util.TestBase): def test_stupid(self): repo = self._load_fixture_and_fetch('two_heads.svndump', stupid=True) diff --git a/tests/test_util.py b/tests/test_util.py --- a/tests/test_util.py +++ b/tests/test_util.py @@ -7,6 +7,7 @@ import shutil import stat import subprocess import sys +import tarfile import tempfile import unittest import urllib @@ -301,6 +302,23 @@ class TestBase(unittest.TestCase): proc.communicate() return path + def load_repo_tarball(self, fixture_name): + '''Extracts a tarball of an svn repo and returns the svn repo path.''' + path = self._makerepopath() + assert not os.path.exists(path) + os.mkdir(path) + tarball = tarfile.open(os.path.join(FIXTURES, fixture_name)) + # This is probably somewhat fragile, but I'm not sure how to + # do better; in particular, I think it assumes that the tar + # entries are in the right order and that directories appear + # before their contents. This is a valid assumption for sane + # tarballs, from what I can tell.
In particular, for a simple + # tarball of a svn repo with paths relative to the repo root, + # it seems to work + for entry in tarball: + tarball.extract(entry, path) + return path + def fetch(self, repo_path, subdir=None, stupid=False, layout='auto', startrev=0, externals=None, noupdate=True, dest=None, rev=None): if layout == 'single': @@ -333,7 +351,12 @@ class TestBase(unittest.TestCase): return hg.repository(testui(), self.wc_path) def load_and_fetch(self, fixture_name, *args, **opts): - repo_path = self.load_svndump(fixture_name) + if fixture_name.endswith('.svndump'): + repo_path = self.load_svndump(fixture_name) + elif fixture_name.endswith('tar.gz'): + repo_path = self.load_repo_tarball(fixture_name) + else: + assert False, 'Unknown fixture type' return self.fetch(repo_path, *args, **opts), repo_path