changeset 910:312f36a425f0

Ignore invalid utf8 in commit messages. Old svn allowed users to include invalid utf8 in their commits. Since there are real repos with said invalid utf8, we need to be able to import them, even if svn won't.
author David Schleimer <dschleimer@fb.com>
date Thu, 17 May 2012 14:15:14 -0700
parents e42a05915edf
children 772280aed751 a103d5211237
files hgsubversion/replay.py tests/fixtures/invalid_utf8.sh tests/fixtures/invalid_utf8.tar.gz tests/test_fetch_command.py tests/test_util.py
diffstat 5 files changed, 80 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/hgsubversion/replay.py
+++ b/hgsubversion/replay.py
@@ -52,6 +52,17 @@ def updateexternals(ui, meta, current):
             else:
                 current.delete(path)
 
+
+def _safe_message(msg):
+  if msg:
+      try:
+          msg.decode('utf-8')
+      except UnicodeDecodeError:
+          # ancient svn failed to enforce utf8 encoding
+          return msg.decode('iso-8859-1').encode('utf-8')
+  return msg
+
+
 def convert_rev(ui, meta, svn, r, tbdelta, firstrun):
 
     editor = meta.editor
@@ -164,10 +175,11 @@ def convert_rev(ui, meta, svn, r, tbdelt
                                       islink=is_link, isexec=is_exec,
                                       copied=copied)
 
+        message = _safe_message(rev.message)
         meta.mapbranch(extra)
         current_ctx = context.memctx(meta.repo,
                                      parents,
-                                     rev.message or util.default_commit_msg(ui),
+                                     message or util.default_commit_msg(ui),
                                      files.keys(),
                                      filectxfn,
                                      meta.authors[rev.author],
@@ -203,7 +215,7 @@ def convert_rev(ui, meta, svn, r, tbdelt
 
         current_ctx = context.memctx(meta.repo,
                                      (ha, node.nullid),
-                                     rev.message or ' ',
+                                     _safe_message(rev.message) or ' ',
                                      [],
                                      del_all_files,
                                      meta.authors[rev.author],
new file mode 100755
--- /dev/null
+++ b/tests/fixtures/invalid_utf8.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#-*- coding: utf-8 -*-
+#
+# Generate invalid_utf8.svndump
+#
+
+#check svnadmin version, must be >= 1.7
+SVNVERSION=$(svnadmin --version | head -n 1 | cut -d \  -f 3)
+if [[ "$SVNVERSION" < '1.7' ]] ; then
+    echo "You MUST have svn 1.7 or above to use this script"
+    exit 1
+fi
+
+set -x
+
+TMPDIR=$(mktemp -d)
+WD=$(pwd)
+
+cd $TMPDIR
+
+svnadmin create failrepo
+svn co file://$PWD/failrepo fail
+(
+   cd fail
+   touch A
+   svn add A
+   svn ci -m blabargrod
+)
+svnadmin --pre-1.6-compatible create invalid_utf8
+svnadmin dump failrepo | \
+    sed "s/blabargrod/$(echo blåbærgrød | iconv -f utf-8 -t latin1)/g" | \
+    svnadmin load --bypass-prop-validation invalid_utf8
+
+tar cz -C invalid_utf8 -f "$WD"/invalid_utf8.tar.gz .
new file mode 100644
index 0000000000000000000000000000000000000000..537021f18693380b7f1133bc0d8b2558a9966706
GIT binary patch
literal 8227
zc$@(vAl%;{iwFQ*K(tQ)1MFRUbK5wQ&)@nKI5{=4Q=;T&a_5xy*5ha<zKQLW?PT^g
zo12A5NamU%bOb3yxw-r7*AIXXNsb?h<;hyCsfkPy4WQBJ{&hD1w%Skb{aT0X#m)}?
zyR*HsMSpK>Z_~f_da|(rZ>(>xZ*0N+jg3usUF_U{)1ExZOezy0o&?6fh*UUG$yM*E
z#4Msgi@jRy)6V{@qfRURl-^ezIKa<&yyuSp*7nZS_-~@Pi1qtwQ2Bb~<9~h;8Ug=F
zah7#Q(8W;4B2hza!c-@dmje1N(hDUTW)#a{5XKFWY8}D<g!&}mZ&eG^mJqMBh;=Gt
z5Hv-gB9*G9(1{r6AnZ*<FN~BC7b;OAi{Ymc6P=06SdYbp9@l`F9O*C+zD|;Cn1*qm
zDlHn)s@2|y@Kao*>2SB*HhF7Xawz=^)zV47EqYWX)Dp(<b0AZEq4v;B{sUjF_J!7$
z=3cG9?e+iWHr!h3{{>ub{D+D1BJE$c(!ntL#&uky|F@s7Z%&Q>4m@A#|3zFY|I+S;
zaob$fYAfRHX~%nWa`20IeRA?kZ3XU}gA#V4Ay87#R)q0LU#dWKRS#9IjKLE4QD(xC
z67*tdps1fHnW_LxL)o%qu>s8vM2J{-!o*PDOQ=Zq6FCr2c-8nxI82*BS@~HiyAhZZ
zXsApka4+>+Ya*5dXhXxbt|3-Y`HwF`{{je59cXpfxTOlj8k)1#6yq?xAmh^0QKZLM
zwN3)COo5O|KO3kxHM`V#AwFz;6sMiHCuiQ<{qxswV-*VyB~s8_M`5DYxX9*5VPEE}
zC=$j~Moq}B&Qc`RWzMO<650AF-ko($t3*%)nXS$~Vi{Xv`=dBGIXdbbpO;GbdVu{A
zx;Op$&PQ=}-r0Y{;|;HGh>%~RTA*WTGNEd;FsYJJP5}5H0VY^qBOV=cAsS6Le$c03
zmIB-339tzH8v~61Za3*=eh#-1d^irH2ncnlW*7sL*93K6s+ep#v6}Abp-R}S3#=Sr
zgb%eNO3_FmAA4X^mVngY8v~f-qMyk`#;IaFL$r<Ps~G9g;3GB(|2qST_7iOkQK}4=
z0-zyM&=IYCas;X;vnUl~osbj<VWNC`nVYRhz<p{lay5&g1b`+$Nk-cm9N)}^&Z0mV
z1p^i<5dC2k`e7PPaA*Mf_|Qbtx{~ecQwU5a@iv+71ITI?tc0rVgnTKwSqdbWW~oq5
zh7oW~tj0R_KC48pmC8%s^#Eix0Wv^A#sLV?tIp}EposwdhK}*f0NIN|5WN=am6I5z
z;JY~dB+LYaX~1-21ZWX9-&X-vj6$cyg#<NHOlxc@=^LS;P~v(^uLjzV5RkzEK)O2X
zUtq@ua$=tY1$+oMjf#4RUYUo-Iz>^CX<3Poq;yqD@1T-$^J3MkfrimeW^Kcuw$*ep
z41F0z#k+)51J-!b056eUR2@CVihu<5me4VA#*5TQ0Z&vi2n}gWgvK<OGk|!-arW0R
z4)oaM%teW}vV=-<MpB?$-s4tRrVRw@#U1E+qlq&TRTC8LN)!4b43wl1f;&dKDezkH
zxRDKzpkr-KPJQDfutRU{3@|NK2~-CE->ub1yYAJWZq#e2HR0E0y@sj}er?rjR;9ro
z+w~eLEcm%oukjR+ESn(1hSW`fP0?c%LKrj&ld;!>27&?vh8i7%XQJZ<I4P+3Vc2l~
zk?u9AjzH6_s#d>cEd=^IOy2XL7rqj$R;#f_^1ON^CtbyQhI<N{mZvVXK@DIcxmmzE
z3!0St`&#Yn{qY;Htb6UuB+QOc909v(KLZ=~=uRI-xOUcg#eac9tkqCM|90~0G4SN?
zPb~?iTYpOItHTduUS0ey{#FyE>nRczjsH5q2JDIkyzmr=L+9G|R=eH!yXEFsfi_HG
zT7r}r)M>p$gHqxq(v(e9!{Bo^1I)#evL<mJ4pqEc1Lg7oT2(Z?F7`ydE<V1*0x>ia
z6^~f2^}<@A&UloF9O_bV-@vge>_LMEH@QpIPf@0>tp5Z%Hx3P$i?M3JjO?Q;8*_C6
zrXn>3Rkl@gPFe2O6j&co?<9!^ZqhJ7>p&v~6Kl~?6rcW2(VYKn)y2l2e%P#03pO6u
zId%9SQE=)ZUI1_Y8~Xs);QyU1!2DDA|HbC>CH`N;Rl)yf=liGU9_rk~^QsR}7LP0a
zfHKfeKgA`?LAKIi9WrKsNCVBV-wNFq`~);cb3KKdx(R6U%3UZs3s<@dh#T(TSD1zQ
zGD8)W0-QitN?<{z*s9dxaqdImDIDy--T(3M&EffB=S-|3LnA<NMhzt++X&1fGTfI#
z*$tyG4HdV=;<$O21w9PnUV>pmyasrwtAGz{IeywD>8LaSYaO6<M|Q?)j3Zwf*^Sl?
zLm~KqULsXs2vLawL~-k&-y%}dFb=81-0Uk1?A42|AP%zT14S@#FtOZMvRy2G#0r$l
zDvJ>ApzYr+RkCaJKzT&XE2^vW7?BiI@cdfg6SztcZhDT-RgQwRpJR)0;#2{fMpQD8
z04oiMbOKcZdNdi1zJ@sgU<_z!?oDMtvDPMnYD)^pAYCeKajs)_qM{JyO3r4$oZzq&
zokZdjf~G~Mu5xgkD{rS{!(8d`03o9n;qX{B<!hKkai+|zJswK{hcMNEDx8W#pXX{N
z*Sw)al;if>c|;ZcwGgkUsxcpnXH-E#xp!2#dt(nt*vV57+kzp1R)(@BVGM&9GBFj8
z!bHb5ZiBo8aRog^ZVX#b&>lZQqpZi(v^Er20?%Pw2G|N4e!!?6nxit+kxW7wzm#(}
z=m04SLt#~!$v|*MGRjmQqky3?SvME5o^@~{nZ;m0ZAQnzfl(-h>>3?qDce7*qJjAh
zkZBlK)FW98(;YMk6tJzflTuc=)wG@PvuK!D0me{K%*|ZY1kVjnT$|fRV@KiZ991Op
zlS?hx^6YS;*F1)XrC?wPNjJ$>!i+N*`mqt7xw$p4ZYbt9pEar^vgob0G`&)ytDVA(
zxol(h%@At$xbdcqEx8LR)R0NotrDwMoO0`mgHv8(;rGb*0+tLT@Qh2n$(>QpqiO71
zuHSzb*3&UC-l2><1UMcv&%bO9C)2Nc{FrC^R;e^}a33%NmBR^P3<uFl&Y+#1HS60w
zG`9X8t~~#v&3(Cl*ZF^2&tGgz`+qyn*O&g^BCcxw=j3d@`&W{V8ShW_5bIy<#<_s}
z7ZN_#yKz9o<$H>z+{lY7d^n(Se{S4q=e?-9ad9^20%L6IB~hc0Sn>~qQQZEN(mfBG
zNcd6t(k=0D11wD*!h%~vT+0!VvGZkY@dDHil$*9*(zm`2l<RE;@1U&o0oqb7a2gGv
zX$PBJUS)#e#)imJ>xuyWRnd@<JFPddhyYy6XyWCZ6ltQ=oxycksuq)RD2wr9f!1Xs
zQ{){=H<!SA39KIpum*!+aW$rk5Km8>0xqR>;H|o7<5)xU+V)ML-uqVIw!Hr5p5Whg
z`0x47_RRX9t>+s{{I`f}X8q4x_?HXEEn@$uTbyye{UV@SuK}8cgb%$6==?9o-tqoX
zrvwSnFj|O&Oe!z`nSz8s;_Up@;qga-nM3dV<d@FznRxbWYH^IAkT*NSWFM$d%aQ=w
zKz~yxREDulr6Pu^m&oq^Ew*9pI{pe2M*4v=(Lf6T{6n+_<=m1G_-RV41Hr&{Ot%e&
zf@lzna{u6*Y;lk;O0v=NflMx4;+~pl8x8b{h>jzCWE&#<3z<ltO$a^j;NqOpvLQ#q
z(Bc4%s|DVj9(v075+k}ed`;U0n+36$%Vq^YjPg1}<M&4KvU{;q8Lwq(5}ERAd3(RB
z6+<qr?L_j{0dz~iE)~M8ge8XUmzN+rFb{oo!-%;k+y*G`8h;nJ0ULIAAED4P_va!A
znB&icI6gV=?27Z(9a6-gm`{E_-9HjX@6OKgnK(Lmb@<bN(hHzwnxgZUgU;J?DqIlx
z>Esms(!;aElVc?DwsXoU!YIV_(kR}IG^#!NiBUJ5thIzCLS)?Tfbqoymxq9$(t<+F
zN)}7>W#G}=oEd}`D<RHR^)g<LSF!`h4Hfg^8X}cN);%g%O(Y1Slrm&)R{<8X#6V~Q
znMz7Ce~Jz5J)m`pKt>UfZ0KO{K*7Y0Lo>kPqEs6UjgdWNhtFuSN=6B{5ZHba2|FS1
z7K^6SVMwp9407)^O#KnAQAxu71+JzUg9->yN{B>JH*yx?E}9w}$)w4NpcneYfdY~|
zYan1Q_p)g2+O<dLNET?dvVQHhZsgL7TzZk;wHFD>-+GZN;)oUvWQkP_j8OWZag33D
zRbW6|*8ouXQ`)+EL1Jm0VPAOYl0r@O!ZWoC4RN)isDF^&=Y8+5>)yY8tcyR1cA!S>
zIE$ic^5y^&H$~If-mn7Au?EE+NmHQmBy$SN1l6z|Df^mT|G>M-aGV{8!i5lBMT-Z~
zM1<ga<asO7Lmn|zN0`1U`PtOF`7ipt{y%XRs*?Y?rwu^Y<$pG|pHKV$n;Xmhp9{HW
z^FMPo06E#ny?;<odESS&wivtGbusy$=?y;*o&C9DuMUDOyErI!7>kS#QZ=j)$7uct
z@YyxPC9mH4Ld3q@!xyU(84KF8vb1iq$!;<_gKk_bnIW!M%N@(6!$#;(MKZRI4!(<;
z%r%3XC{f}11UKfPF$>nf>^n;f8WcE;frH7~gFC+E3>OP=DZkZK&|Eh}f7BG0Z60ao
zuS#$&a6?ToOCOC-l3UPfYU_4;YQ7!}?_RZjXGNS<wx>`TWeDQSgcr(2irP2et@(C;
z%~<AZ9Cu6Eis>yVyng3ClNHPBn_uPpPohR3&qI%+8Q%^FUc3KeCjavS3M}#8BCeVI
z&uQm9h6=rd*ZapmcV_cIRUx@LACxb6!M9bCPEflWJRbxNy7SIZP4a0WH_j2&R_?F?
zgu_GCwxcDy0fhWgt_a(5ZwrWW4zfd=j+MN@2Jv!^$?m74k}h8BhNIXvj>BEeQ@M9+
z&5W`t%b1E?<ogHb7+w~0N{7xDZ|paC*c(R;JMa{sZM<sm*iYxYuu1Rt-w2oR!fhlu
zS&Ds?knL33>o$oFr0E2iwh!KZuRw_e#tU(KDU)i=3oc#@1k1?^AV6DNk%ipp>BVG@
zC*1R2_fL-xkAL39h+mlFI~(>Y<`N|lh%g}uM~qV|NDhN2Hs(wn3wW!2&ec3FN<=Cl
zeT0U%lc%c)h5>L0O9jczfr)5fzOV;^T+Kl<Knz3e$aj{6)<2r=Ft!j~q<6>fJExf5
z=)3}*qGZfcPLVt5hPszU<xv8f0id2F^V4voUJ7cS24t7ZRUdFaw*n*i(70dAsgJxj
zT?zOn8%~wS=-L>!_Iy>IX;xD$W)u8Wy^jaH&%&QdVqxmpGQqG+Fg%U~L%vL{IFrHQ
z1wNF4?Y}*h!S221d$jNVs4hH-=M&gMbknAu^X>R{GyY3~yYYEL{Q*uil#_X?pOqkp
zWt?ZR5e_8!BJ74BQFZZ$*<b%hzbf&6n8^2$0k|&yzrOWiX8*^|i>3d!h-)_f|L*vT
z3;;_9t4%iJQyBn$VNp2%uI)R?0ubWVlbh!O951iU1fUZu%~xguTy%X-CLnLf<H!XR
z1NBX^0bdoaw-Ua%fW4bij|TOZj9xWpZ*PA0Vf#c4^r#xPw`zPDxL;mRs`3AOf`8YZ
z|GKd;yZ-}TTjIY3Tr=_ix$rL+j$57qn_JUGoB}%w2y5TtIj|0JU3(I&W$yg5U}<MZ
zrA%qJ0#}p|P7%!i@wBu5ixYv_BVoHqDUmRH7VHB8ySto+n)m<KF@HN39^y>Yg<`_e
zq<S=vu&{%b0Pt-A;Wx&E^Ug-4Db7JSnzwUKM!nr}s5rje1T_RA+PfZd`cHPssXg!0
zYLKry*_5T|%V(S3tinZ}XZk&yW4gTVc2(lP_p|`$I{c5Dy{FdyY_2cYe=p>kjsJde
z0nn_JTzMYQyahlQZlf^UotFVQbaD4{VCVGVMnUAlW4PBK@)y<tafldq$UMMOpuBRs
zCgesdfbjjVUi^b=D{i{@rySs|EbjSUjx?I;>DL92A7mBDyy^95yoWhfv>I5x>q3&{
zfk(@gB#&t&Ns+H8)cEGi$w<x(Dz7gomY*y$8rNSf9J=29egyUo-~ZcO-&}tGZxNRr
zf0*gezmcLj@~!K*?)<Oy&FTEl_KVHs_x~1ht*or@DKF&l*gfxzJWv$Ee#T#0b1>Km
ziU_0{U<ffJb6i6@h<HP?{v1(s2y4dM0o1W-zum^CI^pX?A_rv&flsnSXp`DtGO>lb
z;@#<+;)4!_r$(j2BmtLVB;&NTR--$2|7L9M1AiJW`dDKgVUkP7_H+QU*mO{}{af4b
zGguB*`mTsV$y{TNSzP_j$Es{ywq%PcnGAFWn3zP4Ei$rZ=CdFMgN1mgHZEk35W8ZJ
zWfa=A=)6Y0P6x`=o1%{XPn{8S9coc^E^(&P{DXq*UO}t*g?vpVxb5IG&|yF1c>@U~
z3%_3y4W`WlX-a1TNt<_}I=Lb4CFh3;%N-fC>4hrms@U8PgkjiaxZ#FBN|K*IW>Zi+
zMs((Q&$_QUZ=JXTd&yOpZt!g7O^gmiQy*uUM<(aZ7dujsr9CQGTd4@t9uvv?jQ%e8
zUV#^Mt*}*RYvht@&f&N369GIL3BMddvjt|~yW$(njzM-AAARS5Kg&o6y4@rKlqZF<
z4O3J^Q#z6;e6Ej0wy%76+lo!YRuu|0g&yKDb^B*;kf4eYYQt<m=*!=lX_n?W?JdFc
zuk6<%x%|7>edm%Gf&Mg01b-2-EEG9gT=f;c?(ttiG2>xCKHrq%e4<JCnQpRBXOUw6
z33==Bw_EJ^y5mK@Fc?H*c0g`A7*6rb8<c{l#b-1*8I(66&b1D5fX;BcvKN$2)v>z$
zcvP5{4@%DWJ^oqC&6FJoZe?f_D5Xisj**N8#a^kZ{=kIHSQodE2A{HJo$a~Qi0PC3
z%Q`D7Q)P0oax;xp-YE3>gfeO4M6TOJlx<eoFv5stS%_k7JFzE@CgP_sF{wDs-^#_4
zj!yGjsL%6phC4~cFqkRKHGVb<9(x=#3O}n{cVSWE)^VUxmWj0n10nl(Vo*9O-d41E
zB|}9}av(ud9VMl!M<`{phAA5kRe~o^f>_{Bj|fpO${|dQ^3>^@6Z|gyocoqDmDYHz
z^0%^v^kvt)r_y*K2MBP@1<`@lT42z03rBcl%92W3+Cn@ckN1YnaDGo<wE!H9G&yRR
zC$i%UMe#SBbYmH(<#};1Nisv~kO)L!H<1=GROPZIUfG7>M3#|Jt)fb!cxi&&i|2xs
z2-b3m2svQX%|cwRfy#yIW=MeQ*ubFZBz;M;gXbMJ>D-q_PQmjm*BT`38056k51jM4
zS*IPKMZc!!7lu+s;Gm=8Z()oQSm7ZHfuU)V<rXlPZBsJpw2CFfu9-AZaUXi&fU5g?
zRwO5Q_TGw(KIKfOw%|(VM32G%By^p25K`(_#eOmw^77~nzRe;Lr>c)s`ST_GZQ}^Q
zffegWCVjUvWrKv=*9qxm3l^+uR=pmhHgO4BCE|u7>xzo`W29nZ^IvVV;Fs6M0}+8t
z@!DF8G=Z&uJ`h5I9fYx0BEBcq@uvJ#y}7ybd>J5lRG0PtNqgT{9oP8(o9i#8<3C%Q
z%l$u#xvKtuHLN|0%}7V(u_DG(DIr{Nn<*<6ux7VQgCw1hI?%yyBw)YcH<f2$G9_x5
z+CClgI*zR-qYth-W5x>wO_e~No?!RIA%Imj6*5{Y``yk22gOm(OgiQyO9}N?DnN&|
zCHAM<Zcmt&2oZaz_4O{15FYM5=VP-`YMRB}exkD>vV@&RyYvxZloa;sSmFb{WInpa
z6~*DLD3n(fqi1SfIlusOg9pVSzKy4q7aF`yG^fCxv2&j17IzP)i^_O{S=I8(&l-qJ
z0$(QVzwyBHiYQ`dmllaC_tnfHus8gapEsP5ABbP*{=Y^`oDvT8t+&!o(6W;ozS>9{
zYkB;E9DMf5;la4{1f(Z+$v_sJ27To_=O@_7e`rK^?Ss9){Iz{{zV`}Vh1Rv-?48AU
z=H>wag7%61<0Ool;!PT~+9&V!P64&Z1iEzyC^}5DRJ9L|_x=OO?L+@$AJL*6ZpvCF
z$>cW>;ol4;_4fEqlm5z|(_i!!H>>nc^9Q(v3pqZtds?@<GRetekYNme{J@3qFyd!o
zkKQdN$SxH5-0o$cvDlwyn?n?yQSpb?Tg&U)T-N?`+~c0=xW@jkZ$ICew*T8(%l#h<
zxxQroE3vX`O}_~T;KU_m!(YiNwF={23LCh(*xuW(_R=xMxa8m2XK)TxgmJ<DZ|~Z6
z+_r%z`l_!G(3dudY?8Wqi##l_8ze}u36f0<q$mnXq<7WYmMh8G><72s(8qFy)WvpE
z*FmJDyF5u?#21p{a43pAXXa-#!!O;v8mSm~eBL$|9;0C0fP@fz^L~h>ffRM4+0U-d
z$>5msdb1?Hs`))DB^;_ZQy;!IZ+~f{eMU^C+JZyYfj<5({;wVZcF%u<(8hlo1>s#9
z%oayuG8O5t><{#Rb{zkM!g9g#KSluBzka{+vP}uZKnz#=nd>;T{@b^I5Xa2U|6`!F
z|GF${2zXb0{@LJo{|`bMS^pmcXn*7TL)CHE|997aKJ)GV?<lakz55OHf8ZPpSjR#C
z4|ex|Sm@jP|0oE;l!qdao=k=B1>B!`X-cKXxCrNr2SGTG?G$l$f&OnqNdqkX5XZ0o
z{E*q_zfrJle=^7j_yqe$k#Frk26pXF2dLxF{x1!8^S}9VVD%rP;F(K_)8+qleuV#b
zec?QF6Z}~4TB;9O>@c-F^GN8?_>zzl;-C1l35jMwlDt*ljvW(`S(lV9{!ITWugbrL
zbAEAV7ZP6`cKv?{pF4Q|6YV|!$M*Su9Q+smPLzKWnnZ7{58pYU|Ie#=HHh+4<G}O3
z9|gPd-~Ax6_y19Ftp4-y>&LHqku`uqv6RaU)Re+QvIo*B_9*+v(`dixns1_rOKVdy
z3=LU>pkjfww8Axr2qa}A;=H-JnFzkR;%dMyr)`NJmT|g<VcLETWH?%E(X|E1gixc_
zHvm(lDXRUFmSX8fKim=)BGp89&Jyw+L6o(%p@%{~ZXkuK4lAqLbdUSUr7WHh^@pBN
z$|yy(&$6hc*EQ;qyS(jMofYMJVyH53-S|7!l3+}Q*vDOHnjd<K&b(?22z}!|dGY%4
z<mHQ3my_-8Y3*WYnAJ5eG01Cu15ulnn?;=iTc@v{(gM)y;s%l>HF{tjFD9>a*pvql
zmgRvfS6z1UJ^huDszJ7^tajhLd3t8TVnEn=q-kQPQPWNs7dmnun5*2DMlB(Ecq0+S
z(1$A>@P=m@5|n|<qecQot>KHx5SF@LxaOJ)3dGQDv5HQZ#YF`TLOyAQQS{&{G008-
z8r7TSNBS2e8a=&f-bBPHH&si}1>eG-8Hf4_G|yc3fM-M{u7p5+Zjogh7?@h!rHbu^
z3DKiLjS4Lt!JtRV&GXd;BZn7PS6b0)ipm@L431SGgsow1Smvs_h3?;;xa&J84r_oy
zaw8-{rS^1cF7A8nbW8rbHgd0RD~Xk03(XvWtpIj$PecgWrID}oEe4G7s=8P=>L3UB
zs~!wg)dDl~fb7d?l&Up{Yz2*EQqNjAyc>o%7~x%!gMtXJLJ!0Z#F|x?3}Z_tEYoao
z)bZ(j^%AooaJO3Ec*}~a#yBnN=O$@A$4m)oQdsi5RbKK=jSCbx?O{8$w1j5m43HJA
zO?+Rg8&qYqgm`Igg+Xxih!S!wS1`cc&DtdFs~V}_o*dEyk=iU%WdikSk^UyLy4vF4
zag*<iFOfJ0L4<mpqjMALsR5?HhdSa9bf`O5_Uqp_(o%KCwb70nrzmmcHIax~D){AA
zOy)H&)wBj7<+L^!<&urP_d_XzoV9Md52i3RBGL6!Ti|kv`5ep!$~cpPP7V50R<-U=
zPxq$@4yvbtE~!N$Pln;8rYhCbkYeMVERE82E<Ywa-D-5Fb5=fYR9Y~&^}AmGyJnOR
zcK^rqA5LULFh4Yo=YRN-<^PU@=J|i9y??;*`@bw=cK>%2boL)+?;r39_KzbQ|7jHT
z?LWlcKj6^*Ur6`#A0)8n|D)jS{Q2c6glzxj{Nn7@`Aa7kCob`*dO9uXl*UedyOt-o
zc6Lxbp+0qL`Ch$Jm$-rYCju&xOw5uwAt@t)oF#<JW{J$^Nx)+DJ9vgWai^!Mg!OG%
z6i|peb)1*aPoG{m&s^%9!f`?5;3h=BcGz(2H>~-9n0AT0bkxeZ`G~-8OfzZQOv*yn
zXYiLt^C<HL7YR)`nFpar7|SFP5%CiuMBCKemi`LG*Tf?bxfqvZI_3ggl;f!OxNP>n
zq|*2Oe<*)Dc>Wjp`~FX?|BnIff**1-(s@64*^cuQbpF6)g3aPs&}<gXgCypu3^<jH
zFd>Ma2A%Vxo4Ax%HqloFv;Xgbp@*vD@bll^{htQ*{BIP%(U0}W`-@%u&k%JS_W!;3
zuOzVg&rx9c|Mwf(|4Y8AbT+#o_5qLI{|Wri@_$ExUjGdn0^lJ3i|z40Nl5MdKL&hf
z!EYfDCLA~kDby1hA@-hdZ!np{vEPuVKcAHU!H<cZ|HnXMf025N;;NbqaR_{9{m<y`
z{$CJAHviu!*oyrG@qio7RF9=YC;sF;5T1Z?R8E{HE!V5AAW<3+Au_2WS^u%YXuLqG
zz|hNep6aA$Kn!xW3T3)b$;(9!#eV6oxR>bse-PJ49Pj@=p8s3_A0F2KEi5c7EG#T6
VEG#T6EG+Im{sF;`WR(E$0018sFo*yE
--- a/tests/test_fetch_command.py
+++ b/tests/test_fetch_command.py
@@ -175,7 +175,8 @@ class TestBasicRepoLayout(test_util.Test
             self.assertEqual(repo[r].hex(), repo2[r].hex())
 
     def test_path_quoting_stupid(self):
-        self.test_path_quoting(True)
+        repo = self.test_path_quoting(True)
+
 
     def test_identical_fixtures(self):
         '''ensure that the non_ascii_path_N fixtures are identical'''
@@ -186,6 +187,12 @@ class TestBasicRepoLayout(test_util.Test
         self.assertMultiLineEqual(open(fixturepaths[0]).read(),
                                   open(fixturepaths[1]).read())
 
+    def test_invalid_message(self):
+        repo = self._load_fixture_and_fetch('invalid_utf8.tar.gz')
+        self.assertEqual(repo[0].description().decode('utf8'),
+                         u'bl\xe5b\xe6rgr\xf8d')
+
+
 class TestStupidPull(test_util.TestBase):
     def test_stupid(self):
         repo = self._load_fixture_and_fetch('two_heads.svndump', stupid=True)
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -7,6 +7,7 @@ import shutil
 import stat
 import subprocess
 import sys
+import tarfile
 import tempfile
 import unittest
 import urllib
@@ -301,6 +302,23 @@ class TestBase(unittest.TestCase):
         proc.communicate()
         return path
 
+    def load_repo_tarball(self, fixture_name):
+        '''Extracts a tarball of an svn repo and returns the svn repo path.'''
+        path = self._makerepopath()
+        assert not os.path.exists(path)
+        os.mkdir(path)
+        tarball = tarfile.open(os.path.join(FIXTURES, fixture_name))
+        # This is probably somewhat fragile, but I'm not sure how to
+        # do better.  In particular, I think it assumes that the tar
+        # entries are in the right order and that directories appear
+        # before their contents.  This is a valid assumption for sane
+        # tarballs, from what I can tell.  In particular, for a simple
+        # tarball of a svn repo with paths relative to the repo root,
+        # it seems to work.
+        for entry in tarball:
+            tarball.extract(entry, path)
+        return path
+
     def fetch(self, repo_path, subdir=None, stupid=False, layout='auto', startrev=0,
               externals=None, noupdate=True, dest=None, rev=None):
         if layout == 'single':
@@ -333,7 +351,12 @@ class TestBase(unittest.TestCase):
         return hg.repository(testui(), self.wc_path)
 
     def load_and_fetch(self, fixture_name, *args, **opts):
-        repo_path = self.load_svndump(fixture_name)
+        if fixture_name.endswith('.svndump'):
+            repo_path = self.load_svndump(fixture_name)
+        elif fixture_name.endswith('tar.gz'):
+            repo_path = self.load_repo_tarball(fixture_name)
+        else:
+            assert False, 'Unknown fixture type'
 
         return self.fetch(repo_path, *args, **opts), repo_path