From 11842cf150bc69b76ca0c2f97bedde6b8e5ed181 Mon Sep 17 00:00:00 2001 From: Stephan Hug Date: Wed, 12 Jul 2023 09:12:19 +0200 Subject: [PATCH] chore: refactor view to extract shared logic into FileConverter utility class --- USAGE.md | 6 +- .../api/data/odt-template.odt | Bin 0 -> 10589 bytes document_merge_service/api/file_converter.py | 27 ++++++++ document_merge_service/api/serializers.py | 2 +- .../api/tests/test_convert.py | 15 ++++- document_merge_service/api/views.py | 60 +++++------------- 6 files changed, 57 insertions(+), 53 deletions(-) create mode 100644 document_merge_service/api/data/odt-template.odt create mode 100644 document_merge_service/api/file_converter.py diff --git a/USAGE.md b/USAGE.md index ac27bde5..0e1320d7 100644 --- a/USAGE.md +++ b/USAGE.md @@ -310,11 +310,9 @@ To convert a Docx file to PDF using the DMS you can send a `POST` request with t >>> resp = requests.post( ... 'http://localhost:8000/api/v1/convert', ... data={ -... 'target_format': 'pdf' +... 'file': file_to_convert, + 'target_format': 'pdf' ... }, -... files=( -... ("file", open('my-example-file.docx', 'rb')), -... ), ... ) ``` diff --git a/document_merge_service/api/data/odt-template.odt b/document_merge_service/api/data/odt-template.odt new file mode 100644 index 0000000000000000000000000000000000000000..ade743880ced82602ea3ea0f18f3df6b0c1a77a1 GIT binary patch literal 10589 zcmb_?1yq!47cSD>B?u$ZC=F85pc2yEH6S$%9nwgrbazWicS}llN;gP1+~J(xqv!m3 z?tj<1d)BPAznSlO-o5u5U%Zcu1Pm-L6ci#96g1b4lGbMyRB9+FsQWMECn$4cbD)hQ z7^n*dn;Glt+8A4a80|m?3>Lc9#?}lLU?9lALf_UL2(n?Y0Xo>o{EZBP|H~dhJYfrv zp|O#z)gLt0ER4GP`am-vMxb6v3YzkrE;0k*NQFazs? zfM)-K`&GDLD+?nlptZH0?!Sfms}`+obZu<^FF3;AMEdWo{R(AhW}#~X{C|xRYyq|f zLpsv-U#jw}*6txcVnjql{9_zJlKIO}A)dJ|$k-5QZNp$?V5k`?2=ZdW6g^99wHiN# zt)-<2L)&`b1Y-hk!rrGsDjz;GQIXD2dwnT=x2aupwwv9^9ALC%Yd)Yx#+A%l*f2k$ zr*&eR%V4Qhdv@>!7@ygjH<3c?jsBGr{aw9E$kS2tnbsPQZ00zo!&J68v~DqsC^?|M zh~`t1{W)rXmGmEz2WX?q;9zbR9MKD! zxli~8S@27Ay4E+ZFuc^j^gvu%OoWa8)NcR2XezjXfY~NSQQn{D*LOImh!X|8UY|Blqv8AWiR2#kuQI$) zRmTacD+;WAHk{pR{K%-fHBl{K9?mAEZD=ULUe^?Ns2mGT_81kdG-!ip6j!k5*?~El zQU6+QE&DKnrH`PHg$Wp&(m{_=E_dOKTcUD?{&Wm!Ol?4Cp@V6k?PZAXk>u6LT?o@& zKL+z{qpAab^oZS+YHzD_AF9kY;z}g~kcgTXcb(?TshH$pP^PNKKG;i{Mi(xsYoeSs zB@67>f@01UJ5*(W-0zo}Z(8poc8>ZfGM!-d{N>l#j8z1>jsNzH`ldb+tOgXXWweGULpByDotr_G_IBOl&KHO zjdt=G=Z$k1u$cNN`HbfP(?)!x)%Z+yR^A%6^nEiapgDuDjVKu$jjRcCYkt-a^x&^& zbKck1yoI|T6wvmGZxPWoo42n$f$f-L zNNrAP!Hy>wYQp-Az8q#H-q0yr;cHVFsDF;1-hk~_4c1%>8| ztd?>JKZzo`fDfT1S546i^aNjmsA3K`%|(Yk;apL|Z06V2xh;gXqJY6=xe1|2eJPAg z@$oR@BGd4WR8y(~U1+NdmED^;_MB&RECd>S+RE9VDJo2CpC`D)yWGAvk>hQwuP><8 zj;VUGv>mKucWSt|7Lk!LVsd(O0y;CFAm}Z$VT^WjU5_7#O2`64uPtb-RvNgW*LX}` zu03uti_%oyh%#k-WLT{A(adSDY2Z5Z3I@*1PYiews7i4hD8Q9B`$$u9LUf0r5>0GE&nDw*MA^uZXY0O)basOF)ymSwu^#`X7j6F~rfiU)e$NW*aexn2IuzAYj+a>_ zDIw7XPZNzdbYN#PL*oW{0{Meft-64))_&tj6i&GOBGWgI6PN zVysMelH6)u%xLdaN^)`WaL+5}uaQ4kT@S!fcQWM3yV<3+vU9353RwBH{TB7J`q92K z|6&v7GMT>`S|u%=JYwjynMa;+>TCU)_>r391{i(XhLCd&HM!!etZpGMlioV*oy~-3 zQg`=3fl&pUmbQ;~P_Hlhz{ihy!pNpDhA`K%C3armzxHF%UDU$>5;HWu>Ne+JZ?3z5 ze{wO`>$xM8Ur@G^GK#?KsW&2@i=!rn0kS*1(&+`Lta=QuV05WGSghA6;1wz)79pMhm#MqAAt~l@j`5^(|o3CQIdo zob8IK`Li4AIKBLR0}+byhJG3%sQG7@FT@^i9Smmnyowve?FuT5y>V~n#c3Shh)P|N z4KZ`&p_e#5OkE=-Ja9uFvMn)38~Kbe1n1dacjy(c4h~YXii1%RxIv?ic!QiVf zStQU^bBOxwVd3M0S`5#}t8hM(>HX4|8&xbuH_;?0Z{La?Vk;VN(f8D|<0Iu}w>m&k zowRg>inXJvpb0nob9n?P1UD1j$nN^C{Y$ym*eWSI9&aOQbpuxKM{2QMWn)+rTL zR{ab9LhJ5Py%^ZpuSd6!B|8*6042fVkl zKZ9ti7U9|#K*wuk)uvB~W}Z|UJWnJ{Pm-9uAl!!cl%{|iYS#Jm=!0YG1*P=lA}m%B z?|a!x2Hj*0Z|H8%TMiF$D$r3IMP=^M^APyerdqATzPb--fcR4Iv6TP9lasN6y$t~3 z7xa<2bkmbB==}U|odP98qh`ugm7u+1kudOH!B8%qtG5hAt?*cBT*Px<`V+|~YzvEa zL?pQ*o(dA-ZLPyp(Z^OC;$jzDo6Loq7x9+pR>qk~8#+t}_8C2%pS~Oykt@8d)eWmD zqI6+VbESB?pphbBZ5Ya{IrIgX}(93&K+Kqzj=aBMkm8WCBmA&K_d`QZ&XkSRWnmA7NUOP3c zc6fm;(>oj1{JH1Elm3BJH35LpsZC@`$VX{Sfho-xub6jI!7=4P_s%0Eg+#}&TP0PS z>T<5+q$fp;@iS%MJ4FW`Y5me+t4~cZOqECHaU|0FJw5`v4tlH2WN(Se%lXR=nf zB^~%Buv)bUkvqefCP^vprsVNF%iWhHX5OAZlv_WE=JwD=4}-6SSSV1I}Kf z++ST6L_efbAV5J)F@L|het%ve_Hh$_4hIEw|GmFERd{P_t_RXJHnV25`R+1+K}P;E zQm@dEiI5?upozT}k%NMIU;=sKfkK3V{F3M~4u*CVN z=HB-D^6tjU&d%=Y&hh5X!OrpZx1G(4{ne9$ovY)W+l$@PljEb)i-U{X>x+}y+gnIU z@9ypj*^SwupkQ*uLS@emC~ zlYZz{Aip&J$BvqnF#doM=)oWqgESsHr+9>VHNl~?O+<`Y^W#YW!+nY^`NA?hI?vKj-p(Akfmh(hg z(psldd@l>GU14ZN3#I^jV}B+2hchblpS1sU?h7Oo{EJHcYsvmslK+(Z*LD9n07y^# zHj@9D%1T}|`QndjKIu1&4r4_8z( zPA9!U^{>#l6Ll1D@%T3?gV^TVPXeJu3ny+)I5I`zoLDOur}juW;N)UiS5N`{sDPf3 z^IP9KeU_RIwooz1-$sF(usLM@OEzw-H@g#n!m(rD{kzlT9Xm@u4lJZVa;dneGHcHx zBpa!tH?$P*XC*O$fC(3g2AuTjM(_95`}InbV^VSyQ#4< zd7j5iv*{~;K3&H(de4$a;$2~-G$@iQy7-7X_9}T`H%>G35hY~%P7if`s9EiWq=D|b$ucnRe4YRUD3 zXIK3z$YCTLcuyOYc4AUK6BWr7V=;l)_;hA-vFP9OxMF#Kn&zPoQI~u*^Ago|S%<@Q zX>cG92k*%N_NI;YL^=trTnX&SMm_!b=NnC! zMq+Nqs#_vEN_+|)R%dq=*(TPcEM^`IIIW_S_8Bzp-gP3YvstbIGs=TKbrF56%A;*k=OD5aQH3|~k1lw6Uf+gB zYfU+fafpY|xX?}Lbj{H$R4-X`p2aae35#_;KXaqf=96@7xq2dKGuWMmPM+v;O?QV{ zdbjF48=l$OTukrO!9K)P!;BqYXzZj{*f=P87K^XqUDJ-I6n^cFgFh zm!~gnpa;xR-bzq-F=3aWkdFOz{|Pj}aRg}rG4(N(7`-a@wkBd)J$V8SD)2*)KT$Y# zpj%Px3Igtj2)OyRSXFzDk&eDfWxK@Fo&ilRm}HX&11(C)X_$x^wvX9vBffpoG?|%a z-&NNd2e>5g*m!4!qQfZr2pKcQD(Y7G{M?7*VR zq7(lF6Au3#pdNtrO-QAH(?G7(nA(2t^^=WcnLu;mMZ6rWHFX63liacoQsMv)N&(mp z&(;q(cI*oKZ}$4KyO^894mQ3-R6ym@$xo}s{#EYZbB@p(!cVsT*;__!d zeB#HDle?gV1Mu0Cd`*1OVh@v$&7}HqQYv&v&B<@Z&~!*TB_}4zJTi#cHWS@@b1c@! zmNT&oMydl_4JqJ>3e;j?E|!>ckm=bgA6KPJvZp-{9D_YnTkCZNPin~Yrvvn8uxU9y z#X6dHHW`3eU04K;!YeJ9Z!h&U&;&Kwh5{r4LTRMvZ#hNab4Fx)0ad^plzB+I@~(q2PU0y+t4E2O0~H4<)t1mqjSy!2pLUF4nll3 z$sh5OvfMqbpk5RX={qIBw#it$bZ^JWq7EW#_Dz2mZK4_iP|WX$iCX25TI!aVkMxTT z*M3c3#L~hiu2^|M3bry7P;QfbzdZa#{2g~wYV*TRhDg=1LldnG^8FmPR(oke`%=Pv zlUZKb_w{{Ib&Oui9sPR?ujzvQZ^BlMu|yojDdBL=sm`MTS=(ujmgdKIH)jJKrL>?k z$L9ISuO!=MQsuKcYK7900MX<2B@ghjOrP|TlJKNcKq0N<(+Z}f{01wD@VC_rg=!Bq zPQ#aO8)L2k8=6o3ZZZ6Y)}3&<7Z{L}5Jb|B>SZ^Iw5>4V&oElQ$_jE0UL6Q-XkiGQ zA$iK6#a`g05A<@`Z^CHH9S8?%RSt|%nJ2B2fzRe&zaq6<>6@9FVLf>D!j};CTnkmn z#q8md!Zm*3j^%F6Do(^`OXP^b*z)jN8Ms6|@lgHT<8dK7HGRdUZZ*}zTU81qLp7eP zX+`o3z(g)T{agM#$4>x$RNnW-w_9oYX&EPl$wh4bg`Yo?HZ%pz9Uk4iD(~pVb5-BA zdAR2yw0g44+FZ|YZc`njdX5_AiNZ&sri0Iyj1Cwub9N@-Yn(cV<#vWHJ5b&FrWzkS zAnrTRW9B^0?L2O{tj|~uYpKgA04PrG5tJIbViNHoVY*3oc=>4n$d7&wD^pV zwXwQHwt3rBd12N_<)ZWSIJU_Q_@%iiZG|~!j$L`;G;^XnkDAmjVSgUUjhAs7S4GC^ zXvkvX^pKTFCs=CtEmwv9f1OC5V~4sv8WJtf<6~{2+?%Hffqh&ZErP(ttV|{#ur0Se zT?niqahg3*z7E1q)SssbfuXIB7Dda~{TFP+%47fnGv=x=;>v_sVF7D^ps+uXhVw_A++6*Bav4^r8n#-Qt>v)BD120y8>_ zpNs4msu5&TMDG{u)YnMkeRzRS@G+GQn=z#Quo0Cn>hQ+y8o9kuSDkKbGTri3`1?e# zf%(AdRoTO7I@_EHjxh8$?K}yZ*s2tic8RYn-sL z`@hF6+{^u@S|5wVKf$A7R*G3wDppdCihS^~3Q*-=bgn|H!IRJkZkBIFUn(5iwg03Z z7&qfQ^zNj{Vd#aG3w%jE!f*)w=7bPo5!vnu`7ST@a1rFNRfJ-A06rW%oV$2)_C<%12;Iqn1Kb80AarjAl8!gb zuU;O9eXB1c*f*pDIhM|JKrZ8|fY?(M5qVN4+~t#wlVLul#VI+hyRq!hkcCdbNAyrl za?yh1tjDlmkT=C#$5nbQWhK7KSO+_g5aHcy6E+JmCUBfy9J$XTmX;XUsqx^k@VnWK zh@Lu)XM9>BtwIIu*LL5G==2j>ZrJFWn@f8rNaUBBA*OUIFhmOb3A&1TCVbFZ?f zq`9um$^2SZUX27+rVu;xr95iMS3nA(DW;Z_t}$8hAug)g|0wSvpRc-^)^pjOxK3cN zrUWdT9nSpfcK;F!fkP{s3=IluH~RZatY7ULqCg(3^esR(_dDwMJ4DLr7BkOrT^EWm z*-|Z#m#blj*zEu;u-nm@0=v*ISz_W;;va~_EIM ztJ5S)M;q~Z$`wehJYm^RAsgB2jPIt#M`5vKf26&q5N+8im;YLk$rvX+)B6r_ayYX> zftEdRVmz8wDBs`{UxOU?D~^eYs$AC)VsvIh6ZLDmx`t^fUb_n{E>*p${&e?#_N*mJ zwztXYk7Ug(KP{SFndcn2MAwwlxm zMPg_|J*$c!y?j^jAqh(?8(!SOwNV@J#r}qE9L#Fn;1f4|fX15D0BZSoAbR|jL0zdM zU~{Na_bBmH@Hrp&>s+}Pa+1S_O;QyKhkO*QL`kOhv_wesDO;k!@~hq{ui7@JXr`K% z=s4emZr+c)>Et>MOdOoC+SLo%q`l1y9Jl6`pu3dd4UAA8ja zgzL6q_xLd#)j2(yUe0y3OYUDn_%aDC!34b@;7(gTc_yL>Cl$LQDTJjPoX}8OupuZ8 zLQ>H2Vn2Hm)c@{Vmu{_#@=KVI`qnHKi%{^%qUu69UCDKo229ix&BCK^ci=r$)m2uk zxw{MH^6F5&PxaU;7~EVUgo`_?inmb9C6feCDYd>8Q(t4q+m*0hI5K`-g+@cuw+?xV zGNdAfU7V97cQ<40#AR&G9#V^<_y}>2j8@l?#hi;(He1dEsI~a08dr-9ETD6sj@Ea@ z?^)z7remtBLVH)>mH7an<>m;fL-Q4^7x860AVO{E$|%KKO2#)a@$)>QQo!B}0>R{U zzcwk_Rh?&X>T};5-w*uhvyssIIEt#(7NsK1>PU@TI&)5zNdsp>3ZKVEww&u0^xLaiM-4^8bh^L5qNeu>ZH-ikNM}zco%3W$aT5BjzZ>gx*5pj@EQEop?ujifV)=(Iyep~0mZ@}%u zPiR!*4(B$xZxUMlf-RN=cVolW#~oSfwXnzirR`s^T|QXUI0+Y3Hi61(*&?}RVlE{j z@}Su_cji8K5TC=K)6YoV?BcDo7m>_S!xVN+a@nGPvEV!pOH}!dc~9#4YU@h-zTS}% zz9{sN{dmeh_TwKw<3jyj;{+-CPxf8$BqQ;6)K8~C-K&Fs*c*uZ@6x80VOu!(KyJ{#rL6QmgNS_&f4vnaaIF>4)(`7AJ_*?+?AwKT$t#zx};EImou#&w`fk z$lqHfv@4||0D^K0mT;(lIq@9(RAm@Q=9>HoOP`X}(` h)#H9m{$W<+zgeMWB;erhPs4(IfRIIJiQ@j~{{ZXCTY~@q literal 0 HcmV?d00001 diff --git a/document_merge_service/api/file_converter.py b/document_merge_service/api/file_converter.py new file mode 100644 index 00000000..88f8f962 --- /dev/null +++ b/document_merge_service/api/file_converter.py @@ -0,0 +1,27 @@ +from pathlib import Path +from tempfile import NamedTemporaryFile + +from django.conf import settings +from django.http import HttpResponse + +from .unoconv import Unoconv + + +class FileConverter: + def convert(file_contents, target_format): + dir = Path(settings.DATABASE_DIR, "tmp") + dir.mkdir(parents=True, exist_ok=True) + + with NamedTemporaryFile("wb", dir=dir) as tmp: + tmp.write(file_contents) + unoconv = Unoconv( + pythonpath=settings.UNOCONV_PYTHON, + unoconvpath=settings.UNOCONV_PATH, + ) + result = unoconv.process(tmp.name, target_format) + + status = 200 if result.returncode == 0 else 500 + + return HttpResponse( + content=result.stdout, status=status, content_type=result.content_type + ) diff --git a/document_merge_service/api/serializers.py b/document_merge_service/api/serializers.py index 7f94af69..4952ab36 100644 --- a/document_merge_service/api/serializers.py +++ b/document_merge_service/api/serializers.py @@ -144,6 +144,6 @@ class ConvertSerializer(serializers.Serializer): target_format = serializers.ChoiceField( allow_null=False, required=True, - choices=("pdf"), + choices=[("pdf", "PDF")], help_text="The target format of the conversion. Currently only 'pdf' is supported.", ) diff --git a/document_merge_service/api/tests/test_convert.py b/document_merge_service/api/tests/test_convert.py index c73ab02b..46a7f8b1 100644 --- a/document_merge_service/api/tests/test_convert.py +++ b/document_merge_service/api/tests/test_convert.py @@ -5,19 +5,28 @@ from document_merge_service.api.data import django_file -# @pytest.mark.parametrize("template__description", ["test description"]) @pytest.mark.parametrize( "target_format,response_content_type", [ ("pdf", "application/pdf"), ], ) -def test_convert(db, client, template, target_format, response_content_type): +def test_convert(db, client, target_format, response_content_type): url = reverse("convert") - file_to_convert = django_file("docx-template-syntax.docx") + file_to_convert = django_file("docx-template.docx") data = {"file": file_to_convert.file, "target_format": target_format} response = client.post(url, data=data, format="multipart") assert response.status_code == status.HTTP_200_OK assert response.headers.get("Content-Type") == response_content_type + + +def test_incorrect_file_type(db, client): + url = reverse("convert") + file_to_convert = django_file("odt-template.odt") + + data = {"file": file_to_convert.file, "target_format": "pdf"} + response = client.post(url, data=data, format="multipart") + + assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/document_merge_service/api/views.py b/document_merge_service/api/views.py index 20e3a928..bbd537be 100644 --- a/document_merge_service/api/views.py +++ b/document_merge_service/api/views.py @@ -1,9 +1,6 @@ import mimetypes -from pathlib import Path -from tempfile import NamedTemporaryFile import jinja2 -from django.conf import settings from django.http import HttpResponse from django.utils.encoding import smart_str from generic_permissions.permissions import PermissionViewMixin @@ -14,7 +11,7 @@ from rest_framework.views import APIView from . import engines, filters, models, serializers -from .unoconv import Unoconv +from .file_converter import FileConverter class TemplateView(VisibilityViewMixin, PermissionViewMixin, viewsets.ModelViewSet): @@ -37,7 +34,6 @@ def merge(self, request, pk=None): response = HttpResponse( content_type=content_type or "application/force-download" ) - extension = mimetypes.guess_extension(content_type) serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) @@ -59,24 +55,9 @@ def merge(self, request, pk=None): convert = serializer.data.get("convert") if convert: - dir = Path(settings.DATABASE_DIR, "tmp") - dir.mkdir(parents=True, exist_ok=True) - with NamedTemporaryFile("wb", dir=dir) as tmp: - tmp.write(response.content) - unoconv = Unoconv( - pythonpath=settings.UNOCONV_PYTHON, - unoconvpath=settings.UNOCONV_PATH, - ) - result = unoconv.process(tmp.name, convert) - extension = convert - status = 500 - if result.returncode == 0: - status = 200 - response = HttpResponse( - content=result.stdout, status=status, content_type=result.content_type - ) + response = FileConverter.convert(response.content, convert) - filename = f"{template.slug}.{extension}" + filename = f"{template.slug}.{convert}" response["Content-Disposition"] = f'attachment; filename="{filename}"' return response @@ -104,34 +85,23 @@ def retrieve(self, request, **kwargs): class ConvertView(APIView): def post(self, request, **kwargs): serializer = serializers.ConvertSerializer(data=request.data) - serializer.is_valid() - template = models.Template( - engine="docx-template", template=serializer.data["file"] - ) + serializer.is_valid(raise_exception=True) - content_type, _ = mimetypes.guess_type(template.template.name) - response = HttpResponse( - content_type=content_type or "application/force-download" - ) + file = serializer.data["file"] target_format = serializer.data["target_format"] - dir = Path(settings.DATABASE_DIR, "tmp") - dir.mkdir(parents=True, exist_ok=True) - with NamedTemporaryFile("wb", dir=dir) as tmp: - tmp.write(template.template.file.read()) - unoconv = Unoconv( - pythonpath=settings.UNOCONV_PYTHON, - unoconvpath=settings.UNOCONV_PATH, + content_type, foo = mimetypes.guess_type(file.name) + + if ( + content_type + != "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ): + raise exceptions.ValidationError( + "Incorrect file format. Only docx files are supported for conversion." ) - result = unoconv.process(tmp.name, target_format) - status = 500 - if result.returncode == 0: - status = 200 - response = HttpResponse( - content=result.stdout, status=status, content_type=result.content_type - ) + response = FileConverter.convert(file.read(), target_format) - filename = f"{template.template.name.split('.')[0]}.{target_format}" + filename = f"{file.name.split('.')[0]}.{target_format}" response["Content-Disposition"] = f'attachment; filename="{filename}"' return response