diff --git a/USAGE.md b/USAGE.md
index ac27bde5..0e1320d7 100644
--- a/USAGE.md
+++ b/USAGE.md
@@ -310,11 +310,9 @@ To convert a Docx file to PDF using the DMS you can send a `POST` request with t
 >>> resp = requests.post(
 ...     'http://localhost:8000/api/v1/convert',
 ...     data={
-...         'target_format': 'pdf'
+...         'file': file_to_convert,
+...         'target_format': 'pdf'
 ...     },
-...     files=(
-...         ("file", open('my-example-file.docx', 'rb')),
-...     ),
 ... )
 ```
 
diff --git a/document_merge_service/api/data/odt-template.odt b/document_merge_service/api/data/odt-template.odt
new file mode 100644
index 00000000..ade74388
Binary files /dev/null and b/document_merge_service/api/data/odt-template.odt differ
diff --git a/document_merge_service/api/file_converter.py b/document_merge_service/api/file_converter.py
new file mode 100644
index 00000000..88f8f962
--- /dev/null
+++ b/document_merge_service/api/file_converter.py
@@ -0,0 +1,41 @@
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+
+from django.conf import settings
+from django.http import HttpResponse
+
+from .unoconv import Unoconv
+
+
+class FileConverter:
+    """Convert document contents to another format through unoconv."""
+
+    @staticmethod
+    def convert(file_contents, target_format):
+        """Convert `file_contents` to `target_format`.
+
+        The contents are written to a temporary file below
+        `settings.DATABASE_DIR/tmp` and that file is passed to unoconv.
+
+        Returns an `HttpResponse` with unoconv's stdout and content type;
+        its status is 200 if unoconv's return code is 0 and 500 otherwise.
+        """
+        tmp_dir = Path(settings.DATABASE_DIR, "tmp")
+        tmp_dir.mkdir(parents=True, exist_ok=True)
+
+        with NamedTemporaryFile("wb", dir=tmp_dir) as tmp:
+            tmp.write(file_contents)
+            # unoconv only gets the file name, so the buffered bytes must
+            # be flushed to the file before it runs.
+            tmp.flush()
+            unoconv = Unoconv(
+                pythonpath=settings.UNOCONV_PYTHON,
+                unoconvpath=settings.UNOCONV_PATH,
+            )
+            result = unoconv.process(tmp.name, target_format)
+
+        status = 200 if result.returncode == 0 else 500
+
+        return HttpResponse(
+            content=result.stdout, status=status, content_type=result.content_type
+        )
diff --git a/document_merge_service/api/serializers.py b/document_merge_service/api/serializers.py
index 7f94af69..4952ab36 100644
--- a/document_merge_service/api/serializers.py
+++ b/document_merge_service/api/serializers.py
@@ -144,6 +144,6 @@ class ConvertSerializer(serializers.Serializer):
     target_format = serializers.ChoiceField(
         allow_null=False,
         required=True,
-        choices=("pdf"),
+        choices=[("pdf", "PDF")],
         help_text="The target format of the conversion. Currently only 'pdf' is supported.",
     )
diff --git a/document_merge_service/api/tests/test_convert.py b/document_merge_service/api/tests/test_convert.py
index c73ab02b..46a7f8b1 100644
--- a/document_merge_service/api/tests/test_convert.py
+++ b/document_merge_service/api/tests/test_convert.py
@@ -5,19 +5,28 @@
 from document_merge_service.api.data import django_file
 
 
-# @pytest.mark.parametrize("template__description", ["test description"])
 @pytest.mark.parametrize(
     "target_format,response_content_type",
     [
         ("pdf", "application/pdf"),
     ],
 )
-def test_convert(db, client, template, target_format, response_content_type):
+def test_convert(db, client, target_format, response_content_type):
     url = reverse("convert")
-    file_to_convert = django_file("docx-template-syntax.docx")
+    file_to_convert = django_file("docx-template.docx")
 
     data = {"file": file_to_convert.file, "target_format": target_format}
     response = client.post(url, data=data, format="multipart")
 
     assert response.status_code == status.HTTP_200_OK
     assert response.headers.get("Content-Type") == response_content_type
+
+
+def test_incorrect_file_type(db, client):
+    url = reverse("convert")
+    file_to_convert = django_file("odt-template.odt")
+
+    data = {"file": file_to_convert.file, "target_format": "pdf"}
+    response = client.post(url, data=data, format="multipart")
+
+    assert response.status_code == status.HTTP_400_BAD_REQUEST
diff --git a/document_merge_service/api/views.py b/document_merge_service/api/views.py
index 20e3a928..bbd537be 100644
--- a/document_merge_service/api/views.py
+++ b/document_merge_service/api/views.py
@@ -1,9 +1,6 @@
 import mimetypes
-from pathlib import Path
-from tempfile import NamedTemporaryFile
 
 import jinja2
-from django.conf import settings
 from django.http import HttpResponse
 from django.utils.encoding import smart_str
 from generic_permissions.permissions import PermissionViewMixin
@@ -14,7 +11,7 @@
 from rest_framework.views import APIView
 
 from . import engines, filters, models, serializers
-from .unoconv import Unoconv
+from .file_converter import FileConverter
 
 
 class TemplateView(VisibilityViewMixin, PermissionViewMixin, viewsets.ModelViewSet):
@@ -37,7 +34,6 @@ def merge(self, request, pk=None):
         response = HttpResponse(
             content_type=content_type or "application/force-download"
         )
-        extension = mimetypes.guess_extension(content_type)
 
         serializer = self.get_serializer(data=request.data)
         serializer.is_valid(raise_exception=True)
@@ -59,24 +55,15 @@ def merge(self, request, pk=None):
 
         convert = serializer.data.get("convert")
         if convert:
-            dir = Path(settings.DATABASE_DIR, "tmp")
-            dir.mkdir(parents=True, exist_ok=True)
-            with NamedTemporaryFile("wb", dir=dir) as tmp:
-                tmp.write(response.content)
-                unoconv = Unoconv(
-                    pythonpath=settings.UNOCONV_PYTHON,
-                    unoconvpath=settings.UNOCONV_PATH,
-                )
-                result = unoconv.process(tmp.name, convert)
-            extension = convert
-            status = 500
-            if result.returncode == 0:
-                status = 200
-            response = HttpResponse(
-                content=result.stdout, status=status, content_type=result.content_type
-            )
+            response = FileConverter.convert(response.content, convert)
+            extension = convert
+        else:
+            # No conversion requested: fall back to the template's own type
+            # rather than emitting "<slug>.None".
+            guessed = mimetypes.guess_extension(content_type or "") or ""
+            extension = guessed.lstrip(".")
 
         filename = f"{template.slug}.{extension}"
         response["Content-Disposition"] = f'attachment; filename="{filename}"'
         return response
 
@@ -104,34 +91,23 @@ def retrieve(self, request, **kwargs):
 class ConvertView(APIView):
     def post(self, request, **kwargs):
         serializer = serializers.ConvertSerializer(data=request.data)
-        serializer.is_valid()
-        template = models.Template(
-            engine="docx-template", template=serializer.data["file"]
-        )
+        serializer.is_valid(raise_exception=True)
 
-        content_type, _ = mimetypes.guess_type(template.template.name)
-        response = HttpResponse(
-            content_type=content_type or "application/force-download"
-        )
+        file = serializer.data["file"]
         target_format = serializer.data["target_format"]
 
-        dir = Path(settings.DATABASE_DIR, "tmp")
-        dir.mkdir(parents=True, exist_ok=True)
-        with NamedTemporaryFile("wb", dir=dir) as tmp:
-            tmp.write(template.template.file.read())
-            unoconv = Unoconv(
-                pythonpath=settings.UNOCONV_PYTHON,
-                unoconvpath=settings.UNOCONV_PATH,
+        content_type, _ = mimetypes.guess_type(file.name)
+
+        if (
+            content_type
+            != "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+        ):
+            raise exceptions.ValidationError(
+                "Incorrect file format. Only docx files are supported for conversion."
             )
-            result = unoconv.process(tmp.name, target_format)
 
-        status = 500
-        if result.returncode == 0:
-            status = 200
-        response = HttpResponse(
-            content=result.stdout, status=status, content_type=result.content_type
-        )
+        response = FileConverter.convert(file.read(), target_format)
 
-        filename = f"{template.template.name.split('.')[0]}.{target_format}"
+        filename = f"{file.name.rsplit('.', 1)[0]}.{target_format}"
         response["Content-Disposition"] = f'attachment; filename="{filename}"'
         return response