adfinis · StephanH90 · Aug 3, 2023 · Jul 11, 2023 · Jul 12, 2023 · Aug 2, 2023
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/adfinis/document-merge-service)
 [![License: GPL-3.0-or-later](https://img.shields.io/github/license/adfinis/document-merge-service)](https://spdx.org/licenses/GPL-3.0-or-later.html)
 
-A document template merge service providing an API to manage templates and merge them with given data.
+A document template merge service providing an API to manage templates and merge them with given data. It can also be used to convert Docx files to PDF.
 
 ## Installation
 
@@ -54,6 +54,14 @@ After uploading successfully, you can merge a template with the following call:
 curl -H "Content-Type: application/json" --data '{"data": {"test": "Test Input"}}' http://localhost:8000/api/v1/template/test-template/merge/ > output.docx
 ```
 
+### Converting a template
+To convert a standalone Docx file the following call can be used:
+
+```bash
+curl -X POST --form file=@example.docx --form target_format="pdf" http://localhost:8000/api/v1/convert > example.pdf
+```
+
+
 ## Further reading
 
 - [Configuration](CONFIGURATION.md) - Further configuration and how to do a production setup

diff --git a/USAGE.md b/USAGE.md
@@ -296,6 +296,24 @@ example above, `data` would look like this:
 ...     },
 ```
 
+## Converting Docx files
+
+The document merge service can also be used to convert a single Docx file to PDF.
+
+If you want to simultaneously merge a template with data and convert it to PDF, use the merge function as explained in [merging templates](#merging-templates).
+
+To convert a Docx file to PDF using the DMS you can send a `POST` request with the file and the `target_format`. Currently `pdf` is the only possible `target_format`.
+
+```python
+>>> resp = requests.post(
+...     'http://localhost:8000/api/v1/convert',
+...     data={
+...         'file': file_to_convert,
+...         'target_format': 'pdf'
+...     },
+... )
+```
+
 ## Maintenance / Cleanup
 
 The DMS allows REST verbs like `PATCH` and `DELETE` for updating and deleting

diff --git a/document_merge_service/api/data/invalid-template.xlsx b/document_merge_service/api/data/invalid-template.xlsx
diff --git a/document_merge_service/api/data/odt-template.odt b/document_merge_service/api/data/odt-template.odt
diff --git a/document_merge_service/api/file_converter.py b/document_merge_service/api/file_converter.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+
+from django.conf import settings
+from django.http import HttpResponse
+
+from .unoconv import Unoconv
+
+
+class FileConverter:
+    """Convert raw file contents to another format by running unoconv."""
+
+    @staticmethod
+    def convert(file_contents, target_format):
+        """
+        Convert `file_contents` to `target_format` and wrap the result in a response.
+
+        The contents are written to a temporary file below
+        `settings.DATABASE_DIR/tmp`, whose name is then handed to unoconv.
+
+        Returns an `HttpResponse` carrying unoconv's stdout and content type,
+        with status 200 if the process return code is 0 and status 500 otherwise.
+        """
+        tmp_dir = Path(settings.DATABASE_DIR, "tmp")
+        tmp_dir.mkdir(parents=True, exist_ok=True)
+
+        with NamedTemporaryFile("wb", dir=tmp_dir) as tmp:
+            tmp.write(file_contents)
+            # unoconv is given the file *name*, not this handle, so buffered
+            # data must reach the disk first or it may see a truncated file.
+            tmp.flush()
+            unoconv = Unoconv(
+                pythonpath=settings.UNOCONV_PYTHON,
+                unoconvpath=settings.UNOCONV_PATH,
+            )
+            result = unoconv.process(tmp.name, target_format)
+
+        status = 200 if result.returncode == 0 else 500
+
+        return HttpResponse(
+            content=result.stdout, status=status, content_type=result.content_type
+        )
diff --git a/document_merge_service/api/serializers.py b/document_merge_service/api/serializers.py
@@ -137,3 +137,13 @@ class TemplateMergeSerializer(serializers.Serializer):
 
     class Meta:
         model = models.Template
+
+
+class ConvertSerializer(serializers.Serializer):
+    """Validate the payload of a standalone file conversion request."""
+
+    # Uploaded document to convert; it is mandatory and must not be empty.
+    file = CustomFileField(allow_empty_file=False, required=True)
+    # Requested output format; "pdf" is the single accepted choice.
+    target_format = serializers.ChoiceField(
+        choices=[("pdf", "PDF")],
+        required=True,
+        allow_null=False,
+        help_text="The target format of the conversion. Currently only 'pdf' is supported.",
+    )
diff --git a/document_merge_service/api/tests/test_convert.py b/document_merge_service/api/tests/test_convert.py
@@ -0,0 +1,32 @@
+import pytest
+from django.urls import reverse
+from rest_framework import status
+
+from document_merge_service.api.data import django_file
+
+
+@pytest.mark.parametrize(
+    "target_format,response_content_type",
+    [("pdf", "application/pdf")],
+)
+def test_convert(db, client, target_format, response_content_type):
+    """Posting a docx file to the convert endpoint yields the expected content type."""
+    endpoint = reverse("convert")
+    upload = django_file("docx-template.docx")
+    payload = {"file": upload.file, "target_format": target_format}
+
+    response = client.post(endpoint, data=payload, format="multipart")
+
+    assert response.status_code == status.HTTP_200_OK
+    assert response.headers.get("Content-Type") == response_content_type
+
+
+def test_incorrect_file_type(db, client):
+    """Posting an xlsx file to the convert endpoint is rejected with a 400."""
+    endpoint = reverse("convert")
+    upload = django_file("invalid-template.xlsx")
+    payload = {"file": upload.file, "target_format": "pdf"}
+
+    response = client.post(endpoint, data=payload, format="multipart")
+
+    assert response.status_code == status.HTTP_400_BAD_REQUEST
diff --git a/document_merge_service/api/urls.py b/document_merge_service/api/urls.py
@@ -12,7 +12,12 @@
         r"^template-download/(?P<pk>.+)$",
         views.DownloadTemplateView.as_view(),
         name="template-download",
-    )
+    ),
+    re_path(
+        r"^convert$",
+        views.ConvertView.as_view(),
+        name="convert",
+    ),
 ]
 
 urlpatterns.extend(r.urls)
diff --git a/document_merge_service/api/views.py b/document_merge_service/api/views.py
@@ -1,19 +1,17 @@
 import mimetypes
-from pathlib import Path
-from tempfile import NamedTemporaryFile
 
 import jinja2
-from django.conf import settings
 from django.http import HttpResponse
 from django.utils.encoding import smart_str
 from generic_permissions.permissions import PermissionViewMixin
 from generic_permissions.visibilities import VisibilityViewMixin
 from rest_framework import exceptions, viewsets
 from rest_framework.decorators import action
 from rest_framework.generics import RetrieveAPIView
+from rest_framework.views import APIView
 
 from . import engines, filters, models, serializers
-from .unoconv import Unoconv
+from .file_converter import FileConverter
 
 
 class TemplateView(VisibilityViewMixin, PermissionViewMixin, viewsets.ModelViewSet):
@@ -36,7 +34,6 @@ def merge(self, request, pk=None):
         response = HttpResponse(
             content_type=content_type or "application/force-download"
         )
-        extension = mimetypes.guess_extension(content_type)
 
         serializer = self.get_serializer(data=request.data)
         serializer.is_valid(raise_exception=True)
@@ -58,24 +55,9 @@ def merge(self, request, pk=None):
         convert = serializer.data.get("convert")
 
         if convert:
-            dir = Path(settings.DATABASE_DIR, "tmp")
-            dir.mkdir(parents=True, exist_ok=True)
-            with NamedTemporaryFile("wb", dir=dir) as tmp:
-                tmp.write(response.content)
-                unoconv = Unoconv(
-                    pythonpath=settings.UNOCONV_PYTHON,
-                    unoconvpath=settings.UNOCONV_PATH,
-                )
-                result = unoconv.process(tmp.name, convert)
-            extension = convert
-            status = 500
-            if result.returncode == 0:
-                status = 200
-            response = HttpResponse(
-                content=result.stdout, status=status, content_type=result.content_type
-            )
+            response = FileConverter.convert(response.content, convert)
 
-        filename = f"{template.slug}.{extension}"
+        filename = f"{template.slug}.{convert}"
         response["Content-Disposition"] = f'attachment; filename="{filename}"'
         return response
 
@@ -98,3 +80,28 @@ def retrieve(self, request, **kwargs):
         response["Content-Length"] = template.template.size
         response.write(template.template.read())
         return response
+
+
+class ConvertView(APIView):
+    """Convert an uploaded docx or odt file to the requested target format."""
+
+    def post(self, request, **kwargs):
+        """
+        Validate the upload, convert it and return the result as an attachment.
+
+        Raises a `ValidationError` if the serializer rejects the request data
+        or if the type guessed from the file name is neither docx nor odt.
+        """
+        serializer = serializers.ConvertSerializer(data=request.data)
+        serializer.is_valid(raise_exception=True)
+
+        file = serializer.data["file"]
+        target_format = serializer.data["target_format"]
+
+        # The type is guessed from the file name only; the encoding part of
+        # the result is not needed.
+        content_type, _ = mimetypes.guess_type(file.name)
+
+        if content_type not in [
+            "application/vnd.oasis.opendocument.text",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        ]:
+            raise exceptions.ValidationError(
+                "Incorrect file format. Only docx and odt files are supported for conversion."
+            )
+
+        response = FileConverter.convert(file.read(), target_format)
+
+        # Strip only the last extension so dots inside the base name
+        # (e.g. "report.v2.docx") survive in the download name.
+        filename = f"{file.name.rsplit('.', 1)[0]}.{target_format}"
+        response["Content-Disposition"] = f'attachment; filename="{filename}"'
+        return response