Skip to content

Commit

Permalink
feat: allow conversion of docx files to pdf using a new convert endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
StephanH90 committed Jul 11, 2023
1 parent ec30ac6 commit 01a8bd5
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 2 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/adfinis/document-merge-service)
[![License: GPL-3.0-or-later](https://img.shields.io/github/license/adfinis/document-merge-service)](https://spdx.org/licenses/GPL-3.0-or-later.html)

A document template merge service providing an API to manage templates and merge them with given data.
A document template merge service providing an API to manage templates and merge them with given data. It can also be used to convert Docx files to PDF.

## Installation

Expand Down Expand Up @@ -53,6 +53,13 @@ After uploading successfully, you can merge a template with the following call:
curl -H "Content-Type: application/json" --data '{"data": {"test": "Test Input"}}' http://localhost:8000/api/v1/template/test-template/merge/ > output.docx
```

### Converting a template
To convert a standalone Docx file the following call can be used:

```bash
curl -X POST --form file=@example.docx --form target_format="pdf" http://localhost:8000/api/v1/convert > example.pdf
```


## Further reading

Expand Down
20 changes: 20 additions & 0 deletions USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,26 @@ example above, `data` would look like this:
... },
```

## Converting Docx files

The document merge service can also be used to convert a single Docx file to PDF.

If you want to simultaneously merge a template with data and convert it to PDF, use the merge function as explained in [merging templates](#merging-templates).

To convert a Docx file to PDF using the DMS you can send a `POST` request with the file and the `target_format`. Currently `pdf` is the only possible `target_format`.

```python
>>> resp = requests.post(
... 'http://localhost:8000/api/v1/convert',
... data={
... 'target_format': 'pdf'
... },
... files=(
... ("file", open('my-example-file.docx', 'rb')),
... ),
... )
```

## Maintenance / Cleanup

The DMS allows REST verbs like `PATCH` and `DELETE` for updating and deleting
Expand Down
10 changes: 10 additions & 0 deletions document_merge_service/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,13 @@ class TemplateMergeSerializer(serializers.Serializer):

class Meta:
model = models.Template


class ConvertSerializer(serializers.Serializer):
    """Validate a request to convert an uploaded file to another format.

    Fields:
        file: the uploaded document to convert; required and must not be
            empty.
        target_format: the requested output format; required, and ``"pdf"``
            is the only accepted value.
    """

    file = CustomFileField(required=True, allow_empty_file=False)
    target_format = serializers.ChoiceField(
        allow_null=False,
        required=True,
        # This has to be a real sequence. ("pdf") is just the string "pdf";
        # iterating it yields the single characters "p", "d" and "f" as the
        # valid choices, so the value "pdf" itself would be rejected.
        choices=("pdf",),
        help_text="The target format of the conversion. Currently only 'pdf' is supported.",
    )
23 changes: 23 additions & 0 deletions document_merge_service/api/tests/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest
from django.urls import reverse
from rest_framework import status

from document_merge_service.api.data import django_file


@pytest.mark.parametrize(
    "target_format,response_content_type",
    [("pdf", "application/pdf")],
)
def test_convert(db, client, template, target_format, response_content_type):
    """A Docx upload to the convert endpoint answers 200 with the expected type."""
    source_file = django_file("docx-template-syntax.docx")
    payload = {"file": source_file.file, "target_format": target_format}

    response = client.post(reverse("convert"), data=payload, format="multipart")

    assert response.status_code == status.HTTP_200_OK
    assert response.headers.get("Content-Type") == response_content_type
7 changes: 6 additions & 1 deletion document_merge_service/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
r"^template-download/(?P<pk>.+)$",
views.DownloadTemplateView.as_view(),
name="template-download",
)
),
re_path(
r"^convert$",
views.ConvertView.as_view(),
name="convert",
),
]

urlpatterns.extend(r.urls)
37 changes: 37 additions & 0 deletions document_merge_service/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from rest_framework import exceptions, viewsets
from rest_framework.decorators import action
from rest_framework.generics import RetrieveAPIView
from rest_framework.views import APIView

from . import engines, filters, models, serializers
from .unoconv import Unoconv
Expand Down Expand Up @@ -98,3 +99,39 @@ def retrieve(self, request, **kwargs):
response["Content-Length"] = template.template.size
response.write(template.template.read())
return response


class ConvertView(APIView):
    """Convert an uploaded document to the requested target format."""

    def post(self, request, **kwargs):
        """Run the uploaded file through unoconv and return its output.

        The uploaded file is copied to a temporary file below
        ``settings.DATABASE_DIR / "tmp"`` and handed to unoconv by path.

        Returns:
            An ``HttpResponse`` whose body is unoconv's stdout and whose
            content type is the one reported by the unoconv result. The
            status is 200 when unoconv exits with return code 0 and 500
            otherwise. The response is always sent as an attachment named
            after the uploaded file, with the target format as extension.
        """
        serializer = serializers.ConvertSerializer(data=request.data)
        # NOTE(review): the result of is_valid() is discarded, so a failed
        # validation does not stop the request. Consider switching to
        # is_valid(raise_exception=True) and serializer.validated_data once
        # ConvertSerializer is confirmed to accept the documented "pdf" value.
        serializer.is_valid()
        template = models.Template(
            engine="docx-template", template=serializer.data["file"]
        )
        target_format = serializer.data["target_format"]

        tmp_dir = Path(settings.DATABASE_DIR, "tmp")
        tmp_dir.mkdir(parents=True, exist_ok=True)
        with NamedTemporaryFile("wb", dir=tmp_dir) as tmp:
            tmp.write(template.template.file.read())
            # process() only receives the path, so the buffered bytes must be
            # pushed to the file before it is opened again by name; otherwise
            # the converter can see a truncated or empty document.
            tmp.flush()
            unoconv = Unoconv(
                pythonpath=settings.UNOCONV_PYTHON,
                unoconvpath=settings.UNOCONV_PATH,
            )
            result = unoconv.process(tmp.name, target_format)

        status = 200 if result.returncode == 0 else 500
        response = HttpResponse(
            content=result.stdout, status=status, content_type=result.content_type
        )

        # Only strip the final extension: "my.report.docx" becomes
        # "my.report.pdf" rather than "my.pdf".
        filename = f"{Path(template.template.name).stem}.{target_format}"
        response["Content-Disposition"] = f'attachment; filename="{filename}"'
        return response

0 comments on commit 01a8bd5

Please sign in to comment.