Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow conversion of docx files to pdf using a new convert endpoint #622

Merged
merged 3 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/adfinis/document-merge-service)
[![License: GPL-3.0-or-later](https://img.shields.io/github/license/adfinis/document-merge-service)](https://spdx.org/licenses/GPL-3.0-or-later.html)

A document template merge service providing an API to manage templates and merge them with given data.
A document template merge service providing an API to manage templates and merge them with given data. It can also be used to convert Docx files to PDF.

## Installation

Expand Down Expand Up @@ -54,6 +54,14 @@ After uploading successfully, you can merge a template with the following call:
curl -H "Content-Type: application/json" --data '{"data": {"test": "Test Input"}}' http://localhost:8000/api/v1/template/test-template/merge/ > output.docx
```

### Converting a template
To convert a standalone Docx file the following call can be used:

```bash
curl -X POST --form file=@example.docx --form target_format="pdf" http://localhost:8000/api/v1/convert > example.pdf
```


## Further reading

- [Configuration](CONFIGURATION.md) - Further configuration and how to do a production setup
Expand Down
18 changes: 18 additions & 0 deletions USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,24 @@ example above, `data` would look like this:
... },
```

## Converting Docx files

The document merge service can also be used to convert a single Docx file to PDF.

If you want to simultaneously merge a template with data and convert it to PDF, use the merge function as explained in [merging templates](#merging-templates).

To convert a Docx file to PDF using the DMS you can send a `POST` request with the file and the `target_format`. Currently `pdf` is the only possible `target_format`.

```python
>>> resp = requests.post(
... 'http://localhost:8000/api/v1/convert',
... data={
... 'file': file_to_convert,
... 'target_format': 'pdf'
... },
... )
```

## Maintenance / Cleanup

The DMS allows REST verbs like `PATCH` and `DELETE` for updating and deleting
Expand Down
Binary file not shown.
Binary file added document_merge_service/api/data/odt-template.odt
Binary file not shown.
27 changes: 27 additions & 0 deletions document_merge_service/api/file_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from pathlib import Path
from tempfile import NamedTemporaryFile

from django.conf import settings
from django.http import HttpResponse

from .unoconv import Unoconv


class FileConverter:
    """Convert raw document bytes to another format through unoconv."""

    @staticmethod
    def convert(file_contents, target_format):
        """Convert ``file_contents`` to ``target_format`` and return an HTTP response.

        The bytes are written to a temporary file below
        ``settings.DATABASE_DIR / "tmp"`` and the path of that file is passed
        to ``Unoconv.process``.

        Args:
            file_contents: Bytes of the document to convert.
            target_format: Format identifier handed to unoconv (e.g. ``"pdf"``).

        Returns:
            An ``HttpResponse`` whose body is the converter's stdout and whose
            content type is taken from the converter result. The status is 200
            when the converter exits with return code 0, otherwise 500.
        """
        tmp_dir = Path(settings.DATABASE_DIR, "tmp")
        tmp_dir.mkdir(parents=True, exist_ok=True)

        with NamedTemporaryFile("wb", dir=tmp_dir) as tmp:
            tmp.write(file_contents)
            # The converter only receives the file's path, so buffered bytes
            # must be pushed to disk before it runs; otherwise small documents
            # may still sit in Python's write buffer.
            tmp.flush()
            unoconv = Unoconv(
                pythonpath=settings.UNOCONV_PYTHON,
                unoconvpath=settings.UNOCONV_PATH,
            )
            # Must run inside the ``with`` block: the file is removed on exit.
            result = unoconv.process(tmp.name, target_format)

        status = 200 if result.returncode == 0 else 500

        return HttpResponse(
            content=result.stdout, status=status, content_type=result.content_type
        )
10 changes: 10 additions & 0 deletions document_merge_service/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,13 @@ class TemplateMergeSerializer(serializers.Serializer):

class Meta:
model = models.Template


class ConvertSerializer(serializers.Serializer):
    """Validate the payload of the standalone convert endpoint.

    Expects a non-empty uploaded ``file`` and a ``target_format`` drawn from
    the supported choices.
    """

    # The upload to convert; it is required and must not be empty.
    file = CustomFileField(allow_empty_file=False, required=True)

    # Requested output format; "pdf" is the only accepted choice.
    target_format = serializers.ChoiceField(
        choices=[("pdf", "PDF")],
        required=True,
        allow_null=False,
        help_text="The target format of the conversion. Currently only 'pdf' is supported.",
    )
32 changes: 32 additions & 0 deletions document_merge_service/api/tests/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pytest
from django.urls import reverse
from rest_framework import status

from document_merge_service.api.data import django_file


@pytest.mark.parametrize(
    "target_format,response_content_type",
    [
        ("pdf", "application/pdf"),
    ],
)
def test_convert(db, client, target_format, response_content_type):
    """Posting a docx file to the convert endpoint yields the expected content type."""
    endpoint = reverse("convert")
    source = django_file("docx-template.docx")
    payload = {"file": source.file, "target_format": target_format}

    response = client.post(endpoint, data=payload, format="multipart")

    assert response.status_code == status.HTTP_200_OK
    assert response.headers.get("Content-Type") == response_content_type


def test_incorrect_file_type(db, client):
    """Posting an xlsx file to the convert endpoint is rejected with HTTP 400."""
    endpoint = reverse("convert")
    source = django_file("invalid-template.xlsx")
    payload = {"file": source.file, "target_format": "pdf"}

    response = client.post(endpoint, data=payload, format="multipart")

    assert response.status_code == status.HTTP_400_BAD_REQUEST
7 changes: 6 additions & 1 deletion document_merge_service/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
r"^template-download/(?P<pk>.+)$",
views.DownloadTemplateView.as_view(),
name="template-download",
)
),
re_path(
r"^convert$",
views.ConvertView.as_view(),
name="convert",
),
]

urlpatterns.extend(r.urls)
51 changes: 29 additions & 22 deletions document_merge_service/api/views.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import mimetypes
from pathlib import Path
from tempfile import NamedTemporaryFile

import jinja2
from django.conf import settings
from django.http import HttpResponse
from django.utils.encoding import smart_str
from generic_permissions.permissions import PermissionViewMixin
from generic_permissions.visibilities import VisibilityViewMixin
from rest_framework import exceptions, viewsets
from rest_framework.decorators import action
from rest_framework.generics import RetrieveAPIView
from rest_framework.views import APIView

from . import engines, filters, models, serializers
from .unoconv import Unoconv
from .file_converter import FileConverter


class TemplateView(VisibilityViewMixin, PermissionViewMixin, viewsets.ModelViewSet):
Expand All @@ -36,7 +34,6 @@ def merge(self, request, pk=None):
response = HttpResponse(
content_type=content_type or "application/force-download"
)
extension = mimetypes.guess_extension(content_type)

serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
Expand All @@ -58,24 +55,9 @@ def merge(self, request, pk=None):
convert = serializer.data.get("convert")

if convert:
dir = Path(settings.DATABASE_DIR, "tmp")
dir.mkdir(parents=True, exist_ok=True)
with NamedTemporaryFile("wb", dir=dir) as tmp:
tmp.write(response.content)
unoconv = Unoconv(
pythonpath=settings.UNOCONV_PYTHON,
unoconvpath=settings.UNOCONV_PATH,
)
result = unoconv.process(tmp.name, convert)
extension = convert
status = 500
if result.returncode == 0:
status = 200
response = HttpResponse(
content=result.stdout, status=status, content_type=result.content_type
)
response = FileConverter.convert(response.content, convert)

filename = f"{template.slug}.{extension}"
filename = f"{template.slug}.{convert}"
response["Content-Disposition"] = f'attachment; filename="{filename}"'
return response

Expand All @@ -98,3 +80,28 @@ def retrieve(self, request, **kwargs):
response["Content-Length"] = template.template.size
response.write(template.template.read())
return response


class ConvertView(APIView):
    """Endpoint that converts a single uploaded docx/odt file."""

    def post(self, request, **kwargs):
        """Validate the upload, convert it and return the result as an attachment.

        Raises:
            ValidationError: If the payload fails ``ConvertSerializer``
                validation, or if the MIME type guessed from the file name is
                neither docx nor odt.

        Returns:
            The response produced by ``FileConverter.convert`` with a
            ``Content-Disposition`` header naming the converted file.
        """
        serializer = serializers.ConvertSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        # NOTE(review): this relies on the serializer's file field exposing the
        # uploaded file object (not just its name) via ``serializer.data`` —
        # confirm against CustomFileField.
        file = serializer.data["file"]
        target_format = serializer.data["target_format"]

        # Only the guessed type matters; the encoding part is not needed.
        content_type, _ = mimetypes.guess_type(file.name)

        if content_type not in [
            "application/vnd.oasis.opendocument.text",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ]:
            raise exceptions.ValidationError(
                "Incorrect file format. Only docx and odt files are supported for conversion."
            )

        response = FileConverter.convert(file.read(), target_format)

        # Drop only the final extension so dotted names such as
        # "report.v2.docx" become "report.v2.pdf" rather than "report.pdf".
        stem = file.name.rsplit(".", 1)[0]
        filename = f"{stem}.{target_format}"
        response["Content-Disposition"] = f'attachment; filename="{filename}"'
        return response
Loading