Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API] update api for multi file and pdf support #1522

Merged
merged 11 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ jobs:
python-version: ${{ matrix.python }}
architecture: x64
- name: Build & run docker
run: cd api && docker-compose up -d --build
run: cd api && make lock && make run
- name: Ping server
run: wget --spider --tries=12 http://localhost:8080/docs
- name: Run docker test
run: |
docker-compose -f api/docker-compose.yml exec --no-TTY web pytest tests/
run: cd api && make test
2 changes: 1 addition & 1 deletion api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update \
COPY pyproject.toml /app/pyproject.toml
COPY Makefile /app/Makefile

RUN pip install --upgrade pip setuptools wheel poetry \
RUN pip install --upgrade pip setuptools wheel \
&& make lock \
&& pip install -r /app/requirements.txt \
&& pip cache purge \
Expand Down
7 changes: 4 additions & 3 deletions api/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
.PHONY: lock run stop test
# Pin the dependencies
lock:
# Install poetry, then regenerate the lock file and export two pinned
# requirement files: requirements.txt (runtime dependencies) and
# requirements-dev.txt (runtime dependencies plus the `dev` group).
#
# The requirement specifier must be quoted: unquoted, the shell treats
# `>=1.0` as a redirection of stdout to a file named `=1.0`, so the version
# constraint never reaches pip and a stray file is left in the directory.
	pip install "poetry>=1.0"
	poetry lock
	poetry export -f requirements.txt --without-hashes --output requirements.txt
	poetry export -f requirements.txt --without-hashes --with dev --output requirements-dev.txt
Expand All @@ -18,8 +19,8 @@ stop:
# Run tests for the library
test:
docker compose up -d --build
docker cp requirements-dev.txt api_web_1:/app/requirements-dev.txt
docker cp requirements-dev.txt api_web:/app/requirements-dev.txt
felixdittrich92 marked this conversation as resolved.
Show resolved Hide resolved
docker compose exec -T web pip install -r requirements-dev.txt
docker cp tests api_web_1:/app/tests
docker compose exec -T web pytest tests/
docker cp tests api_web:/app/tests
docker compose exec -T web pytest tests/ -vv
docker compose down
123 changes: 110 additions & 13 deletions api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,39 @@ with this snippet:

```python
import requests

headers = {"accept": "application/json"}
params = {"det_arch": "db_resnet50"}

with open('/path/to/your/img.jpg', 'rb') as f:
data = f.read()
print(requests.post("http://localhost:8080/detection", files={'file': data}).json())
files = [ # application/pdf, image/jpeg, image/png supported
("files", ("117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg", f.read(), "image/jpeg")),
]
print(requests.post("http://localhost:8080/detection", headers=headers, params=params, files=files).json())
```

should yield

```json
[{'box': [0.826171875, 0.185546875, 0.90234375, 0.201171875]},
{'box': [0.75390625, 0.185546875, 0.8173828125, 0.201171875]}]
[
{
"name": "117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg",
"geometries": [
[
0.724609375,
0.1787109375,
0.7900390625,
0.2080078125
],
[
0.6748046875,
0.1796875,
0.7314453125,
0.20703125
]
]
}
]
```

#### Text recognition
Expand All @@ -56,15 +79,27 @@ with this snippet:

```python
import requests

headers = {"accept": "application/json"}
params = {"reco_arch": "crnn_vgg16_bn"}

with open('/path/to/your/img.jpg', 'rb') as f:
data = f.read()
print(requests.post("http://localhost:8080/recognition", files={'file': data}).json())
files = [ # application/pdf, image/jpeg, image/png supported
("files", ("117133599-c073fa00-ada4-11eb-831b-412de4d28341.jpeg", f.read(), "image/jpeg")),
]
print(requests.post("http://localhost:8080/recognition", headers=headers, params=params, files=files).json())
```

should yield

```json
{'value': 'invite'}
[
{
"name": "117133599-c073fa00-ada4-11eb-831b-412de4d28341.jpeg",
"value": "invite",
"confidence": 1.0
}
]
```

#### End-to-end OCR
Expand All @@ -76,16 +111,78 @@ with this snippet:

```python
import requests

headers = {"accept": "application/json"}
params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}

with open('/path/to/your/img.jpg', 'rb') as f:
data = f.read()
print(requests.post("http://localhost:8080/ocr", files={'file': data}).json())
files = [ # application/pdf, image/jpeg, image/png supported
("files", ("117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg", f.read(), "image/jpeg")),
]
print(requests.post("http://localhost:8080/ocr", headers=headers, params=params, files=files).json())
```

should yield

```json
[{'box': [0.75390625, 0.185546875, 0.8173828125, 0.201171875],
'value': 'Hello'},
{'box': [0.826171875, 0.185546875, 0.90234375, 0.201171875],
'value': 'world!'}]
[
{
"name": "117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg",
"orientation": {
"value": 0,
"confidence": null
},
"language": {
"value": null,
"confidence": null
},
"dimensions": [2339, 1654],
"items": [
{
"blocks": [
{
"geometry": [
0.7471996155154171,
0.1787109375,
0.9101580212741838,
0.2080078125
],
"lines": [
{
"geometry": [
0.7471996155154171,
0.1787109375,
0.9101580212741838,
0.2080078125
],
"words": [
{
"value": "Hello",
"geometry": [
0.7471996155154171,
0.1796875,
0.8272978149561669,
0.20703125
],
"confidence": 1.0
},
{
"value": "world!",
"geometry": [
0.8176307908857315,
0.1787109375,
0.9101580212741838,
0.2080078125
],
"confidence": 1.0
}
]
}
]
}
]
}
]
}
]
```
28 changes: 20 additions & 8 deletions api/app/routes/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,31 @@

from typing import List

from fastapi import APIRouter, File, UploadFile, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import DetectionOut
from app.vision import det_predictor
from app.schemas import DetectionIn, DetectionOut
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor
from doctr.file_utils import CLASS_NAME
from doctr.io import decode_img_as_tensor

router = APIRouter()


@router.post("/", response_model=List[DetectionOut], status_code=status.HTTP_200_OK, summary="Perform text detection")
async def text_detection(file: UploadFile = File(...)):
async def text_detection(request: DetectionIn = Depends(), files: List[UploadFile] = [File(...)]):

Check warning on line 19 in api/app/routes/detection.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

api/app/routes/detection.py#L19

Dangerous default value [] as argument
"""Runs docTR text detection model to analyze the input image"""
img = decode_img_as_tensor(file.file.read())
boxes = det_predictor([img])[0]
return [DetectionOut(box=box.tolist()) for box in boxes[CLASS_NAME][:, :-1]]
try:
predictor = init_predictor(request)
content, filenames = await get_documents(files)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

return [
DetectionOut(
name=filename,
geometries=[
geom[:-1].tolist() if len(geom) == 5 else resolve_geometry(geom.tolist()) for geom in doc[CLASS_NAME]
],
)
for doc, filename in zip(predictor(content), filenames)
]
57 changes: 40 additions & 17 deletions api/app/routes/kie.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,50 @@
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

from typing import Dict, List
from typing import List

from fastapi import APIRouter, File, UploadFile, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import OCROut
from app.vision import kie_predictor
from doctr.io import decode_img_as_tensor
from app.schemas import KIEElement, KIEIn, KIEOut
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor

router = APIRouter()


@router.post("/", response_model=Dict[str, List[OCROut]], status_code=status.HTTP_200_OK, summary="Perform KIE")
async def perform_kie(file: UploadFile = File(...)):
@router.post("/", response_model=List[KIEOut], status_code=status.HTTP_200_OK, summary="Perform KIE")
async def perform_kie(request: KIEIn = Depends(), files: List[UploadFile] = [File(...)]):

Check warning on line 18 in api/app/routes/kie.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

api/app/routes/kie.py#L18

Dangerous default value [] as argument
"""Runs docTR KIE model to analyze the input image"""
img = decode_img_as_tensor(file.file.read())
out = kie_predictor([img])

return {
class_name: [
OCROut(box=(*prediction.geometry[0], *prediction.geometry[1]), value=prediction.value)
for prediction in out.pages[0].predictions[class_name]
]
for class_name in out.pages[0].predictions.keys()
}
try:
predictor = init_predictor(request)
content, filenames = await get_documents(files)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

out = predictor(content)

results = [
KIEOut(
name=filenames[i],
orientation=page.orientation,
language=page.language,
dimensions=page.dimensions,
predictions=[
KIEElement(
class_name=class_name,
items=[
dict(
value=prediction.value,
geometry=resolve_geometry(prediction.geometry),
confidence=round(prediction.confidence, 2),
)
for prediction in page.predictions[class_name]
],
)
for class_name in page.predictions.keys()
],
)
for i, page in enumerate(out.pages)
]

return results
61 changes: 48 additions & 13 deletions api/app/routes/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,59 @@

from typing import List

from fastapi import APIRouter, File, UploadFile, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import OCROut
from app.vision import predictor
from doctr.io import decode_img_as_tensor
from app.schemas import OCRBlock, OCRIn, OCRLine, OCROut, OCRPage, OCRWord
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor

router = APIRouter()


@router.post("/", response_model=List[OCROut], status_code=status.HTTP_200_OK, summary="Perform OCR")
async def perform_ocr(file: UploadFile = File(...)):
async def perform_ocr(request: OCRIn = Depends(), files: List[UploadFile] = [File(...)]):

Check warning on line 18 in api/app/routes/ocr.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

api/app/routes/ocr.py#L18

Dangerous default value [] as argument
"""Runs docTR OCR model to analyze the input image"""
img = decode_img_as_tensor(file.file.read())
out = predictor([img])

return [
OCROut(box=(*word.geometry[0], *word.geometry[1]), value=word.value)
for block in out.pages[0].blocks
for line in block.lines
for word in line.words
try:
# generator object to list
content, filenames = await get_documents(files)
predictor = init_predictor(request)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

out = predictor(content)

results = [
OCROut(
name=filenames[i],
orientation=page.orientation,
language=page.language,
dimensions=page.dimensions,
items=[
OCRPage(
blocks=[
OCRBlock(
geometry=resolve_geometry(block.geometry),
lines=[
OCRLine(
geometry=resolve_geometry(line.geometry),
words=[
OCRWord(
value=word.value,
geometry=resolve_geometry(word.geometry),
confidence=round(word.confidence, 2),
)
for word in line.words
],
)
for line in block.lines
],
)
for block in page.blocks
]
)
],
)
for i, page in enumerate(out.pages)
]

return results
Loading
Loading