From ada50eafb8cdc2072a5fcdd7ea678afc5cf497ad Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Thu, 23 May 2024 21:54:48 +0100 Subject: [PATCH 1/2] Using new archive urls --- iiify/app.py | 2 +- iiify/resolver.py | 76 ++++++++++++++++++++++++++++++++++---- tests/test_manifests_v2.py | 13 ++++++- 3 files changed, 82 insertions(+), 9 deletions(-) diff --git a/iiify/app.py b/iiify/app.py index 8f8a041..ed9414c 100755 --- a/iiify/app.py +++ b/iiify/app.py @@ -199,7 +199,7 @@ def manifest(identifier): @app.route('/iiif/2//manifest.json') def manifest2(identifier): - domain = "https://iiif.archivelab.org/iiif/" + domain = purify_domain(request.args.get('domain', request.url_root)) page = None if '$' in identifier: identifier, page = identifier.split('$') diff --git a/iiify/resolver.py b/iiify/resolver.py index c84d6ff..0dff0e9 100644 --- a/iiify/resolver.py +++ b/iiify/resolver.py @@ -4,11 +4,12 @@ import requests from iiif2 import iiif, web from .configs import options, cors, approot, cache_root, media_root, apiurl -from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef +from iiif_prezi3 import Manifest, config, Annotation, AnnotationPage,AnnotationPageRef, Canvas, Manifest, ResourceItem, ServiceItem, Choice, Collection, ManifestRef, CollectionRef from urllib.parse import urlparse, parse_qs, quote import json import math import re +import xml.etree.ElementTree as ET IMG_CTX = 'http://iiif.io/api/image/2/context.json' PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json' @@ -145,10 +146,13 @@ def create_collection3(identifier, domain, page=1, rows=1000): return json.loads(collection.jsonld()) -def manifest_page(identifier, label='', page='', width='', height='', metadata=None): +def manifest_page(identifier, label='', page='', width='', height='', metadata=None, canvasId=""): + if not canvasId: + canvasId = f"{identifier}/canvas" + metadata = metadata or {} return { - '@id': '%s/canvas' % identifier, + '@id': canvasId, '@type': 'sc:Canvas', '@context': PRZ_CTX, 'description': metadata.get('description', ''), @@ -234,7 +238,8 @@ def create_manifest(identifier, domain=None, page=None): label=metadata['title'], width=info['width'], height=info['height'], - metadata=metadata + metadata=metadata, + canvasId= f"https://iiif.archivelab.org/iiif/{identifier}/canvas" ) ) @@ -274,7 +279,8 @@ def create_manifest(identifier, domain=None, page=None): label=metadata['title'], width=info['width'], height=info['height'], - metadata=metadata + metadata=metadata, + canvasId= f"https://iiif.archivelab.org/iiif/{identifier}/canvas" ) ) else: @@ -293,7 +299,8 @@ def create_manifest(identifier, domain=None, page=None): identifier = "%s%s$%s" % (domain, identifier, page), label=data['pageNums'][page], width=data['pageWidths'][page], - height=data['pageHeights'][page] + height=data['pageHeights'][page], + canvasId= f"https://iiif.archivelab.org/iiif/{identifier}${page}/canvas" ) ) return manifest @@ -304,7 +311,8 @@ def create_manifest(identifier, domain=None, page=None): identifier = "%s%s$%s" % (domain, identifier, page), label=data['pageNums'][page], width=data['pageWidths'][page], - height=data['pageHeights'][page] + height=data['pageHeights'][page], + canvasId= f"https://iiif.archivelab.org/iiif/{identifier}${page}/canvas" ) ) return manifest @@ -406,9 +414,12 @@ def create_manifest3(identifier, domain=None, page=None): # subprefix can be different from the identifier use the scandata filename to find the correct prefix # if not present fall back to identifier subprefix = identifier + djvuFile = "" for fileMd in metadata['files']: if fileMd['name'].endswith('_scandata.xml'): subprefix = fileMd['name'].replace('_scandata.xml', '') + if fileMd['format'] == 'Djvu XML': + djvuFile = fileMd['name'] bookReaderURL = f"https://{metadata.get('server')}/BookReader/BookReaderJSIA.php?id={identifier}&itemPath={metadata.get('dir')}&server={metadata.get('server')}&format=jsonp&subPrefix={subprefix}" @@ -469,7 +480,20 @@ def create_manifest3(identifier, domain=None, page=None): except: pass + # Add annotations if djvu file is present + if djvuFile: + count = 1 + for canvas in manifest.items: + if 'annotations' in canvas: + annotations = canvas.annotations + else: + annotations = [] + annotations.append( + AnnotationPageRef(id=f"{domain}3/annotations/{identifier}/{quote(djvuFile, safe='()')}/{count}.json", type="AnnotationPage") + ) + canvas.annotations = annotations + count += 1 elif mediatype == 'image': (multiFile, format) = checkMultiItem(metadata) print (f"Checking multiFile {multiFile} {format}") @@ -613,6 +637,44 @@ def create_manifest3(identifier, domain=None, page=None): return json.loads(manifest.jsonld()) +def create_annotations(version, identifier, fileName, canvas_no, domain=None): + annotationPage = AnnotationPage(id=f"{domain}{version}/annotations/{identifier}/{quote(fileName, safe='()')}/{canvas_no}.json") + annotationPage.items = [] + index = int(canvas_no) - 1 + url = f"{ARCHIVE}/download/{identifier}/{fileName}" + try: + # Fetch the remote XML file + response = requests.get(url) + response.raise_for_status() # Raise an error for bad status codes + + # Parse the XML content + djfu = ET.fromstring(response.content) + page = djfu.findall(f".//OBJECT[{canvas_no}]")[0] + words = page.findall(".//WORD") + count = 1 + for word in words: + annotationPage.items.append({ + "id": f"https://iiif.archive.org/iiif/{identifier}/canvas/{index}/anno/{count}", + "type": "Annotation", + "motivation": "commenting", + "body": { + "type": "TextualBody", + "format": "text/plain", + "value": word.text + }, + "target": f"https://iiif.archive.org/iiif/{identifier}${index}/canvas#xywh={word.attrib['coords']}" + }) + count += 1 + + except requests.exceptions.RequestException as e: + print(f"Error fetching the XML file: {e}") + raise ValueError("Failed to retrieve {url}") + except ET.ParseError as e: + print(f"Error parsing the XML content: {e}") + raise ValueError("Failed to process {url}") + + return json.loads(annotationPage.jsonld()) + def coerce_list(value): if isinstance(value, list): return ". ".join(value) diff --git a/tests/test_manifests_v2.py b/tests/test_manifests_v2.py index 2380259..876f3fd 100644 --- a/tests/test_manifests_v2.py +++ b/tests/test_manifests_v2.py @@ -12,12 +12,23 @@ def test_v2_image_manifest(self): self.assertEqual(resp.status_code, 200) manifest = resp.json - self.assertEqual(manifest['@id'], 'https://iiif.archivelab.org/iiif/rashodgson68/manifest.json', 'V2 Manifest ID has changed') + self.assertEqual(manifest['@id'], 'https://localhost/iiif/rashodgson68/manifest.json', 'V2 Manifest ID is using new infrastructure changed') self.assertEqual(manifest['@type'], "sc:Manifest", f"Unexpected type. Expected Manifest got {manifest['@type']}") self.assertEqual(len(manifest['sequences'][0]['canvases']),32,f"Expected 32 canvases but got: {len(manifest['sequences'][0]['canvases'])}") self.assertEqual(manifest['sequences'][0]['canvases'][0]['@id'],"https://iiif.archivelab.org/iiif/rashodgson68$0/canvas",f"v2 canvas id has changed") + def test_v2_image_api(self): + resp = self.test_app.get("/iiif/2/1991-12-compute-magazine/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(manifest['@id'], 'https://localhost/iiif/1991-12-compute-magazine/manifest.json', 'V2 Manifest ID is using new infrastructure changed') + image = manifest['sequences'][0]['canvases'][0]['images'][0]['resource'] + self.assertEqual(image['@id'], "https://localhost/iiif/1991-12-compute-magazine$0/full/full/0/default.jpg", "Resource not using new image server") + self.assertEqual(image['service']['@id'], 'https://localhost/iiif/1991-12-compute-magazine$0', "V2 service not using the new image server") + + def test_text_which_is_image(self): resp = self.test_app.get("/iiif/2/fbf_3chords_1_/manifest.json") From 5dbd43b7d30269f7c7db05aef062b21115d9f807 Mon Sep 17 00:00:00 2001 From: Glen Robson Date: Thu, 23 May 2024 23:56:02 +0100 Subject: [PATCH 2/2] Adding test for single image --- tests/test_manifests_v2.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_manifests_v2.py b/tests/test_manifests_v2.py index 876f3fd..393a6d6 100644 --- a/tests/test_manifests_v2.py +++ b/tests/test_manifests_v2.py @@ -28,6 +28,30 @@ def test_v2_image_api(self): self.assertEqual(image['@id'], "https://localhost/iiif/1991-12-compute-magazine$0/full/full/0/default.jpg", "Resource not using new image server") self.assertEqual(image['service']['@id'], 'https://localhost/iiif/1991-12-compute-magazine$0', "V2 service not using the new image server") + def test_v2_single_image(self): + resp = self.test_app.get("/iiif/2/img-8664_202009/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(manifest['@id'], 'https://localhost/iiif/img-8664_202009/manifest.json', 'V2 Manifest ID is using new infrastructure changed') + canvas = manifest['sequences'][0]['canvases'][0] + self.assertEqual(canvas['@id'], 'https://iiif.archivelab.org/iiif/img-8664_202009/canvas', 'Expected canvas id to be the same') + image = canvas['images'][0]['resource'] + self.assertEqual(image['@id'], "https://localhost/iiif/img-8664_202009/full/full/0/default.jpg", "Resource not using new image server") + self.assertEqual(image['service']['@id'], 'https://localhost/iiif/img-8664_202009', "V2 service not using the new image server") + + def test_v2_single_text_manifest(self): + resp = self.test_app.get("/iiif/2/fbf_3chords_1_/manifest.json") + self.assertEqual(resp.status_code, 200) + manifest = resp.json + + self.assertEqual(manifest['@id'], 'https://localhost/iiif/fbf_3chords_1_/manifest.json', 'V2 Manifest ID is using new infrastructure changed') + canvas = manifest['sequences'][0]['canvases'][0] + self.assertEqual(canvas['@id'], 'https://iiif.archivelab.org/iiif/fbf_3chords_1_$0/canvas', 'Expected canvas id to be the same') + image = canvas['images'][0]['resource'] + self.assertEqual(image['@id'], "https://localhost/iiif/fbf_3chords_1_$0/full/full/0/default.jpg", "Resource not using new image server") + self.assertEqual(image['service']['@id'], 'https://localhost/iiif/fbf_3chords_1_$0', "V2 service not using the new image server") + def test_text_which_is_image(self): resp = self.test_app.get("/iiif/2/fbf_3chords_1_/manifest.json")