Showing 11 changed files with 762 additions and 22 deletions.
@@ -19,4 +19,4 @@ dateparser>=1.2.0
 Flask-Session==0.5.0
 rank_bm25==0.2.2
 python-dotenv==1.0.1
-==1.35.3
+openai==1.35.3
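This hunk completes a pin that was missing its package name in the previous revision. A quick way to confirm the corrected pin resolves after installing the requirements (the file name is assumed to be requirements.txt, matching the packages shown):

    # Run after `pip install -r requirements.txt` to verify the pinned version.
    import openai
    assert openai.__version__ == '1.35.3', openai.__version__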
@@ -0,0 +1,126 @@
import extruct
import requests
from objects import Person, Article
import logging
import os
import pprint
import utils

# logging.config.fileConfig(os.getenv('LOGGING_FILE_CONFIG', './logging.conf'))
logger = logging.getLogger('nfdi_search_engine')

def extract_metadata(text: bytes) -> object:
    """Extract all metadata present in the page and return a dictionary of metadata lists.

    Args:
        text: The content of a requests.get() call.

    Returns:
        metadata (dict): Dictionary of json-ld, microdata, and opengraph lists.
        Each of the lists present within the dictionary contains multiple dictionaries.
    """
    metadata = extruct.extract(text,
                               uniform=True,
                               syntaxes=['json-ld',
                                         'microdata',
                                         'opengraph'])
    assert isinstance(metadata, object)
    return metadata

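A minimal usage sketch of extract_metadata (the query URL is illustrative, and it assumes the function above is in scope):

    # Hypothetical: fetch a DBLP search page and inspect what each syntax yielded.
    response = requests.get('https://dblp.org/search?q=knowledge+graph', timeout=10)
    metadata = extract_metadata(response.content)
    for syntax in ('json-ld', 'microdata', 'opengraph'):
        print(syntax, '->', len(metadata.get(syntax, [])), 'entries')

Since search() below only reads metadata['microdata'], restricting the call to syntaxes=['microdata'] would skip the JSON-LD parsing that the profiling note below identifies as the dominant cost (about 18.21 s), provided the other syntaxes are not needed elsewhere.
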
@utils.timeit
# def dblp(search_term: str, g, results):
def search(search_term: str, results):

    try:
        base_url = utils.config["search_url_dblp"]
        url = base_url + search_term

        headers = {'Accept': 'application/json',
                   'Content-Type': 'application/json',
                   'User-Agent': utils.config["request_header_user_agent"]
                   }
        response = requests.get(url, headers=headers, timeout=int(utils.config["request_timeout"]))

        logger.debug(f'DBLP response status code: {response.status_code}')
        logger.debug(f'DBLP response headers: {response.headers}')

        # TODO: unclear why only a few, and not all, results are returned here

        metadata = extract_metadata(response.content)
        # TODO: unclear why this loop takes so long.
        # Profiling indicates that the JSON-LD parsing is responsible for the
        # majority of the execution time, taking approximately 18.21 seconds.
        # Map DBLP microdata items onto the project's Person/Article objects.
        for data in metadata['microdata']:
            if data['@type'] == 'Person':
                # Person results are currently skipped; the intended mapping is
                # kept for reference:
                # results.append(
                #     Person(
                #         name=data["name"],
                #         url=data["url"],
                #         affiliation=""
                #     )
                # )
                pass
            elif data['@type'] == 'ScholarlyArticle':
                if 'author' in data:
                    url = ''
                    if 'url' in data:
                        if isinstance(data["url"], list):
                            url = ', '.join(data["url"])
                        else:
                            url = data["url"]
                    publication = Article()
                    publication.source = 'DBLP'
                    publication.name = data["name"]
                    publication.url = url
                    publication.image = data["image"]
                    publication.description = ''
                    publication.abstract = ''
                    publication.keywords.append('')
                    publication.inLanguage.append("")
                    publication.datePublished = data["datePublished"]
                    publication.license = ''
                    author = Person()
                    author.type = 'Person'
                    if isinstance(data["author"], list):
                        # author = ', '.join([authors["name"] for authors in data["author"]])
                        for authors in data["author"]:
                            author2 = Person()
                            author2.name = authors["name"]
                            author2.type = 'Person'
                            publication.author.append(author2)
                    elif isinstance(data["author"], dict):
                        author.name = data["author"]["name"]
                        publication.author.append(author)
                    else:
                        author.name = data["author"]
                        publication.author.append(author)
                    publication.encoding_contentUrl = ''
                    publication.encodingFormat = ''

                    results['publications'].append(publication)
                    # results.append(
                    #     Article(
                    #         title=data["name"],
                    #         url=url,
                    #         authors=author,
                    #         description='',
                    #         date=data["datePublished"]
                    #     )
                    # )

        logger.info(f"Got {len(results['publications'])} researchers and scholarly articles from DBLP")
        # return results
        # g.parse(data=json.dumps(data), format='json-ld')
        # logger.info(f"Graph g has {len(g)} statements after querying DBLP.")

    except requests.exceptions.Timeout as ex:
        logger.error(f'Timed out Exception: {str(ex)}')
        results['timedout_sources'].append('DBLP')

    except Exception as ex:
        logger.error(f'Exception: {str(ex)}')
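
A sketch of how this module might be driven. Everything here is assumed rather than shown by the commit: the file name dblp.py (suggested only by the commented-out function name), and a utils.config populated with the keys referenced above. The results dict shape follows the 'publications' and 'timedout_sources' keys used in search():

    # Hypothetical driver, assuming the module above is saved as dblp.py.
    import dblp

    results = {'publications': [], 'timedout_sources': []}
    dblp.search('knowledge+graph', results)

    for publication in results['publications']:
        print(publication.name, publication.datePublished, publication.url)
    if 'DBLP' in results['timedout_sources']:
        print('DBLP request timed out')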