Commit fbed53e

resources details complete

Mugdhaa21 committed Jul 15, 2024
1 parent 2104a38 commit fbed53e
Showing 11 changed files with 762 additions and 22 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/main.yml
@@ -7,18 +7,22 @@ on:

jobs:
  deploy:
    runs-on: self-hosted
    steps:
      - name: 'Check out repo'
        uses: actions/checkout@v3
        with:
          ref: main
      - name: 'Stop the running NFDI Search Engine'
        run: docker compose down
      - name: 'Delete old Docker image'
        run: docker image rm nfdi-search-engine-search-engine
      - name: 'Copy logging.conf'
        run: cp logging.conf.example logging.conf
      - name: 'Create .env'
        run: |
          echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ./.env
10 changes: 7 additions & 3 deletions a.py
@@ -7,7 +7,11 @@
from objects import CreativeWork, Author

base_url = "https://zenodo.org/api/records?size=25&q="
doi = "4701615"
doi = "r3730f562f9e::324df2bd7d05a0942f31f0fe34e2eefa"

# search_result = data_retriever.retrieve_single_object(source=source,
# base_url=
# doi=doi)

encoded_doi = urllib.parse.quote_plus(string=doi, safe='()?&=,')
url = base_url + encoded_doi
@@ -52,9 +56,9 @@
        _author.affiliation = author.get("affiliation", "")
        resource.author.append(_author)
    # print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
-   # print(json.dumps(search_result, indent=4))
+   print(json.dumps(search_result, indent=4))
    # print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
    print(resource.author[0].name)
    # print( resource.name)
else:
    logger.error(f'Failed to retrieve data: {response.status_code}')

42 changes: 35 additions & 7 deletions main.py
@@ -57,12 +57,12 @@
@app.route('/')
def index():

-    if (utils.env_config["OPENAI_API_KEY"] == ""):
-        return make_response(render_template('error.html',error_message='Environment variables are not set. Kindly set all the required variables.'))
+    # if (utils.env_config["OPENAI_API_KEY"] == ""):
+    #     return make_response(render_template('error.html',error_message='Environment variables are not set. Kindly set all the required variables.'))

-    if (utils.env_config["OPENAI_API_KEY"] == ""):
-        return make_response(render_template('error.html',error_message='Environment variables are not set. Kindly set all the required variables.'))
+    # if (utils.env_config["OPENAI_API_KEY"] == ""):
+    #     return make_response(render_template('error.html',error_message='Environment variables are not set. Kindly set all the required variables.'))

    response = make_response(render_template('index.html'))
@@ -99,8 +99,7 @@ def search_results():
    # add all the sources here in this list; for simplicity we should use the exact module name
    # ensure the main method which executes the search is named "search" in the module
    # (a sketch of this contract follows the loop below)
    sources = [dblp_publications, openalex_publications, zenodo, wikidata_publications, resodate, oersi, ieee,
-               eudat, openaire_products, re3data, orkg, openalex_researchers]
-    # sources = [openalex_publications]
+               eudat, eulg, openaire_products, re3data, orkg, openalex_researchers]
    for source in sources:
        t = threading.Thread(target=source.search, args=(search_term, results,))
        t.start()
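
    # A minimal sketch of the source-module contract described above (the function
    # body here is an illustrative assumption, not actual project code): each module
    # in `sources` exposes a search() function that appends its hits to the shared
    # results dict, e.g.
    #
    #   def search(search_term: str, results: dict) -> None:
    #       """Look up search_term and append matching records to results."""
    #       results['publications'].append(...)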
@@ -351,12 +350,41 @@ def resource_details(sources):
    sources = ast.literal_eval(sources)
    for source in sources:
        doi = source['doi']
-        resource = zenodo.get_resource(doi)
+        resource = zenodo.get_resource(doi="https://doi.org/"+doi)
    response = make_response(render_template('resource-details.html', resource=resource))

    print("response:", response)
    return response

@app.route('/resource-details-citations/<path:doi>', methods=['GET'])
@utils.timeit
def resource_details_citations(doi):
    print("DOI:", doi)
    resource = semanticscholar.get_citations_for_publication(doi=doi)
    response = make_response(render_template('partials/publication-details/citations.html', resource=resource))
    print("response:", response)
    return response

@app.route('/resource-details-references/<path:doi>', methods=['GET'])
@utils.timeit
def resource_details_references(doi):
    print("doi:", doi)

    resource = crossref.get_publication(doi=doi)
    response = make_response(render_template('partials/publication-details/references.html', resource=resource))

    print("response:", response)
    return response

@app.route('/resource-details-recommendations/<path:doi>', methods=['GET'])
@utils.timeit
def resource_details_recommendations(doi):
    print("DOI:", doi)
    publications = semanticscholar.get_recommendations_for_publication(doi=doi)
    response = make_response(render_template('partials/publication-details/recommendations.html', publications=publications))
    print("response:", response)
    return response

@app.route('/researcher-details/<string:index>', methods=['GET'])
def researcher_details(index):
    index = json.loads(index)
2 changes: 1 addition & 1 deletion requirements.txt
@@ -19,4 +19,4 @@ dateparser>=1.2.0
Flask-Session==0.5.0
rank_bm25==0.2.2
python-dotenv==1.0.1
- ==1.35.3
+ openai==1.35.3
126 changes: 126 additions & 0 deletions sources/dblp.py
@@ -0,0 +1,126 @@
import extruct
import requests
from objects import Person, Article
import logging
import os
import pprint
import utils
# logging.config.fileConfig(os.getenv('LOGGING_FILE_CONFIG', './logging.conf'))
logger = logging.getLogger('nfdi_search_engine')


def extract_metadata(text: bytes) -> object:
    """Extract all metadata present in the page and return a dictionary of metadata lists.

    Args:
        text: The content of a requests.get( ) call

    Returns:
        metadata (dict): Dictionary of json-ld, microdata, and opengraph lists.
        Each of the lists present within the dictionary contains multiple dictionaries.
    """
    metadata = extruct.extract(text,
                               uniform=True,
                               syntaxes=['json-ld',
                                         'microdata',
                                         'opengraph'])
    assert isinstance(metadata, object)
    return metadata
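
# Usage sketch (illustrative, not part of the module's control flow): per the
# docstring, extract_metadata() expects the raw bytes of a requests.get() response:
#
#   response = requests.get(url, headers=headers)
#   metadata = extract_metadata(response.content)
#   for item in metadata['microdata']:
#       print(item.get('@type'))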


@utils.timeit
# def dblp(search_term: str, g, results):
def search(search_term: str, results):

    try:

        base_url = utils.config["search_url_dblp"]
        url = base_url + search_term

        headers = {'Accept': 'application/json',
                   'Content-Type': 'application/json',
                   'User-Agent': utils.config["request_header_user_agent"]
                   }
        response = requests.get(url, headers=headers, timeout=int(utils.config["request_timeout"]))

        logger.debug(f'DBLP response status code: {response.status_code}')
        logger.debug(f'DBLP response headers: {response.headers}')

        # TODO unclear why only a few but not all results are returned here

        metadata = extract_metadata(response.content)

        # TODO unclear why this loop takes so long
        # The profiler indicates that the JSON-LD parsing is responsible for the
        # majority of the execution time, taking approximately 18.21 seconds.
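        #
        # A sketch of how that could be measured with the standard-library profiler
        # (an assumption; the original profiling setup is not shown here):
        #
        #   import cProfile
        #   cProfile.runctx("extract_metadata(response.content)",
        #                   globals(), locals(), sort="cumtime")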
        for data in metadata['microdata']:
            if data['@type'] == 'Person':
                '''
                results.append(
                    Person(
                        name=data["name"],
                        url=data["url"],
                        affiliation=""
                    )
                )
                '''
            elif data['@type'] == 'ScholarlyArticle':
                if 'author' in data:
                    url = ''
                    if 'url' in data:
                        if type(data["url"]) == list:
                            url = ', '.join(data["url"])
                        else:
                            url = data["url"]
                    publication = Article()
                    publication.source = 'DBLP'
                    publication.name = data["name"]
                    publication.url = url
                    publication.image = data["image"]
                    publication.description = ''
                    publication.abstract = ''
                    publication.keywords.append('')
                    publication.inLanguage.append("")
                    publication.datePublished = data["datePublished"]
                    publication.license = ''
                    author = Person()
                    author.type = 'Person'
                    if type(data["author"]) == list:
                        # author = ', '.join([authors["name"] for authors in data["author"]])
                        for authors in data["author"]:
                            author2 = Person()
                            author2.name = authors["name"]
                            author2.type = 'Person'
                            publication.author.append(author2)
                    elif type(data["author"]) == dict:
                        author.name = data["author"]["name"]
                        publication.author.append(author)
                    else:
                        author.name = data["author"]
                        publication.author.append(author)
                    publication.encoding_contentUrl = ''
                    publication.encodingFormat = ''

                    results['publications'].append(publication)
                    '''
                    results.append(
                        Article(
                            title=data["name"],
                            url=url,
                            authors=author,
                            description='',
                            date=data["datePublished"]
                        )
                    )
                    '''
logger.info(f"Got {len(results)} Researchers and scholarly articls from DBLP")
# return results
# g.parse(data=json.dumps(data), format='json-ld')
# logger.info(f"Graph g has {len(g)} statements after querying DBLP.")

    except requests.exceptions.Timeout as ex:
        logger.error(f'Timed out Exception: {str(ex)}')
        results['timedout_sources'].append('DBLP')

    except Exception as ex:
        logger.error(f'Exception: {str(ex)}')