diff --git a/.dockerignore b/.dockerignore index 17e091e..d53e735 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,4 +7,8 @@ __pycache__ .gitignore README.md run.sh -Jenkinsfile \ No newline at end of file +Jenkinsfile +.mypy_cache/ +.dmypy.json +dmypy.json +.vscode \ No newline at end of file diff --git a/scripts/console b/scripts/console index aaa4509..c0884f1 100755 --- a/scripts/console +++ b/scripts/console @@ -1,6 +1,6 @@ #!/bin/bash -# scripts/console: Open a container instance of the applicaiton for testing/debugging +# scripts/console: Open a container instance of the application for testing/debugging # Once in a terminal you can run, for instance: "black " to reformat a file -docker run --rm -it -v "$PWD":/code --workdir /code python:3.9-slim-buster sh -c "pip install -r src/requirements.txt -r requirements.txt && sh" +docker run --rm -it -v "$PWD":/code --network=host --workdir /code python:3.9-slim-buster sh -c "pip install -r src/requirements.txt -r requirements.txt && sh" diff --git a/scripts/localsuperlinter b/scripts/localsuperlinter new file mode 100755 index 0000000..1cc257c --- /dev/null +++ b/scripts/localsuperlinter @@ -0,0 +1,13 @@ +#!/bin/bash + +# At the root of the project you can run this command to simulate super linter (you may want to tweak the env vars). 
+docker run --rm \ + -e RUN_LOCAL=true \ + -e USE_FIND_ALGORITHM=true \ + -e VALIDATE_PYTHON=true \ + -e VALIDATE_PYTHON_BLACK=true \ + -e VALIDATE_PYTHON_FLAKE8=true \ + -e IGNORE_GENERATED_FILES=true \ + -v /"${PWD}":/tmp/lint \ + -w /tmp/lint \ + github/super-linter \ No newline at end of file diff --git a/src/copy_member_to_members.py b/src/copy_member_to_members.py index 979eef5..9afe65a 100644 --- a/src/copy_member_to_members.py +++ b/src/copy_member_to_members.py @@ -43,9 +43,29 @@ def main(): es_client.ingest.put_pipeline( PIPELINE_ID, { - "description": "Removes the 'virtualStudies' field", + "description": "Copy all members", "processors": [ - {"remove": {"field": "virtualStudies", "ignore_missing": True}} + { + "remove": { + "description": "Removes the 'virtualStudies' field", + "field": "virtualStudies", + "ignore_missing": True, + } + }, + { + "script": { + "lang": "painless", + "source": """ + def searchText = []; + for (def x : [ctx['firstName'], ctx['lastName'], ctx['institution']]) { + if (x != null && !x.trim().isEmpty() && !searchText.contains(x.toLowerCase())) { + searchText.add(x.toLowerCase()) + } + ctx['searchText'] = searchText + } + """, + } + }, ], }, ) diff --git a/src/mappings.py b/src/mappings.py index 98e3b10..a50ccdc 100644 --- a/src/mappings.py +++ b/src/mappings.py @@ -19,6 +19,13 @@ }, "member_ascii_folding": {"type": "asciifolding", "preserve_original": True}, }, + "normalizer": { + "custom_normalizer": { + "type": "custom", + "char_filter": [], + "filter": "lowercase", + } + }, } } @@ -177,6 +184,7 @@ "analyzer": "autocomplete", "fields": {"raw": {"type": "keyword"}}, }, + "searchText": {"type": "keyword", "normalizer": "custom_normalizer"}, } }, } diff --git a/src/transform.py b/src/transform.py index 8356031..18aa5d6 100644 --- a/src/transform.py +++ b/src/transform.py @@ -12,14 +12,17 @@ def transform_event_to_docs(event, index, omit): a generator with docs""" for record in event["Records"]: payload = loads(record["body"]) + 
first_name = payload.get("firstName") + last_name = payload.get("lastName") + institution = payload.get("institution") yield dict( filter( lambda x: x[0] not in omit if len(omit) > 0 else True, { "_index": index, "_id": payload["_id"], - "firstName": payload.get("firstName"), - "lastName": payload.get("lastName"), + "firstName": first_name, + "lastName": last_name, "email": payload.get("email"), "hashedEmail": payload.get("hashedEmail"), "institutionalEmail": payload.get("institutionalEmail"), @@ -29,7 +32,7 @@ def transform_event_to_docs(event, index, omit): "roles": payload.get("roles"), "title": payload.get("title"), "jobTitle": payload.get("jobTitle"), - "institution": payload.get("institution"), + "institution": institution, "city": payload.get("city"), "state": payload.get("state"), "country": payload.get("country"), @@ -43,6 +46,9 @@ def transform_event_to_docs(event, index, omit): ], "linkedin": payload.get("linkedin", ""), "website": payload.get("website", ""), + "searchText": list( + set(filter(None, [first_name, last_name, institution])) + ), }.items(), ) )