From 16ea578eecca2779ca4beb3a1595cc45930a22d1 Mon Sep 17 00:00:00 2001 From: omersafakbebek Date: Mon, 25 Dec 2023 06:46:23 +0300 Subject: [PATCH 1/3] added semantic search service --- ludos/semantic-search/.gitignore | 42 ++++++++++++++++++++++++ ludos/semantic-search/Dockerfile | 8 +++++ ludos/semantic-search/deploy.sh | 6 ++++ ludos/semantic-search/docker-compose.yml | 8 +++++ ludos/semantic-search/main.py | 18 ++++++++++ ludos/semantic-search/requirements.txt | 38 +++++++++++++++++++++ ludos/semantic-search/semantic_search.py | 10 ++++++ 7 files changed, 130 insertions(+) create mode 100644 ludos/semantic-search/.gitignore create mode 100644 ludos/semantic-search/Dockerfile create mode 100755 ludos/semantic-search/deploy.sh create mode 100644 ludos/semantic-search/docker-compose.yml create mode 100644 ludos/semantic-search/main.py create mode 100644 ludos/semantic-search/requirements.txt create mode 100644 ludos/semantic-search/semantic_search.py diff --git a/ludos/semantic-search/.gitignore b/ludos/semantic-search/.gitignore new file mode 100644 index 00000000..71bacced --- /dev/null +++ b/ludos/semantic-search/.gitignore @@ -0,0 +1,42 @@ +# compiled output +/dist +/node_modules + +# Logs +logs +*.log +npm-debug.log* +pnpm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# OS +.DS_Store + +# Tests +/coverage +/.nyc_output + +# IDEs and editors +/.idea +.project +.classpath +.c9/ +*.launch +.settings/ +*.sublime-workspace + +# IDE - VSCode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +.env + +venv +semantic.tar.gz +__pycache__ + diff --git a/ludos/semantic-search/Dockerfile b/ludos/semantic-search/Dockerfile new file mode 100644 index 00000000..77b2161a --- /dev/null +++ b/ludos/semantic-search/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.10.1 +WORKDIR /app +COPY requirements.txt . +RUN pip install -r requirements.txt +COPY semantic_search.py . 
+RUN python semantic_search.py +COPY main.py . +CMD python main.py \ No newline at end of file diff --git a/ludos/semantic-search/deploy.sh b/ludos/semantic-search/deploy.sh new file mode 100755 index 00000000..96109507 --- /dev/null +++ b/ludos/semantic-search/deploy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +docker build . -t semantic +docker save semantic | gzip > semantic.tar.gz +rsync --rsync-path="sudo rsync" -r -avh -e "ssh -i ~/ludos-semantic.pem" ./semantic.tar.gz ubuntu@3.77.226.88:~/semantic --delete +ssh -i ~/ludos-semantic.pem ubuntu@3.77.226.88 "sudo docker container stop semantic && sudo docker container rm semantic && cd ~/semantic && sudo docker load -i semantic.tar.gz && sudo docker run -d --name semantic -p 8000:8000 semantic && docker image prune -af && docker builder prune -f && docker builder prune -af --filter until=10m && docker volume prune -af" +``` \ No newline at end of file diff --git a/ludos/semantic-search/docker-compose.yml b/ludos/semantic-search/docker-compose.yml new file mode 100644 index 00000000..4c7b4bf3 --- /dev/null +++ b/ludos/semantic-search/docker-compose.yml @@ -0,0 +1,8 @@ +version: '3.9' + +services: + semantic: + image: ludos-semantic + build: . 
+    ports:
+      - 8000:8000
\ No newline at end of file
diff --git a/ludos/semantic-search/main.py b/ludos/semantic-search/main.py
new file mode 100644
index 00000000..32813d29
--- /dev/null
+++ b/ludos/semantic-search/main.py
@@ -0,0 +1,37 @@
+from flask import Flask, request, jsonify
+from waitress import serve
+from semantic_search import find_hits
+app = Flask(__name__)
+
+# The ``path`` converter lets search keys that contain "/" (raw or
+# percent-encoded) reach the view instead of producing a 404.
+@app.route('/search/<path:searchKey>', methods=['POST'])
+def search(searchKey):
+    """Rank the posted items by semantic similarity to ``searchKey``.
+
+    Expects a JSON body shaped like ``{"items": [{"id": ..., "text": ...}]}``.
+
+    Args:
+        searchKey: Query text taken from the URL path.
+
+    Returns:
+        A JSON array with one ``{"id", "text", "score"}`` object per hit
+        returned by ``find_hits``; ``[]`` when ``items`` is empty; a 400
+        JSON error when the body has no ``items`` list.
+    """
+    body = request.get_json(silent=True)
+    items = body.get('items') if isinstance(body, dict) else None
+    if not isinstance(items, list):
+        return jsonify({"error": "'items' must be a list"}), 400
+    if not items:
+        # Nothing to rank, so skip the model call entirely.
+        return jsonify([])
+    corpus = [item["text"] for item in items]
+    response = []
+    for hit in find_hits(searchKey, corpus):
+        item = items[hit['corpus_id']]
+        response.append({"id": item["id"], "text": item["text"], "score": hit['score']})
+    return jsonify(response)
+if __name__ == "__main__":
+    print("Starting server...")
+    serve(app, host="0.0.0.0", port=8000)
\ No newline at end of file
diff --git a/ludos/semantic-search/requirements.txt b/ludos/semantic-search/requirements.txt
new file mode 100644
index 00000000..1821e1a4
--- /dev/null
+++ b/ludos/semantic-search/requirements.txt
@@ -0,0 +1,38 @@
+blinker==1.7.0
+certifi==2023.11.17
+charset-normalizer==3.3.2
+click==8.1.7
+filelock==3.13.1
+Flask==3.0.0
+fsspec==2023.12.2
+huggingface-hub==0.20.1
+idna==3.6
+itsdangerous==2.1.2
+Jinja2==3.1.2
+joblib==1.3.2
+MarkupSafe==2.1.3
+mpmath==1.3.0
+networkx==3.2.1
+nltk==3.8.1
+numpy==1.26.2
+packaging==23.2
+Pillow==10.1.0
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+safetensors==0.4.1
+scikit-learn==1.3.2
+scipy==1.11.4
+sentence-transformers==2.2.2
+sentencepiece==0.1.99
+sympy==1.12
+threadpoolctl==3.2.0
+tokenizers==0.15.0
+torch==2.1.2
+torchvision==0.16.2
+tqdm==4.66.1
+transformers==4.36.2
+typing_extensions==4.9.0
+urllib3==2.1.0
+waitress==2.1.2
+Werkzeug==3.0.1
diff --git a/ludos/semantic-search/semantic_search.py b/ludos/semantic-search/semantic_search.py
new file mode 100644
index 00000000..3589a656
--- /dev/null
+++
b/ludos/semantic-search/semantic_search.py
@@ -0,0 +1,26 @@
+from sentence_transformers import SentenceTransformer, util
+embedder = SentenceTransformer('all-MiniLM-L6-v2')
+
+
+def find_hits(query, corpus, top_k=5):
+    """Return the corpus entries most similar to ``query``.
+
+    Args:
+        query: Search text to embed.
+        corpus: List of candidate strings to rank against the query.
+        top_k: Maximum number of hits to return.
+
+    Returns:
+        The hit list ``util.semantic_search`` produces for the query: dicts
+        with ``corpus_id`` (index into ``corpus``) and ``score``. Empty when
+        ``corpus`` is empty.
+    """
+    if not corpus:
+        # Encoding an empty batch with convert_to_tensor=True raises in the
+        # pinned sentence-transformers 2.2.2 (torch.stack on an empty list).
+        return []
+    query_embedding = embedder.encode(query, convert_to_tensor=True)
+    corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
+    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)
+    # One query in, one result list out; unwrap it.
+    return hits[0]
\ No newline at end of file
From 254e76f410ede048ba8032b7256610bd9ff1f6bc Mon Sep 17 00:00:00 2001
From: omersafakbebek
Date: Mon, 25 Dec 2023 15:26:32 +0300
Subject: [PATCH 2/3] added semantic search endpoint
---
 .../src/controllers/search.controller.ts | 19 ++
 .../response/semantic-search.response.dto.ts | 64 +++++
 .../interfaces/semantic/response.interface.ts | 4 +
 .../services/config/typeorm-config.service.ts | 4 +-
 ludos/backend/src/services/search.service.ts | 238 +++++++++++++++++-
 5 files changed, 324 insertions(+), 5 deletions(-)
 create mode 100644 ludos/backend/src/dtos/search/response/semantic-search.response.dto.ts
 create mode 100644 ludos/backend/src/interfaces/semantic/response.interface.ts

diff --git a/ludos/backend/src/controllers/search.controller.ts b/ludos/backend/src/controllers/search.controller.ts
index 285cf591..84f63c02 100644
--- a/ludos/backend/src/controllers/search.controller.ts
+++ b/ludos/backend/src/controllers/search.controller.ts
@@ -9,6 +9,7 @@ import {
 } from '@nestjs/swagger';
 import { AuthorizedRequest } from '../interfaces/common/authorized-request.interface';
 import { SerializerInterceptor } from '../interceptors/customSerializer.interceptor';
+import { SemanticSearchResponseDto } from '../dtos/search/response/semantic-search.response.dto';
 
 @Controller('search')
 @ApiTags('search')
@@ -33,4 +34,22 @@ export class SearchController {
       req.user ?
req.user.id : undefined,
     );
   }
+  /**
+   * Semantic search endpoint: GET /search/semantic/:searchKey.
+   * Delegates to SearchService.semanticSearch, passing the caller's user id
+   * when req.user is set and undefined otherwise.
+   */
+  @ApiOkResponse({
+    type: SemanticSearchResponseDto,
+  })
+  @ApiBearerAuth()
+  @ApiOperation({
+    summary: 'Semantic Search for users, games, groups and posts',
+  })
+  @UseInterceptors(new SerializerInterceptor(SemanticSearchResponseDto))
+  @Get('/semantic/:searchKey')
+  async semanticSearch(
+    @Req() req: AuthorizedRequest,
+    @Param('searchKey') searchKey: string,
+  ): Promise<SemanticSearchResponseDto> {
+    return await this.searchService.semanticSearch(
+      searchKey,
+      req.user ? req.user.id : undefined,
+    );
+  }
 }
diff --git a/ludos/backend/src/dtos/search/response/semantic-search.response.dto.ts b/ludos/backend/src/dtos/search/response/semantic-search.response.dto.ts
new file mode 100644
index 00000000..9db23e70
--- /dev/null
+++ b/ludos/backend/src/dtos/search/response/semantic-search.response.dto.ts
@@ -0,0 +1,69 @@
+import { ApiProperty } from '@nestjs/swagger';
+import { GameListResponseDto } from '../../game/response/list.response';
+import { PostListResponseDto } from '../../post/response/list.response.dto';
+import { UserInOtherResponsesDto } from '../../user/response/user-in-other-responses.dto';
+import { Expose, Type } from 'class-transformer';
+import { GroupListResponseDto } from '../../group/response/list.response.dto';
+/** A user hit paired with its semantic similarity score. */
+export class UserSemanticResponseDto {
+  @ApiProperty({ type: () => UserInOtherResponsesDto })
+  @Expose()
+  @Type(() => UserInOtherResponsesDto)
+  item: UserInOtherResponsesDto;
+
+  @ApiProperty()
+  @Expose()
+  score: number;
+}
+/** A game hit paired with its semantic similarity score. */
+export class GameSemanticResponseDto {
+  @ApiProperty({ type: () => GameListResponseDto })
+  @Expose()
+  @Type(() => GameListResponseDto)
+  item: GameListResponseDto;
+
+  @ApiProperty()
+  @Expose()
+  score: number;
+}
+/** A post hit paired with its semantic similarity score. */
+export class PostSemanticResponseDto {
+  @ApiProperty({ type: () => PostListResponseDto })
+  @Expose()
+  @Type(() => PostListResponseDto)
+  item: PostListResponseDto;
+
+  @ApiProperty()
+  @Expose()
+  score: number;
+}
+/** A group hit paired with its semantic similarity score. */
+export class GroupSemanticResponseDto {
+  @ApiProperty({ type: () => GroupListResponseDto })
+  @Expose()
+  @Type(() => GroupListResponseDto)
+  item: GroupListResponseDto;
+
+  @ApiProperty()
+  @Expose()
+  score: number;
+}
+/** Response body of the semantic search endpoint, grouped by entity kind. */
+export class SemanticSearchResponseDto {
+  @ApiProperty({ type: () => [UserSemanticResponseDto] })
+  @Type(() => UserSemanticResponseDto)
+  @Expose()
+  users: UserSemanticResponseDto[];
+  @ApiProperty({ type: () => [GameSemanticResponseDto] })
+  @Type(() => GameSemanticResponseDto)
+  @Expose()
+  games: GameSemanticResponseDto[];
+  @ApiProperty({ type: () => [PostSemanticResponseDto] })
+  @Type(() => PostSemanticResponseDto)
+  @Expose()
+  posts: PostSemanticResponseDto[];
+  @ApiProperty({ type: () => [GroupSemanticResponseDto] })
+  @Type(() => GroupSemanticResponseDto)
+  @Expose()
+  groups: GroupSemanticResponseDto[];
+}
diff --git a/ludos/backend/src/interfaces/semantic/response.interface.ts b/ludos/backend/src/interfaces/semantic/response.interface.ts
new file mode 100644
index 00000000..2f486ea5
--- /dev/null
+++ b/ludos/backend/src/interfaces/semantic/response.interface.ts
@@ -0,0 +1,5 @@
+/** One scored hit: the id of the matched item and its similarity score. */
+export interface SemanticResponse {
+  id: string;
+  score: number;
+}
diff --git a/ludos/backend/src/services/config/typeorm-config.service.ts b/ludos/backend/src/services/config/typeorm-config.service.ts
index f2cf969c..e9391350 100644
--- a/ludos/backend/src/services/config/typeorm-config.service.ts
+++ b/ludos/backend/src/services/config/typeorm-config.service.ts
@@ -30,9 +30,9 @@ export class TypeOrmConfigService implements TypeOrmOptionsFactory {
           ssl: {
             rejectUnauthorized: false,
           },
-          },
+        },
       }),
-      
+
       entities: [
         User,
         ResetPassword,
diff --git a/ludos/backend/src/services/search.service.ts b/ludos/backend/src/services/search.service.ts
index 279a5246..097b7a09 100644
--- a/ludos/backend/src/services/search.service.ts
+++ b/ludos/backend/src/services/search.service.ts
@@ -1,9 +1,18 @@
 import { Injectable } from '@nestjs/common';
-import { UserRepository } from '../repositories/user.repository';
-import { GameRepository }
from '../repositories/game.repository';
-import { PostRepository } from '../repositories/post.repository';
+import axios from 'axios';
 import { SearchResponseDto } from '../dtos/search/response/search.response.dto';
+import {
+  GameSemanticResponseDto,
+  GroupSemanticResponseDto,
+  PostSemanticResponseDto,
+  SemanticSearchResponseDto,
+  UserSemanticResponseDto,
+} from '../dtos/search/response/semantic-search.response.dto';
+import { SemanticResponse } from '../interfaces/semantic/response.interface';
+import { GameRepository } from '../repositories/game.repository';
 import { GroupRepository } from '../repositories/group.repository';
+import { PostRepository } from '../repositories/post.repository';
+import { UserRepository } from '../repositories/user.repository';
 
 @Injectable()
 export class SearchService {
@@ -55,4 +64,177 @@ export class SearchService {
       groups: groups.items,
     };
   }
+  /** Base URL of the semantic-search service; override via SEMANTIC_SEARCH_URL. */
+  private readonly semanticSearchUrl =
+    process.env.SEMANTIC_SEARCH_URL || 'http://104.248.19.88:8000';
+  /** Hits scoring at or below this similarity are left out of the response. */
+  private readonly semanticScoreThreshold = 0.5;
+
+  /**
+   * Semantic search over users (username, full name), games (title), posts
+   * (title, body) and groups (name).
+   *
+   * Loads the first 1000 entities of each kind, asks the semantic-search
+   * service to score each text field against searchKey, and returns the
+   * entities whose best score exceeds the threshold. Every entity appears at
+   * most once, carrying its best score across its fields.
+   *
+   * @param searchKey free-text query
+   * @param userId id of the requesting user, forwarded to the repositories
+   * @returns matching users, games, posts and groups with their scores
+   */
+  public async semanticSearch(
+    searchKey: string,
+    userId?: string,
+  ): Promise<SemanticSearchResponseDto> {
+    const users = await this.userRepository.findUsers(1, 1000);
+    const games = await this.gameRepository.findGames(
+      1,
+      1000,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      userId,
+    );
+    const posts = await this.postRepository.findPosts(
+      1,
+      1000,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      userId,
+    );
+    const groups = await this.groupRepository.findGroups(
+      1,
+      1000,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      userId,
+    );
+
+    const usernameHits = await this.searchByField(
+      searchKey,
+      users.items,
+      (user) => user.username,
+    );
+    const fullNameHits = await this.searchByField(
+      searchKey,
+      users.items,
+      (user) => user.fullName,
+    );
+    const gameTitleHits = await this.searchByField(
+      searchKey,
+      games.items,
+      (game) => game.title,
+    );
+    const postTitleHits = await this.searchByField(
+      searchKey,
+      posts.items,
+      (post) => post.title,
+    );
+    const postBodyHits = await this.searchByField(
+      searchKey,
+      posts.items,
+      (post) => post.body,
+    );
+    const groupNameHits = await this.searchByField(
+      searchKey,
+      groups.items,
+      (group) => group.name,
+    );
+
+    const usersResponse: UserSemanticResponseDto[] = this.rankByBestScore(
+      users.items,
+      usernameHits,
+      fullNameHits,
+    );
+    const gamesResponse: GameSemanticResponseDto[] = this.rankByBestScore(
+      games.items,
+      gameTitleHits,
+    );
+    const postsResponse: PostSemanticResponseDto[] = this.rankByBestScore(
+      posts.items,
+      postTitleHits,
+      postBodyHits,
+    );
+    const groupsResponse: GroupSemanticResponseDto[] = this.rankByBestScore(
+      groups.items,
+      groupNameHits,
+    );
+
+    return {
+      users: usersResponse,
+      games: gamesResponse,
+      posts: postsResponse,
+      groups: groupsResponse,
+    };
+  }
+
+  /**
+   * Scores one text field of entities against searchKey.
+   * Entities whose field is null/undefined are not sent. Best effort: any
+   * request failure yields an empty hit list instead of failing the search.
+   */
+  private async searchByField<T extends { id: string }>(
+    searchKey: string,
+    entities: T[],
+    getText: (entity: T) => string | null | undefined,
+  ): Promise<SemanticResponse[]> {
+    const items = entities
+      .map((entity) => ({ id: entity.id, text: getText(entity) }))
+      .filter((item) => item.text != null);
+    if (items.length === 0) {
+      return [];
+    }
+    try {
+      // Encode the key so '/', '?', '#' and spaces stay in one path segment.
+      const response = await axios.post(
+        `${this.semanticSearchUrl}/search/${encodeURIComponent(searchKey)}`,
+        { items },
+      );
+      return Array.isArray(response.data) ? response.data : [];
+    } catch (error) {
+      console.warn('Semantic search request failed:', error);
+      return [];
+    }
+  }
+
+  /**
+   * Joins hits back to their entities. An entity hit in several lists is
+   * returned once with its highest score; order follows first appearance.
+   */
+  private rankByBestScore<T extends { id: string }>(
+    entities: T[],
+    ...hitLists: SemanticResponse[][]
+  ): { item: T; score: number }[] {
+    const bestScoreById = new Map<string, number>();
+    for (const hits of hitLists) {
+      for (const hit of hits) {
+        const previous = bestScoreById.get(hit.id);
+        if (
+          hit.score > this.semanticScoreThreshold &&
+          (previous === undefined || hit.score > previous)
+        ) {
+          bestScoreById.set(hit.id, hit.score);
+        }
+      }
+    }
+    const entityById = new Map<string, T>();
+    entities.forEach((entity) => entityById.set(entity.id, entity));
+    const ranked: { item: T; score: number }[] = [];
+    bestScoreById.forEach((score, id) => {
+      const item = entityById.get(id);
+      if (item !== undefined) {
+        ranked.push({ item, score });
+      }
+    });
+    return ranked;
+  }
 }
From 443d53ebde34b0524b293322c39a31675ea1b645 Mon Sep 17 00:00:00 2001
From: omersafakbebek
Date: Mon, 25 Dec 2023 15:29:28 +0300
Subject: [PATCH 3/3] removed deploy.sh
---
 ludos/semantic-search/deploy.sh | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100755 ludos/semantic-search/deploy.sh

diff --git a/ludos/semantic-search/deploy.sh b/ludos/semantic-search/deploy.sh
deleted file mode 100755
index 96109507..00000000
--- a/ludos/semantic-search/deploy.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-docker build . -t semantic
-docker save semantic | gzip > semantic.tar.gz
-rsync --rsync-path="sudo rsync" -r -avh -e "ssh -i ~/ludos-semantic.pem" ./semantic.tar.gz ubuntu@3.77.226.88:~/semantic --delete
-ssh -i ~/ludos-semantic.pem ubuntu@3.77.226.88 "sudo docker container stop semantic && sudo docker container rm semantic && cd ~/semantic && sudo docker load -i semantic.tar.gz && sudo docker run -d --name semantic -p 8000:8000 semantic && docker image prune -af && docker builder prune -f && docker builder prune -af --filter until=10m && docker volume prune -af"
-```
\ No newline at end of file