-
Notifications
You must be signed in to change notification settings - Fork 1
/
extract_entities.py
84 lines (63 loc) · 2.64 KB
/
extract_entities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#import openai
from config import openai_api_key
#openai.api_key = openai_api_key
def extract_entities(text):
    """Ask an OpenAI chat model to extract personal details from ``text``.

    Builds a prompt requesting name, pin code, phone number, gender,
    occupation and address, sends it to the ``gpt-4o`` chat-completions
    endpoint, and splits the reply at the first ``"Detailed view:"`` marker
    into a summary part and a detailed part, which are then passed through
    ``format_summary`` and ``format_detailed_transcription``.

    Args:
        text: The text to analyse; it is interpolated verbatim into the
            prompt.

    Returns:
        A ``(summary, detailed_view)`` tuple of strings. If the API call
        raises, the first element is ``"Error during OpenAI API call: ..."``
        and the second is ``"Detailed view not available."``. If the reply
        contains no ``"Detailed view:"`` marker, the detailed part is built
        from ``"Detailed view not provided."``.
    """
    # NOTE(review): the template section (Text / Summary / Detailed view /
    # Original language) appears twice in this prompt. That looks like a
    # copy-paste duplicate, but it is left untouched here because changing
    # the prompt changes model behaviour -- confirm before removing.
    prompt = f"""
The following entities are present in Indian Languages.
Please extract the following entities from the text:
Name, pin code, phone number, gender, occupation, and address.
Provide the summary of the text in exact below format:
Name is ......., pin code is ........, phone number is ........, gender is ........, occupation is ........, Address is ............ .
Text: "{text}"
Summary:
Detailed view:
Original language: {text}
Text: "{text}"
Summary:
Detailed view:
Original language: {text}
"""
    # NOTE(review): `import openai` is commented out at the top of this
    # module. Unless `openai` is bound some other way, the call below raises
    # NameError, which the handler reports as an API error -- confirm.
    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that extracts information from Indian multilingual text."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=500,
        )
        # The message content may be None (no text returned); normalise it
        # so the string handling below cannot raise TypeError.
        response_text = response.choices[0].message.content or ""
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a 2-tuple of
        # strings, never an exception.
        return f"Error during OpenAI API call: {e}", "Detailed view not available."

    # Split only at the FIRST marker so that nothing after it is discarded
    # if the model repeats "Detailed view:" later in its reply.
    summary_part, marker, detailed_transcription_part = response_text.partition(
        "Detailed view:"
    )
    if marker:
        summary_part = summary_part.strip()
        detailed_transcription_part = detailed_transcription_part.strip()
    else:
        summary_part = response_text.strip()
        detailed_transcription_part = "Detailed view not provided."

    # Format the summary and detailed transcription.
    formatted_summary = format_summary(summary_part)
    formatted_detailed_transcription = format_detailed_transcription(
        detailed_transcription_part
    )
    return formatted_summary, formatted_detailed_transcription
def format_summary(summary):
    """Collapse the summary section of a model reply into a single line.

    Everything before the first line that starts with ``"Summary:"`` is
    dropped. From that line onward, each line is stripped, the
    ``"Summary:"`` label itself is removed (text following the label on the
    same line is kept), blank lines are skipped, and the remaining pieces
    are joined with single spaces.

    If no line starts with ``"Summary:"``, the whole input is treated as the
    summary instead of returning an empty string.

    Args:
        summary: Raw summary text, possibly spanning several lines.

    Returns:
        The cleaned summary as one space-joined string; ``""`` when the
        input holds no non-blank content.
    """
    label = "Summary:"
    stripped_lines = [line.strip() for line in summary.split('\n')]

    summary_lines = []
    is_summary_section = False
    for line in stripped_lines:
        if line.startswith(label):
            is_summary_section = True
            # Keep whatever follows the label on the same line; models
            # commonly answer "Summary: Name is ..." on a single line.
            line = line[len(label):].strip()
        if is_summary_section and line:
            summary_lines.append(line)

    if not is_summary_section:
        # No label at all: fall back to the full text rather than
        # silently returning nothing.
        summary_lines = [line for line in stripped_lines if line]

    return ' '.join(summary_lines)
def format_detailed_transcription(detailed_transcription):
    """Tidy the detailed-view text: trim every line and drop the blank ones.

    Args:
        detailed_transcription: Raw multi-line text.

    Returns:
        The non-blank lines, each stripped of surrounding whitespace,
        joined back together with newline characters.
    """
    trimmed = map(str.strip, detailed_transcription.split('\n'))
    # filter(None, ...) discards the lines that became empty after trimming.
    return '\n'.join(filter(None, trimmed))