-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanth_script.py
71 lines (59 loc) · 2.63 KB
/
anth_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import anthropic
import re
import csv
# Prompt sent verbatim for every profile; each request is independent, so the
# model has no memory of previously generated personas.
message = """Generate a unique doctor profile in the United States for UX research purposes. Try to not have the same names and each persona should be independent of each other.
Ensure that each profile has complete details and a well-described short biography.
Keep the formatting of each response consistent please.
Include the following characteristics:
Name:
Age:
Gender:
Ethnicity/Race:
Income:
Primary motivations: [a short one-paragraph biography describing the doctor's reasons for becoming a doctor]
Short Biography: [a detailed one-paragraph biography describing the doctor's background, experience, and personality]
"""

# CSV headers
csv_headers = ["Name", "Age", "Gender", "Ethnicity/Race", "Income", "Primary motivations", "Short Biography"]

# regex patterns to extract only required fields
patterns = {
    "Name": r"^Name:\s*(.+)$",
    "Age": r"^Age:\s*(\d+)$",
    "Gender": r"^Gender:\s*(.+)$",
    "Ethnicity/Race": r"^Ethnicity/Race:\s*(.+)$",
    "Income": r"^Income:\s*(.+)$",
    "Primary motivations": r"^Primary motivations:\s*(.+?)(?=\n[A-Z]|$)",
    "Short Biography": r"^Short Biography:\s*(.+)$"
}

# Compiled once up front; MULTILINE makes ^/$ anchor on each line of the reply.
compiled_patterns = {
    header: re.compile(pattern, re.MULTILINE)
    for header, pattern in patterns.items()
}

output_file = "doctor_profiles_anthropic.csv"

# Number of profiles to request and the model used for every request.
NUM_PROFILES = 9189
MODEL = "claude-3-5-sonnet-20241022"


def extract_text(reply):
    """Return the stripped text of the first text block in *reply*.

    Raises:
        ValueError: if ``reply.content`` is not a list, or if no non-empty
            text is found in it.
    """
    content = getattr(reply, "content", None)
    if not isinstance(content, list):
        raise ValueError("Reply content is not a valid list.")
    text = next(
        (
            block.text.strip()
            for block in content
            if isinstance(getattr(block, "text", None), str)
        ),
        "",
    )
    if not text:
        raise ValueError("No valid text content found in reply.")
    return text


def parse_profile(text):
    """Extract the CSV fields from a model reply.

    Returns a dict keyed by every entry of ``csv_headers``; a field whose
    pattern does not match is left as an empty string.
    """
    data = {header: "" for header in csv_headers}
    for header, pattern in compiled_patterns.items():
        match = pattern.search(text)
        if match:
            data[header] = match.group(1).strip()
    return data


def make_writer(csvfile):
    """Return a ``csv.DictWriter`` for *csvfile*, adding the header if needed.

    The output file is opened in append mode, so the header row is written
    only when the stream is still at position 0 (i.e. the file is empty).
    Writing it unconditionally would insert a duplicate header on every run.
    """
    writer = csv.DictWriter(csvfile, fieldnames=csv_headers)
    if csvfile.tell() == 0:
        writer.writeheader()
    return writer


def main(num_profiles=NUM_PROFILES):
    """Generate *num_profiles* doctor profiles and append them to the CSV.

    A failure on any single profile (API error, malformed reply) is reported
    on stdout and recorded as a row of "Error" values so the run continues.
    """
    # With no api_key argument the SDK reads ANTHROPIC_API_KEY from the
    # environment, which keeps the secret out of the source file.
    client = anthropic.Anthropic()

    with open(output_file, mode="a", newline="", encoding="utf-8") as csvfile:
        writer = make_writer(csvfile)
        for i in range(num_profiles):
            try:
                print(f"Generating profile {i}...")
                reply = client.messages.create(
                    model=MODEL,
                    max_tokens=1024,
                    messages=[{"role": "user", "content": message}],
                )
                row = parse_profile(extract_text(reply))
            except Exception as e:
                # Deliberate best-effort: keep going and mark the failed row.
                print(f"Error during API call for profile {i}: {e}")
                row = {key: "Error" for key in csv_headers}
            writer.writerow(row)
            # Flush per row so an interrupted long run keeps its progress.
            csvfile.flush()


if __name__ == "__main__":
    main()