-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathidentList2gramps.py
167 lines (146 loc) · 6.81 KB
/
identList2gramps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 29 23:05:13 2020
Script to convert a CSV family tree (indented list) to Gramps CSV
@author: luiz
"""
# Input file: the semicolon-separated CSV export of the family-tree sheet.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
fileName = "/media/luiz/Volume/Dokumente/Fotografien und Videos/ÁrvoreGen/A GRANDE ÁRVORE - Medeiros (csv export).csv"
import pandas as pd
import numpy as np
# Presumably a project-local module; it provides the `individuum` class
# instantiated by the parsing loop below.
import individuum as ind
# Load the whole sheet once; the parsing loop addresses its cells by position.
data = pd.read_csv(fileName, delimiter=';')
def isInt(s):
    """Return True when ``int(s)`` succeeds, False otherwise.

    Only the exceptions ``int()`` raises for unconvertible input are treated
    as "not an integer": ``ValueError`` (e.g. ``"z"``, ``""``, NaN),
    ``TypeError`` (e.g. ``None``) and ``OverflowError`` (infinity).  Anything
    else -- notably ``KeyboardInterrupt`` -- propagates instead of being
    silently swallowed by a bare ``except``.
    """
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
# User preferences:
# buildGraph selects how individuals are stored: False keeps them in a set and
# records parents by name; True keeps them in a NumPy array and records
# parents as objects.
buildGraph = False

# Initializing vars:
nameList = []    # every name read, in reading order (repeated names are kept)
genList = []     # generation depth stored alongside each entry of nameList
symbList = []    # one-character markers read so far: 'z', 'v' or a digit
nameSet = set()  # names for which an individuum object already exists
if buildGraph:
    indivArr = np.array([])
else:
    indivSet = set()

# Parser state, driven by the one-character markers found in the sheet.
nextIsName = False  # the next string cell is taken as a person's name
nextIsChild = True  # that person is a child of the previous generation
blockBegin = True   # that person opens a new block (no parents recorded)
ident = 0           # running numeric id handed to each new individual

for iRow, row in enumerate(data.itertuples()):
    # The generation of a name is the number of '|' cells that precede it
    # in the same row.
    generation = 0
    for iCol, element in enumerate(row):
        if element == '|':
            generation = generation + 1

        # Only rows from the third on, and tuple positions 2..11, are parsed
        # for markers and names.
        if not (iRow >= 2 and 2 <= iCol <= 11):
            continue

        if isinstance(element, str) and nextIsName:
            nameList.append(element)    # preserve order
            genList.append(generation)  # put current generation
            if element not in nameSet:
                if blockBegin or not nextIsChild:
                    # Block opener or spouse/partner: no parents are recorded.
                    # NOTE(review): the original passed
                    # `bithday = row[16], death = row[17]` here; keyword names
                    # and column positions now match the child branch below.
                    # Confirm against the individuum class and the sheet layout.
                    indiv = ind.individuum(name=element, birthday=row[17],
                                           death=row[18], bplace=row[16])
                else:
                    nameArr = np.array(nameList)
                    genArr = np.array(genList)
                    symbArr = np.array(symbList)
                    # NOTE(review): the masks below assume symbList and
                    # nameList have the same length (one marker per name).
                    if not buildGraph:
                        # Most recent name one generation up (this is the
                        # spouse when one was listed with 'z').
                        parent1 = str(nameArr[genArr == generation - 1][-1])
                        # Most recent name one generation up whose marker is
                        # not 'z'.
                        parent2 = str(nameArr[np.logical_and(
                            genArr == generation - 1, symbArr != 'z')][-1])
                    else:
                        parent1 = indivArr[genArr[:-1] == generation - 1][-1]
                        parent2 = indivArr[np.logical_and(
                            genArr[:-1] == generation - 1,
                            symbArr[:-1] != 'z')][-1]
                    # Parents will be inverted in some cases: there is no
                    # gender info in the table.
                    indiv = ind.individuum(name=element, father=parent2,
                                           mother=parent1, birthday=row[17],
                                           death=row[18], bplace=row[16])
                nameSet.add(element)  # prevent adding the same person again
                indiv.ident = ident
                ident = ident + 1
                if buildGraph:
                    indivArr = np.append(indivArr, indiv)
                else:
                    indivSet.add(indiv)
            nextIsName = False
        elif isinstance(element, str) and len(element) == 1:
            if element == 'z':  # next name is not a child but a spouse or partner
                symbList.append('z')
                blockBegin = False
                nextIsChild = False
                nextIsName = True
            elif element == 'v':  # begin of a new block
                symbList.append('v')
                blockBegin = True
                nextIsName = True
            elif isInt(element):  # child counter: next name is a child
                symbList.append(element)
                blockBegin = False
                nextIsChild = True
                nextIsName = True
# Distinct (mater, pater) pairs found among all collected individuals.
famSet = {(person.mater, person.pater) for person in indivSet}
# now going to build a dataframe
# Presumably the column titles of the target person table; not used below.
headLines = "Person Surname Given Call Suffix Prefix Title Gender Birth date Birth place Birth source Baptism date Baptism place Baptism source Death date Death place Death source Burial date Burial place Burial source Note".split(' ')

# One dict per individual, collected in a list and turned into a DataFrame in
# a single step: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
personRows = []
dataDict = {}
for indiv in indivSet:
    # If there is a nickname between brackets, split it off the name.
    nickStart = indiv.nomen.find('(')
    if nickStart != -1:
        nickEnd = indiv.nomen.find(')', nickStart)
        if nickEnd == -1:
            # No closing bracket: take everything after '(' instead of
            # silently dropping the last character.
            nickEnd = len(indiv.nomen)
        nick = indiv.nomen[nickStart + 1:nickEnd]
        name = indiv.nomen[:nickStart].rstrip()  # text before '(' without trailing spaces
    else:
        nick = ''
        name = indiv.nomen

    nameParts = name.split(' ')
    if len(nameParts) > 1:  # complete name: the last word is the surname
        surname = nameParts[-1]
        given = ' '.join(nameParts[:-1])
    else:  # the name is only one word
        surname = ''
        given = name

    # A fresh dict per row, so rows never share state.
    dataDict = {
        'id1': indiv.ident,
        'Person': '[I' + "{:04d}".format(indiv.ident) + ']',
        'Nick': nick,
        'Surname': surname,
        'Given': given,
        'Full name': indiv.nomen,
        'Birth date': indiv.natalis,
        'Death date': indiv.mortis,
        'Birth place': indiv.locNatalis,
    }
    personRows.append(dataDict)
dataTb = pd.DataFrame(personRows)
# Build the marriage table: one row per distinct parent pair in famSet.
# Rows are collected in a list and converted once, because DataFrame.append
# was removed in pandas 2.0.
famId = 0
marrDict = {}
marrRows = []
for fam in famSet:
    if fam == (None, None):  # case both parents are not given
        continue  # skip, go to the next
    # famSet stores the pairs as (mater, pater).
    wifeName, husbandName = fam
    # Look up each parent's numeric id by full name; .iloc[0] takes the first
    # match explicitly instead of relying on int() of a one-element Series.
    hb = int(dataTb.loc[dataTb['Full name'] == husbandName, 'id1'].iloc[0])
    wf = int(dataTb.loc[dataTb['Full name'] == wifeName, 'id1'].iloc[0])
    # A fresh dict per row, so rows never share state.
    marrDict = {
        'id2': famId,
        'Marriage': '[F' + "{:04d}".format(famId) + ']',
        'Husband': '[I' + "{:04d}".format(hb) + ']',
        'Wife': '[I' + "{:04d}".format(wf) + ']',
        'Husband Name': husbandName,
        'Wife Name': wifeName,
    }
    marrRows.append(marrDict)
    famId = famId + 1
marrTb = pd.DataFrame(marrRows)
# Build the family table: one row linking each child to its parents' family.
# Rows are collected in a list and converted once, because DataFrame.append
# was removed in pandas 2.0.
famDict = {}
famRows = []
for indiv in indivSet:
    if indiv.mater is not None and indiv.pater is not None:  # both parents are listed
        # Locate the family whose husband/wife names match this individual's parents.
        indFam = marrTb.loc[(marrTb['Husband Name'] == indiv.pater) & (marrTb['Wife Name'] == indiv.mater)]
        # .iloc[0] takes the first match explicitly instead of relying on
        # int() of a one-element Series.
        famDict = {
            'Family': '[F' + "{:04d}".format(int(indFam['id2'].iloc[0])) + ']',
            'Child': '[I' + "{:04d}".format(indiv.ident) + ']',
        }
        famRows.append(famDict)
    elif indiv.mater is not None or indiv.pater is not None:
        # Exactly one parent is known: no family row is written for now.
        print('ignoring for now family for: ' + indiv.nomen)
famTb = pd.DataFrame(famRows)
# Put the person, marriage and family tables side by side (outer joins on the
# row index) and write the combined sheet to disk.
completeTb = dataTb.join(marrTb, how='outer').join(famTb, how='outer')
completeTb.to_csv('/media/luiz/Volume/Dokumente/Fotografien und Videos/ÁrvoreGen/out.csv')
# with open('csvfile.csv','wb') as file:
# for line in text:
# file.write(line)
# file.write('\n')