-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSpotlightEntitySummarizer.php
40 lines (36 loc) · 1.2 KB
/
SpotlightEntitySummarizer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
<?php
/**
 * Collapse a CSV of entity rows into one summary row per entity ID.
 *
 * Usage: php SpotlightEntitySummarizer.php <input.csv>
 *
 * The first line of the input is treated as the header. Every following row
 * is grouped by its first column (the ID). For rows that share an ID:
 *   - columns 1, 2 and 6 must be identical, otherwise the script aborts;
 *   - column 3 (surface forms) is accumulated into a comma-separated list
 *     with duplicates dropped;
 *   - column 4 (offsets) is concatenated with commas;
 *   - column 5 (similarity score) becomes the running mean of the values seen;
 *   - a trailing "Count" column records how many rows were merged.
 *
 * The result is written next to the input file as "summary-<input name>".
 * Errors are reported on STDERR and the script exits with status 1.
 */

// Report a fatal problem on STDERR and stop with a non-zero exit status so
// that shell pipelines and callers can detect the failure.
$abort = function ($message) {
    fwrite(STDERR, $message);
    exit(1);
};

if (!isset($argv[1])) {
    $abort("Usage: php SpotlightEntitySummarizer.php <input.csv>\n");
}
$inputPath = $argv[1];

$handle = fopen($inputPath, 'r');
if ($handle === false) {
    $abort("Unable to open input file: $inputPath\n");
}

$header = fgetcsv($handle);
if (!is_array($header)) {
    // Empty input: fgetcsv() returned false. Fall back to an empty header so
    // the summary still gets its "Count" column.
    $header = [];
}
$header[] = 'Count';

$colsToDiff = [1, 2, 6]; // columns that must agree across rows of one ID
$data = [];              // ID => merged row (without the count)
$counts = [];            // ID => number of rows merged into $data[ID]

while (($line = fgetcsv($handle)) !== false) {
    // fgetcsv() yields [null] for a blank line; that is not a data row.
    if ($line === [null]) {
        continue;
    }

    $id = $line[0];
    if (!isset($data[$id])) {
        $data[$id] = $line;
        $counts[$id] = 1; // initialize count
        continue;
    }

    foreach ($colsToDiff as $col) {
        if ($data[$id][$col] != $line[$col]) {
            $abort("Unexpected mismatch in column $col: " . print_r($data[$id], true) . print_r($line, true));
        }
    }

    // merge surface forms:
    $forms = explode(',', $data[$id][3]);
    $forms[] = $line[3];
    $data[$id][3] = implode(',', array_unique($forms));

    $data[$id][4] .= ',' . $line[4]; // append offsets

    // Fold the new similarity score into the running mean. When the new score
    // equals the current mean the result would not change, so the stored value
    // (and its original textual form) is left alone.
    if ($data[$id][5] != $line[5]) {
        $data[$id][5] = (($data[$id][5] * $counts[$id]) + $line[5]) / ($counts[$id] + 1);
    }

    $counts[$id]++; // increment count
}
fclose($handle);

$outputPath = dirname($inputPath) . '/' . 'summary-' . basename($inputPath);
$handle = fopen($outputPath, 'w');
if ($handle === false) {
    $abort("Unable to open output file: $outputPath\n");
}

fputcsv($handle, $header);
foreach ($data as $id => $current) {
    // The count is kept separately while merging so it never collides with a
    // data column; it is appended here as the last field of each row.
    $current[] = $counts[$id];
    fputcsv($handle, $current);
}
fclose($handle);