forked from jongminyoon/fever
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdoc_db.py
53 lines (41 loc) · 1.53 KB
/
doc_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Adapted and modified from https://github.com/sheffieldnlp/fever-baselines/tree/master/src/scripts
# which is adapted from https://github.com/facebookresearch/DrQA/blob/master/scripts/retriever/build_db.py
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
"""Documents, in a sqlite database."""
import sqlite3
import utils
class DocDB(object):
"""Sqlite backed document storage.
Implements get_doc_text(doc_id).
"""
def __init__(self, db_path=None):
self.path = db_path
self.connection = sqlite3.connect(self.path, check_same_thread=False)
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
def path(self):
"""Return the path to the file that backs this database."""
return self.path
def close(self):
"""Close the connection to the database."""
self.connection.close()
def get_doc_ids(self):
"""Fetch all ids of docs stored in the db."""
cursor = self.connection.cursor()
cursor.execute("SELECT id FROM documents")
results = [r[0] for r in cursor.fetchall()]
cursor.close()
return results
def get_doc_text(self, doc_id):
"""Fetch the raw text of the doc for 'doc_id'."""
cursor = self.connection.cursor()
cursor.execute(
"SELECT text FROM documents WHERE id = ?",
(utils.normalize(doc_id),)
)
result = cursor.fetchone()
cursor.close()
return result if result is None else result[0]