-
Notifications
You must be signed in to change notification settings - Fork 4
/
models.py
77 lines (62 loc) · 2.4 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import io
import json

import pandas as pd
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy_repr import PrettyRepresentableBase

from biorxiv_scraper import baseurl
# Flask-SQLAlchemy handle for this module. PrettyRepresentableBase is passed as
# the model base class, so every db.Model subclass below derives from it.
db = SQLAlchemy(model_class=PrettyRepresentableBase)
class Biorxiv(db.Model):
    """ORM model for one bioRxiv entry and the parse results kept for it.

    Three columns hold serialized text and are exposed through decoding
    properties instead of being read directly:

    * ``parse_data``     -- JSON text <-> ``pandas.DataFrame``
    * ``pages``          -- JSON text <-> Python list
    * ``author_contact`` -- JSON text <-> arbitrary JSON-serializable data

    NOTE(review): the getters decode the stored string in Python, so they
    appear to be usable on instances only, not as class-level SQL
    expressions -- confirm before using them in a query filter.
    """

    source = db.Column(db.String(10), default='biorxiv')
    id = db.Column(db.String, primary_key=True)
    created = db.Column(db.DateTime)
    title = db.Column(db.String)
    parse_status = db.Column(db.Integer, default=0, nullable=False)
    # Serialized DataFrame (JSON text); read and written via ``parse_data``.
    _parse_data = db.Column('parse_data', db.String)
    # JSON-encoded list; read and written via ``pages``.
    _pages = db.Column('pages', db.String, default='[]', nullable=False)
    page_count = db.Column('page_count', db.Integer, default=0, nullable=False)
    posted_date = db.Column(db.String(10), default='')
    # JSON-encoded value; read and written via ``author_contact``.
    _author_contact = db.Column('author_contact', db.String)
    email_sent = db.Column(db.Integer)

    @hybrid_property
    def parse_data(self):
        """Return the stored parse results as a ``pandas.DataFrame``.

        When nothing has been stored yet, an empty frame with the columns
        ``fn``, ``cm``, ``pct_cm`` and ``pct_page`` is returned instead.
        """
        if self._parse_data:
            # Wrap the text in a file-like object: passing a literal JSON
            # string to ``pd.read_json`` is deprecated in recent pandas.
            return pd.read_json(io.StringIO(self._parse_data))
        return pd.DataFrame(columns=['fn', 'cm', 'pct_cm', 'pct_page'])

    @parse_data.setter
    def parse_data(self, df):
        """Serialize ``df`` to JSON text and store it.

        The index is reset first, so it is written out as an ordinary column.
        """
        self._parse_data = df.reset_index().to_json()

    @hybrid_property
    def pages(self):
        """Return the decoded value of the JSON-encoded ``pages`` column."""
        return json.loads(self._pages)

    @pages.setter
    def pages(self, lst):
        """Store ``lst`` as JSON text in the ``pages`` column."""
        self._pages = json.dumps(lst)

    @hybrid_property
    def pages_str(self):
        """Return ``pages`` as an English phrase.

        Returns:
            ``"page 3"`` for one entry, ``"pages 3 and 5"`` for two, and
            ``"pages 3, 5, and 8"`` for three or more.

        Raises:
            ValueError: if ``pages`` is empty.
        """
        # Decode the stored JSON once rather than on every branch below.
        pages = self.pages
        count = len(pages)
        if count == 0:
            raise ValueError("Can't pretty print if pages = []")
        if count == 1:
            return "page {}".format(pages[0])
        if count == 2:
            return "pages {} and {}".format(*pages)
        leading = ", ".join(str(p) for p in pages[:-1])
        return 'pages {}, and {}'.format(leading, pages[-1])

    @hybrid_property
    def author_contact(self):
        """Return the decoded author contact data, or ``None`` if unset."""
        if self._author_contact:
            return json.loads(self._author_contact)
        return None

    @author_contact.setter
    def author_contact(self, data):
        """Store ``data`` as JSON text in the ``author_contact`` column."""
        self._author_contact = json.dumps(data)

    @hybrid_property
    def url(self):
        """Return ``baseurl(self.id)`` as provided by ``biorxiv_scraper``."""
        return baseurl(self.id)

    @hybrid_property
    def pdf_url(self):
        """Return the biorxiv.org full-text PDF URL built from ``id``."""
        return "https://www.biorxiv.org/content/10.1101/{}.full.pdf".format(self.id)
class Test(Biorxiv):
    """Subclass of ``Biorxiv`` that adds no columns or behaviour of its own.

    NOTE(review): presumably kept as a separate model for test data --
    confirm how it is mapped and used.
    """