Skip to content

Commit 725edd6

Browse files
committed
Added notebooks to coordinator project
1 parent 3c3e872 commit 725edd6

8 files changed

+15979
-0
lines changed

notebooks/.ipynb_checkpoints/biothings-checkpoint.ipynb

+562
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 2
6+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 38,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import requests\n",
10+
"import pandas as pd\n",
11+
"from io import StringIO"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"# Definitions"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 115,
24+
"metadata": {},
25+
"outputs": [],
26+
"source": [
27+
"def map_entrez_genes(genes, contact=None):\n",
28+
" url = 'https://www.uniprot.org/uploadlists/'\n",
29+
"\n",
30+
" params = {\n",
31+
" 'from':'P_ENTREZGENEID',\n",
32+
" 'to':'ACC',\n",
33+
" 'format':'tab',\n",
34+
" 'query': \" \".join(str(x) for x in genes) if hasattr(genes, '__iter__') else str(genes)\n",
35+
" }\n",
36+
" headers = {'User-Agent': 'Python %s' % contact} if contact else None\n",
37+
"\n",
38+
" r = requests.get(url, params, headers=headers)\n",
39+
" df = pandas.read_csv(StringIO(r.text), sep='\\t')\n",
40+
" df.rename(columns={df.columns[0]: 'entrez_id'}, inplace=True)\n",
41+
" return dict(zip(df['entrez_id'], df['Entry']))\n"
42+
]
43+
},
44+
{
45+
"cell_type": "markdown",
46+
"metadata": {},
47+
"source": [
48+
"# Testing"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": 116,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": [
57+
"id_map = map_entrez_genes([672, 673])"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": 117,
63+
"metadata": {},
64+
"outputs": [
65+
{
66+
"data": {
67+
"text/plain": [
68+
"{672: 'P38398', 673: 'P15056'}"
69+
]
70+
},
71+
"execution_count": 117,
72+
"metadata": {},
73+
"output_type": "execute_result"
74+
}
75+
],
76+
"source": [
77+
"id_map"
78+
]
79+
},
80+
{
81+
"cell_type": "markdown",
82+
"metadata": {},
83+
"source": [
84+
"# Experimentation"
85+
]
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": 39,
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
93+
"url = 'https://www.uniprot.org/uploadlists/'\n",
94+
"\n",
95+
"params = {\n",
96+
"'from':'P_ENTREZGENEID',\n",
97+
"'to':'ACC',\n",
98+
"'format':'tab',\n",
99+
"'query':'673 672'\n",
100+
"}\n",
101+
"\n",
102+
"# data = urllib.urlencode(params)\n",
103+
"# request = urllib2.Request(url, data)\n",
104+
"# contact = \"\" # Please set a contact email address here to help us debug in case of problems (see https://www.uniprot.org/help/privacy).\n",
105+
"# request.add_header('User-Agent', 'Python %s' % contact)\n",
106+
"# response = urllib2.urlopen(request)\n",
107+
"# page = response.read(200000)\n",
108+
"\n",
109+
"r = requests.get(url, params)"
110+
]
111+
},
112+
{
113+
"cell_type": "code",
114+
"execution_count": 40,
115+
"metadata": {},
116+
"outputs": [
117+
{
118+
"data": {
119+
"text/plain": [
120+
"<Response [200]>"
121+
]
122+
},
123+
"execution_count": 40,
124+
"metadata": {},
125+
"output_type": "execute_result"
126+
}
127+
],
128+
"source": [
129+
"r"
130+
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": 87,
135+
"metadata": {},
136+
"outputs": [],
137+
"source": [
138+
"df = pd.read_csv(StringIO(r.text), sep='\\t')\n",
139+
"df.rename(columns={df.columns[0]: 'entrez_id'}, inplace=True)"
140+
]
141+
},
142+
{
143+
"cell_type": "code",
144+
"execution_count": 89,
145+
"metadata": {},
146+
"outputs": [],
147+
"source": [
148+
"id_entries = df[df.Status == 'reviewed'][['entrez_id','Entry']]"
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": 99,
154+
"metadata": {},
155+
"outputs": [
156+
{
157+
"data": {
158+
"text/html": [
159+
"<div>\n",
160+
"<style scoped>\n",
161+
" .dataframe tbody tr th:only-of-type {\n",
162+
" vertical-align: middle;\n",
163+
" }\n",
164+
"\n",
165+
" .dataframe tbody tr th {\n",
166+
" vertical-align: top;\n",
167+
" }\n",
168+
"\n",
169+
" .dataframe thead th {\n",
170+
" text-align: right;\n",
171+
" }\n",
172+
"</style>\n",
173+
"<table border=\"1\" class=\"dataframe\">\n",
174+
" <thead>\n",
175+
" <tr style=\"text-align: right;\">\n",
176+
" <th></th>\n",
177+
" <th>entrez_id</th>\n",
178+
" <th>Entry</th>\n",
179+
" </tr>\n",
180+
" </thead>\n",
181+
" <tbody>\n",
182+
" <tr>\n",
183+
" <th>1</th>\n",
184+
" <td>673</td>\n",
185+
" <td>P15056</td>\n",
186+
" </tr>\n",
187+
" <tr>\n",
188+
" <th>3</th>\n",
189+
" <td>672</td>\n",
190+
" <td>P38398</td>\n",
191+
" </tr>\n",
192+
" </tbody>\n",
193+
"</table>\n",
194+
"</div>"
195+
],
196+
"text/plain": [
197+
" entrez_id Entry\n",
198+
"1 673 P15056\n",
199+
"3 672 P38398"
200+
]
201+
},
202+
"execution_count": 99,
203+
"metadata": {},
204+
"output_type": "execute_result"
205+
}
206+
],
207+
"source": [
208+
"id_entries"
209+
]
210+
},
211+
{
212+
"cell_type": "code",
213+
"execution_count": 98,
214+
"metadata": {},
215+
"outputs": [
216+
{
217+
"data": {
218+
"text/plain": [
219+
"{673: 'P38398', 672: 'P38398'}"
220+
]
221+
},
222+
"execution_count": 98,
223+
"metadata": {},
224+
"output_type": "execute_result"
225+
}
226+
],
227+
"source": [
228+
"id_map"
229+
]
230+
},
231+
{
232+
"cell_type": "code",
233+
"execution_count": 91,
234+
"metadata": {},
235+
"outputs": [],
236+
"source": [
237+
"from IPython.display import HTML"
238+
]
239+
},
240+
{
241+
"cell_type": "code",
242+
"execution_count": 108,
243+
"metadata": {},
244+
"outputs": [
245+
{
246+
"data": {
247+
"text/plain": [
248+
"['P15056', 'P38398']"
249+
]
250+
},
251+
"execution_count": 108,
252+
"metadata": {},
253+
"output_type": "execute_result"
254+
}
255+
],
256+
"source": [
257+
"[x for x in id_entries['Entry']]"
258+
]
259+
},
260+
{
261+
"cell_type": "code",
262+
"execution_count": null,
263+
"metadata": {},
264+
"outputs": [],
265+
"source": [
266+
"HTML(\"<br />\".join(['<a href=\"https://www.uniprot.org/uniprot/%(id)s\">Uniprot: %(id)s</a>' % {'id': x} for x in id_entries['Entry']]))"
267+
]
268+
}
269+
],
270+
"metadata": {
271+
"kernelspec": {
272+
"display_name": "Python 3",
273+
"language": "python",
274+
"name": "python3"
275+
},
276+
"language_info": {
277+
"codemirror_mode": {
278+
"name": "ipython",
279+
"version": 3
280+
},
281+
"file_extension": ".py",
282+
"mimetype": "text/x-python",
283+
"name": "python",
284+
"nbconvert_exporter": "python",
285+
"pygments_lexer": "ipython3",
286+
"version": "3.7.0"
287+
}
288+
},
289+
"nbformat": 4,
290+
"nbformat_minor": 2
291+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 2
6+
}

0 commit comments

Comments
 (0)