|
| 1 | +import pprint |
| 2 | + |
| 3 | +from nose.tools import eq_ |
| 4 | + |
| 5 | +from .. import arxiv |
| 6 | +from ...identifier import Identifier |
| 7 | + |
# Wikitext-like sample for test_extract.  It mixes arXiv identifiers written
# several ways (an ``arxiv=`` template parameter, an ``arxiv:`` prefix, and
# arxiv.org URLs with and without ``/abs/``) with DOI-like numbers and other
# text that has no counterpart in EXPECTED.
INPUT_TEXT = """
This is a doi randomly placed in the text 10.0000/m1
Here's a typo that might be construed as a doi 10.60 people were there.
{{cite|...|arxiv=0706.0001v1|pmid=10559875}}
<ref>Halfaker, A., Geiger, R. S., Morgan, J. T., & Riedl, J. (2012).
The rise and decline of an open collaboration system: How Wikipedia’s
reaction to popularity is causing its decline.
American Behavioral Scientist,
0002764212469365 arxiv:0706.0002v1</ref>. Hats pants and banana
[http://arxiv.org/0706.0003]
[http://arxiv.org/abs/0706.0004v1]
[https://arxiv.org/abs/0706.0005v1]
[https://arxiv.org/abs/math.GT/0309001]
[https://arxiv.org/abs/-math.gs/0309002]
{{cite|...|arxiv=foobar.hats/0101003|issue=1656}}
http://www.google.com/sky/#latitude=3.362&longitude=160.1238441&zoom=
10.2387/234310.2347/39423
<!--
 10.2387/234310.2347/39423-->
"""

# Identifiers test_extract compares against the extraction result, listed in
# the order they occur in INPUT_TEXT.  Note the expected ids carry no ``v1``
# version suffix and ``math.GT`` is expected in lower case.
EXPECTED = [
    Identifier('arxiv', arxiv_id)
    for arxiv_id in (
        "0706.0001",
        "0706.0002",
        "0706.0003",
        "0706.0004",
        "0706.0005",
        "math.gt/0309001",
        "math.gs/0309002",
        "foobar.hats/0101003",
    )
]
| 38 | + |
def test_extract():
    """Check that ``arxiv.extract`` yields exactly EXPECTED for INPUT_TEXT.

    The extracted identifiers are collected into a list and compared with
    EXPECTED using ``eq_``, so both the items and their order must match.
    """
    extracted = [identifier for identifier in arxiv.extract(INPUT_TEXT)]

    # Print actual then expected before asserting; presumably so both are
    # visible in the test output when the comparison fails.
    for shown in (extracted, EXPECTED):
        pprint.pprint(shown)

    eq_(extracted, EXPECTED)
0 commit comments