Skip to content

Commit

Permalink
adding stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
yoachim committed Dec 23, 2015
1 parent 7586a80 commit 66bfb94
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 12 deletions.
7 changes: 7 additions & 0 deletions notes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,10 @@ https://ui.adsabs.harvard.edu/#search/q=author%3A%22Yoachim%2C+P%22&sort=date+de
so now I just need to figure out if the API will let me download that network, then I'm almost done!
Then I don't have to roll my own silly graph network with networkX or anything https://networkx.github.io/index.html


---------

A fun way to spin the paper might be ranking astro PhD departments by where their grads end up (like the other paper), and then also by how fast their grads leave the field. I suspect the two rankings would be similar, but possibly different.


Probably need to do some screening to make sure the author is actually an "astronomer", and not a kinda-related physics PhD.
60 changes: 48 additions & 12 deletions python/grabPhdClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,26 @@
import numpy as np
import difflib

def inAstroJ(toTest):
    """
    Check if a journal name, or a list of journal names, includes at
    least one "astronomy journal".

    Parameters
    ----------
    toTest : str, iterable of str, or None
        A single journal name, or an iterable of journal names.

    Returns
    -------
    bool
        True if toTest (or any element of it) is exactly equal to an
        entry of inAstroJ.journalList, otherwise False. None and empty
        iterables give False.
    """
    # The journal list is stored as an attribute on the function itself so
    # it is built only once. The attribute name must be passed to hasattr
    # as a string, and the function must refer to itself by its real name.
    if not hasattr(inAstroJ, 'journalList'):
        inAstroJ.journalList = [
            'The Astrophysical Journal',
            'The Astronomical Journal',  # XXX-add more
            # TODO: maybe also AAS or IAU meeting abstracts
        ]

    # Nothing to test
    if toTest is None:
        return False

    # If it's a string, just test it; looping over it would compare
    # single characters against the journal names.
    if isinstance(toTest, str):
        return toTest in inAstroJ.journalList

    # Else, loop through and stop at the first hit
    return any(journal in inAstroJ.journalList for journal in toTest)


def authSimple(author):
"""
reduce a name string to Last, FI
Expand Down Expand Up @@ -87,15 +107,27 @@ def affClean(aff):
def checkAffMatch(aff1,aff2, matchThresh=0.70):
    """
    See if two affiliations are similar enough that we think they match.
    Make them lower case so things will work since some affiliations
    end up as all caps.

    Parameters
    ----------
    aff1, aff2 : str or None
        The affiliation strings to compare.
    matchThresh : float (0.70)
        The difflib.SequenceMatcher ratio that must be exceeded for two
        affiliations to count as a fuzzy match.

    Returns
    -------
    bool
        True if, ignoring case, the affiliations are equal, one contains
        the other, or their similarity ratio is above matchThresh.
        False otherwise, and always False if either one is None or '-'.
    """
    # Bail out if either one is None
    if aff1 is None or aff2 is None:
        return False
    # Bail out if either one is a '-'
    if aff1 == '-' or aff2 == '-':
        return False

    # Lower-case once and reuse for every comparison
    low1 = aff1.lower()
    low2 = aff2.lower()

    if low1 == low2:
        return True
    # Cheap containment checks go first; the result is the same as testing
    # them after the ratio because any one passing check returns True.
    if low1 in low2 or low2 in low1:
        return True
    return difflib.SequenceMatcher(None, low1, low2).ratio() > matchThresh

Expand Down Expand Up @@ -137,7 +169,7 @@ def checkAuthorMatch(article1,article2,authorName=None,
for name, aff in zip(article2.author,article2.aff):
if authSimple(name) == authSimple(authorName):
aff2 = affClean(aff)
if aff1 is not None:
if (aff1 is not None) & (aff2 is not None):
if checkAffMatch(aff1,aff2,matchThresh=matchThresh):
return True
# If they share nCommonRefs
Expand Down Expand Up @@ -170,7 +202,7 @@ def grabPhdClass(year):
return ack

def authorsPapers(author, year=None):
    """
    Query ADS for an author's papers and return them as a list.

    Parameters
    ----------
    author : str
        Author name, passed to ads.query as `authors`.
    year : optional (None)
        Date restriction, passed to ads.query as `dates`.
        NOTE(review): the accepted formats depend on the ads client
        version in use -- confirm against its documentation.

    Returns
    -------
    list
        Every result from the query (database='astronomy', rows='all').
    """
    # Issue a single query using the `dates` keyword; a second call with
    # the superseded `year` keyword would only repeat the remote request
    # and have its result thrown away.
    return list(ads.query(authors=author, dates=year, database='astronomy', rows='all'))


Expand All @@ -194,12 +226,16 @@ def test1():
paperList = authorsPapers(article.author[0], year=years)


name = 'Yoachim, P'
myPapers = authorsPapers(name)
# Try linking my publications
mineLinked, myG = authorGroup(myPapers, myPapers[-1], name)
years = [int(paper.year) for paper in mineLinked]
nx.draw_spring(myG, node_color=years)
def test2():
    """
    Try linking one author's publications and draw the resulting graph,
    with the nodes coloured by publication year.
    """
    author = 'Yoachim, P'
    papers = authorsPapers(author)
    # Seed the grouping with the last paper in the returned list
    seedPaper = papers[-1]
    linked, graph = authorGroup(papers, seedPaper, author)
    # One integer year per linked paper, used as the node colours
    pubYears = []
    for linkedPaper in linked:
        pubYears.append(int(linkedPaper.year))
    nx.draw_spring(graph, node_color=pubYears)

# What do I want my final output to be?
# (name, phd year, phd bibcode, phd.aff, latest paper bibcode, latest year, latest aff, latest 1st author bibcode, latest 1st year, latest 1st aff, largest publication gap)


#XXX--ok, next up, test it on an author with a common name. Then start writing some I/O functions for
7 changes: 7 additions & 0 deletions python/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Module name must match the file name (grabPhdClass.py) exactly, or the
# import fails on case-sensitive file systems.
from grabPhdClass import *

# Just gather a few test cases where I personally know what the result should be

# __name__ (not __module__) holds '__main__' when the file is run as a script
if __name__ == '__main__':

    # My phd bibcode:
    # No test cases yet; `pass` keeps the empty suite syntactically valid.
    pass

0 comments on commit 66bfb94

Please sign in to comment.