Merge pull request #12 from lakshmanaram/master

added degree info feature
skcript · Dec 31, 2016 · d390d63 · d390d63
2 parents cdb93bb + d8c0f47
commit d390d63
Show file tree

Hide file tree

Showing 9 changed files with 435 additions and 105 deletions.
diff --git a/README.md b/README.md
@@ -25,6 +25,7 @@ cvscan add -s "C,C++,R,Java"
 ```
 cvscan remove --skill "C,C++"
 ```
+
 ## Jobs
 ### add
 Adding  
@@ -41,6 +42,7 @@ Removing
 ```
 cvscan remove --job "contributor,Android Programmer"
 ```
+
 ## Organizations
 ### add
 ```
@@ -50,3 +52,72 @@ cvscan add --org "Skcript"
 ```
 cvscan remove -o "Skcript"
 ```
+
+## Qualifications
+Note:  
+* Qualifications are case-sensitive.
+* Punctuation before the first and after the last letter should be excluded.
+
+### add
+```
+cvscan add -q "B.S,B.Tech,B.Arch"
+```
+### remove
+```
+cvscan remove --qual "B.Arch"
+```
+
+## Extra Information
+### add
+```
+cvscan add -e "machine learning,artificial intelligence"
+```
+### remove
+```
+cvscan remove --extra "machine learning,artificial intelligence"
+```
+
+File Descriptions
+============
+## class Cvscan
+```
+cvscan = Cvscan(name,path)
+```
+#### Extract
+Converts the input file to raw_text and calls the parse method
+```
+cvscan.extract()
+```
+#### Display extracted text
+```
+cvscan.show()
+```
+### Attributes
+| Attributes          | Function |
+|---------------------|-----------|
+|path                 | Stores the path of the resume |
+|raw_text             | Stores the resume as raw text |
+|URLs                 | Stores all the URLs from the resume |
+|name                 | Applicant's name |
+|emails               | Applicant's email |
+|Phone number         | Applicant's contact number |
+|address              | Applicant's address |
+|experience           | Applicant's experience in years |
+|cleaned_resume       | Raw text after removing english stopwords |
+|skills               | Applicant's skillset |
+|qualifications       | Applicant's qualifications |
+|degree_info          | Info about qualification |
+|job_positions        | Applicant's jobs |
+|category             | Applicant's Job category |
+|current_employers    | Organization applicant is working in |
+|employers            | All organizations applicant has worked in |
+|extra_info           | Extra information about the applicant|
+<!--
+## configurations.py
+Contains the regular expressions used throughout the project
+## converter.py
+Contains methods to convert resume from input format to raw text
+#### pdf_to_text
+Uses pdfminer library to fetch raw text from the resume. Special characters and bullets in the resume are replaced with a newline character.  
+This formatted text from the resume is returned.
+ -->
diff --git a/cvscan/__init__.py b/cvscan/__init__.py
@@ -35,7 +35,7 @@ def extract(self):
         if self.raw_text is not '':
             self.parse()
         else:
-            raise ValueError("Error parsing resume.")
+            raise ValueError("Error extracting resume text.")
 
     def parse(self):
         self.URLs = annotations_parser.fetch_pdf_urls(self.path)
@@ -45,10 +45,13 @@ def parse(self):
         self.address = dp.fetch_address(self.raw_text)
         self.experience = dp.calculate_experience(self.raw_text)
         self.cleaned_resume = lp.clean_resume(self.raw_text)
-        self.skills = lp.fetch_skills(self.cleaned_resume)
+        self.skills = dp.fetch_skills(self.cleaned_resume)
+        (self.qualifications,self.degree_info) = dp.fetch_qualifications(
+            self.raw_text)
         self.job_positions, self.category = dp.fetch_jobs(self.cleaned_resume)
         self.current_employers,self.employers = lp.fetch_employers(
             self.raw_text,self.job_positions)
+        self.extra_info = dp.fetch_extra(self.raw_text)
 
     # TODO: Add more fetch here
     def show(self):
@@ -63,5 +66,8 @@ def show(self):
             "jobs" : self.job_positions,
             "job category" : self.category,
             "employers" : self.employers,
-            "current_employers" : self.current_employers
-        }
+            "current_employers" : self.current_employers,
+            "qualifications" : self.qualifications,
+            "qualifications_info" : self.degree_info,
+            "extra_info" : self.extra_info
+        }
diff --git a/cvscan/cli/cli.py b/cvscan/cli/cli.py
@@ -42,21 +42,29 @@ def parse(name):
 @click.option('--org','-o',help='Explicitly add organizations')
 @click.option('--skill','-s',help='Add skills')
 @click.option('--job','-j',help='For adding jobs: -j <job:category>')
-def add(org,skill,job):
+@click.option('--qual','-q',help="Add qualifications")
+@click.option('--extra','-e',help = "Add Extra information")
+def add(org,skill,job,qual,extra):
   """
 
   Add data to be considered\n
   Params: \n
   org Type: comma separated string\n
   skill Type: comma separated string\n
   job Type: comma separated string (comma separated - job:category)\n
+  qual Type: comma separated string\n
   Usage:\n
   For adding organization:\n
   cvscan add --org <org_name,org_name,...>\n
   For adding skill:\n
   cvscan add --skill <skill,skill,...>\n
   For adding job:\n
   cvscan add --job <job:category,job:category,...>\n
+  For adding qualification:\n
+  cvscan add --qual <degree,degree,..>\n
+  punctuations before the first and after the last alphabet are excluded\n
+  For adding extra information:\n
+  cvscan add --extra <extra,extra>\n
   The above can be combined together also. Eg:\n
   cvscan add -o <org_name,org_name,..> -s <skill,skill,..> is also valid
 
@@ -74,29 +82,40 @@ def add(org,skill,job):
       except Exception:
         print "Something wnet wrong: " + Exception
     do.add_jobs(jobs)
-
+  if qual:
+    do.add_qualifications(qual.split(','))
+  if extra:
+    do.add_extra(extra.split(','))
 
 @main.command()
 @click.option('--org','-o',help='Explicitly remove organizations')
 @click.option('--skill','-s',help='Remove skills')
 @click.option('--job','-j',help='For removing jobs -j <job>')
-def remove(org,skill,job):
+@click.option('--qual','-q',help="Remove qualifications")
+@click.option('--extra','-e',help = "Remove Extra information")
+def remove(org,skill,job,qual,extra):
   """
 
   Remove data from consideration\n
   Params:\n
   org Type: comma separated string\n
   skill Type: comma separated string\n  
   job Type: comma separated string\n
+  qual Type: comma separated string\n
   Usage:\n   
-  For adding organization:\n
+  For removing organization:\n
   cvscan remove --org <org_name,org_name,..>\n
-  For adding skill:\n
+  For removing skill:\n
   cvscan remove --skill <skill,skill,..>\n
-  For adding job:\n
+  For removing job:\n
   cvscan remove --job <job,job,..>\n
+  For removing qualification:\n
+  cvscan remove -q <degree,degree,..>\n
+  punctuations before the first and after the last alphabet are excluded\n
+  For removing extra information:\n
+  cvscan remove -e <extra,extra>\n
   The above can be combined together also. Eg:\n
-  cvscan remove -o <org_name,org_name,..> -s <skill,skill,..> -j <job> 
+  cvscan remove -o <org_name,org_name,..> -s <skill,skill,..> -j <job>
   is also valid
 
   """
@@ -105,4 +124,8 @@ def remove(org,skill,job):
   if skill:
     do.remove_skills(skill.split(','))
   if job:
-    do.remove_jobs(job.split(','))
+    do.remove_jobs(job.split(','))
+  if qual:
+    do.remove_qualifications(qual.split(','))
+  if extra:
+    do.remove_extra(extra.split(','))
diff --git a/cvscan/data/extra/extra b/cvscan/data/extra/extra
@@ -0,0 +1,2 @@
+(lp0
+.
diff --git a/cvscan/data/qualifications/degree b/cvscan/data/qualifications/degree
@@ -0,0 +1,14 @@
+(lp0
+S'B.Tech'
+p1
+aS'B.E'
+p2
+aS'B.Arch'
+p3
+aS'B. Tech'
+p4
+aS'M.Tech'
+p5
+aS'M. Tech'
+p6
+a.
diff --git a/cvscan/data_operations.py b/cvscan/data_operations.py
@@ -169,3 +169,95 @@ def remove_jobs(jobs_to_remove):
   with open(DATAPATH +'job_positions/positions','wb') as fp:
     pickle.dump(jobs,fp)
   logging.debug("updated positions file")
+
+
+"""
+
+A utility function to add qualifications to the degree file.
+Params: quals Type: List of String
+Qualifications are case-sensitive.
+Care should be taken with punctuation.
+Exclude punctuation before the first letter and after the last letter.
+
+"""
+def add_qualifications(quals):
+  with open(DATAPATH + 'qualifications/degree','rb') as fp:
+    qualifications = pickle.load(fp)
+  logging.debug("degree file loaded")
+
+  for qual in quals:
+    if qual not in qualifications:
+      qualifications.append(qual)
+      logging.debug(qual + " added to qualifications")
+
+  with open(DATAPATH + 'qualifications/degree','wb') as fp:
+    pickle.dump(qualifications, fp)
+  logging.debug("degree file written")
+
+
+"""
+
+A utility function to remove qualifications from the degree file.
+Params: quals Type: List of String
+Qualifications are case-sensitive.
+Care should be taken with punctuation.
+Exclude punctuation before the first letter and after the last letter.
+
+"""
+def remove_qualifications(quals):
+  with open(DATAPATH + 'qualifications/degree','rb') as fp:
+    qualifications = pickle.load(fp)
+  logging.debug("degree file loaded")
+
+  for qual in quals:
+    if qual in qualifications:
+      qualifications.remove(qual)
+      logging.debug(qual + " removed from qualifications")
+
+  with open(DATAPATH + 'qualifications/degree','wb') as fp:
+    pickle.dump(qualifications, fp)
+  logging.debug("degree file written")
+
+
+"""
+
+A utility function to add extra information to the extra file.
+Params: extra_info Type: List of String
+Entries in extra_info are case-sensitive.
+
+"""
+def add_extra(extra_info):
+  with open(DATAPATH + 'extra/extra','rb') as fp:
+    extra = pickle.load(fp)
+  logging.debug("extra file loaded")
+
+  for e in extra_info:
+    if e not in extra:
+      extra.append(e)
+      logging.debug(e + " added to extra information")
+
+  with open(DATAPATH + 'extra/extra','wb') as fp:
+    pickle.dump(extra, fp)
+  logging.debug("extra file written")
+
+
+"""
+
+A utility function to remove extra information from the extra file.
+Params: extra_info Type: List of String
+Entries in extra_info are case-sensitive.
+
+"""
+def remove_extra(extra_info):
+  with open(DATAPATH + 'extra/extra','rb') as fp:
+    extra = pickle.load(fp)
+  logging.debug("extra file loaded")
+
+  for e in extra_info:
+    if e in extra:
+      extra.remove(e)
+      logging.debug(e + " removed from extra information")
+
+  with open(DATAPATH + 'extra/extra','wb') as fp:
+    pickle.dump(extra, fp)
+  logging.debug("extra file written")