Skip to content

Commit

Permalink
Clean up imports + expand travis smoke testing to include USA
Browse files Browse the repository at this point in the history
  • Loading branch information
PaulMcInnis committed Aug 31, 2020
1 parent 9b95f26 commit daf1979
Show file tree
Hide file tree
Showing 11 changed files with 40 additions and 44 deletions.
8 changes: 7 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@ install:
before_script:
- 'flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics'
script:
- 'funnel -s demo/settings.yaml'
- 'funnel -s demo/settings.yaml -log-level DEBUG'
# NOTE: we might want to move the below search somewhere else so it
# isn't so specific.
- 'funnel -s demo/settings.yaml -kw Python Data Scientist PHD AI -ps WA -c Seattle -l USA_ENGLISH -log-level DEBUG'
- 'pytest --cov=jobfunnel --cov-report=xml'
# - './tests/verify-artifacts.sh' TODO: verify that JSON exist and are good
# - './tests/verify_time.sh' TODO: some way of verifying execution time
after_success:
- 'bash <(curl -s https://codecov.io/bash)'
# - './demo/gen_call_graphs.sh' TODO: some way of showing .dot on GitHub?
9 changes: 4 additions & 5 deletions jobfunnel/backend/scrapers/base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
"""The base scraper class to be used for all web-scraping emitting Job objects
"""
import logging
import os
import random
import sys
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import Lock, Manager
from time import sleep, time
from typing import Any, Dict, List, Optional, Tuple, Union
from time import sleep
from typing import Any, Dict, List, Optional

from bs4 import BeautifulSoup
from requests import Session
Expand All @@ -23,8 +20,10 @@
from jobfunnel.resources import (MAX_CPU_WORKERS, USER_AGENT_LIST, JobField,
Locale)

# pylint: disable=using-constant-test,unused-import
if False: # or typing.TYPE_CHECKING if python3.5.3+
from jobfunnel.config import JobFunnelConfigManager
# pylint: enable=using-constant-test,unused-import


class BaseScraper(ABC, Logger):
Expand Down
11 changes: 5 additions & 6 deletions jobfunnel/backend/scrapers/glassdoor.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
"""Scraper for www.glassdoor.X
FIXME: this is currently unable to get past page 1 of job results.
"""
import logging
import re
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor, wait
from datetime import date, datetime, timedelta
from math import ceil
from time import sleep, time
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Tuple, Union

from bs4 import BeautifulSoup
from requests import Session

from jobfunnel.backend import Job, JobStatus
from jobfunnel.backend import Job
from jobfunnel.backend.scrapers.base import (BaseCANEngScraper, BaseScraper,
BaseUSAEngScraper)
from jobfunnel.backend.tools import get_webdriver
from jobfunnel.backend.tools.filters import JobFilter
from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str
from jobfunnel.resources import MAX_CPU_WORKERS, JobField, Locale
from jobfunnel.resources import MAX_CPU_WORKERS, JobField

# pylint: disable=using-constant-test,unused-import
if False: # or typing.TYPE_CHECKING if python3.5.3+
from jobfunnel.config import JobFunnelConfigManager
# pylint: enable=using-constant-test,unused-import


MAX_GLASSDOOR_LOCATIONS_TO_RETURN = 10
Expand Down
15 changes: 6 additions & 9 deletions jobfunnel/backend/scrapers/indeed.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
"""Scraper designed to get jobs from www.indeed.X
"""
import logging
import re
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor, wait
from datetime import date, datetime, timedelta
from math import ceil
from time import sleep, time
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional

from bs4 import BeautifulSoup
from requests import Session

from jobfunnel.backend import Job, JobStatus
from jobfunnel.backend import Job
from jobfunnel.backend.scrapers.base import (BaseCANEngScraper, BaseScraper,
BaseUSAEngScraper)
from jobfunnel.backend.tools.filters import JobFilter
from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str
from jobfunnel.resources import MAX_CPU_WORKERS, JobField, Locale
from jobfunnel.resources import MAX_CPU_WORKERS, JobField

# pylint: disable=using-constant-test,unused-import
if False: # or typing.TYPE_CHECKING if python3.5.3+
from jobfunnel.config import JobFunnelConfigManager

# pylint: enable=using-constant-test,unused-import

ID_REGEX = re.compile(r'id=\"sj_([a-zA-Z0-9]*)\"')
MAX_RESULTS_PER_INDEED_PAGE = 50
Expand Down Expand Up @@ -210,7 +207,7 @@ def _get_search_url(self, method: Optional[str] = 'get') -> str:
self.config.search_config.domain,
self.query,
self.config.search_config.city.replace(' ', '+',),
self.config.search_config.province_or_state,
self.config.search_config.province_or_state.upper(),
self._convert_radius(self.config.search_config.radius),
self.max_results_per_page,
int(self.config.search_config.return_similar_results)
Expand Down
12 changes: 5 additions & 7 deletions jobfunnel/backend/scrapers/monster.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
"""Scrapers for www.monster.X
"""
import logging
import re
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor, wait
from datetime import date, datetime, timedelta
from math import ceil
from time import sleep, time
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional

from bs4 import BeautifulSoup
from requests import Session

from jobfunnel.backend import Job, JobStatus
from jobfunnel.backend import Job
from jobfunnel.backend.scrapers.base import (BaseCANEngScraper, BaseScraper,
BaseUSAEngScraper)
from jobfunnel.backend.tools.filters import JobFilter
from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str
from jobfunnel.resources import MAX_CPU_WORKERS, JobField, Locale
from jobfunnel.resources import JobField

# pylint: disable=using-constant-test,unused-import
if False: # or typing.TYPE_CHECKING if python3.5.3+
from jobfunnel.config import JobFunnelConfigManager
# pylint: enable=using-constant-test,unused-import


MAX_RESULTS_PER_MONSTER_PAGE = 25
Expand Down
6 changes: 2 additions & 4 deletions jobfunnel/backend/tools/delay.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
"""
from math import ceil, log, sqrt
from random import uniform
from time import time
from typing import Dict, List, Union
from typing import List, Union

from numpy import arange
from scipy.special import expit
from scipy.special import expit # pylint: disable=no-name-in-module

from jobfunnel.backend import Job
from jobfunnel.config import DelayConfig
from jobfunnel.resources import DelayAlgorithm

Expand Down
4 changes: 3 additions & 1 deletion jobfunnel/backend/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ def get_logger(logger_name: str, level: int, file_path: str,
"""
logger = logging.getLogger(logger_name)
logger.setLevel(level)
logging.basicConfig(filename=file_path, level=level)
formatter = logging.Formatter(message_format)
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(formatter)
logger.addHandler(stdout_handler)
file_handler = logging.FileHandler(file_path)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
return logger


Expand Down
12 changes: 4 additions & 8 deletions jobfunnel/config/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
"""Configuration parsing module for CLI --> JobFunnelConfigManager
"""
import argparse
import logging
import os
from typing import Any, Dict, List

import yaml

from jobfunnel.config import (SETTINGS_YAML_SCHEMA, DelayConfig,
JobFunnelConfigManager, ProxyConfig,
SearchConfig, SettingsValidator)
from jobfunnel.config import (DelayConfig, JobFunnelConfigManager,
ProxyConfig, SearchConfig, SettingsValidator)
from jobfunnel.resources import (LOG_LEVEL_NAMES, DelayAlgorithm, Locale,
Provider)
from jobfunnel.resources.defaults import *
Expand Down Expand Up @@ -85,14 +82,13 @@ def parse_cli():
)

parser.add_argument(
'-lf',
dest='log_file',
'-log-file',
type=str,
help=f'path to logging file. defaults to {DEFAULT_LOG_FILE}'
)

parser.add_argument(
'--log-level',
'-log-level',
type=str,
default=DEFAULT_LOG_LEVEL_NAME,
choices=LOG_LEVEL_NAMES,
Expand Down
4 changes: 3 additions & 1 deletion jobfunnel/config/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from jobfunnel.config import BaseConfig, DelayConfig, ProxyConfig, SearchConfig
from jobfunnel.resources import BS4_PARSER

# pylint: disable=using-constant-test,unused-import
if False: # or typing.TYPE_CHECKING if python3.5.3+
from jobfunnel.backend.scrapers.base import BaseScraper
from jobfunnel.config import JobFunnelConfigManager
# pylint: enable=using-constant-test,unused-import


class JobFunnelConfigManager(BaseConfig):
Expand Down
1 change: 0 additions & 1 deletion jobfunnel/config/settings.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Settings YAML Schema w/ validator
"""
import ipaddress
import logging

from cerberus import Validator

Expand Down
2 changes: 1 addition & 1 deletion jobfunnel/resources/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
DEFAULT_RECOVER = False
DEFAULT_RETURN_SIMILAR_RESULTS = False
DEFAULT_SAVE_DUPLICATES = False
DEFAULT_RANDOM_DELAY= False
DEFAULT_RANDOM_DELAY = False
DEFAULT_RANDOM_CONVERGING_DELAY = False

# Defaults we use from localization, the scraper can always override it.
Expand Down

0 comments on commit daf1979

Please sign in to comment.