Skip to content

Commit

Permalink
Merge pull request #101 from AlexMathew/feature/validate-config
Browse files Browse the repository at this point in the history
Validate configuration file before run
  • Loading branch information
AlexMathew authored Apr 7, 2017
2 parents 090a64c + 5e3a771 commit 6624a92
Show file tree
Hide file tree
Showing 17 changed files with 89 additions and 38 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os
import sys

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

Expand Down
9 changes: 6 additions & 3 deletions scrapple/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@
"""

from __future__ import print_function
from docopt import docopt

from operator import itemgetter

from docopt import docopt

from scrapple.utils.dynamicdispatch import get_command_class
from scrapple.utils.exceptions import check_arguments, InvalidType, \
InvalidSelector, InvalidOutputType, InvalidProjectName, InvalidLevels
from scrapple.utils.exceptions import (InvalidLevels, InvalidOutputType,
InvalidProjectName, InvalidSelector,
InvalidType, check_arguments)

POSSIBLE_EXCEPTIONS = (
InvalidType,
Expand Down
9 changes: 6 additions & 3 deletions scrapple/commands/genconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@
"""

from __future__ import print_function
from jinja2 import Template
import os

import json
from colorama import init, Fore, Back
import os

from colorama import Back, Fore, init
from jinja2 import Template

import scrapple
from scrapple.commands import command


class GenconfigCommand(command.Command):
"""
Defines the execution of :ref:`genconfig <command-genconfig>`
Expand Down
9 changes: 6 additions & 3 deletions scrapple/commands/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@
"""

from __future__ import print_function
from jinja2 import Template
import os

import json
from colorama import init, Fore, Back
import os

from colorama import Back, Fore, init
from jinja2 import Template

import scrapple
from scrapple.commands import command


class GenerateCommand(command.Command):
"""
Defines the execution of :ref:`generate <command-generate>`
Expand Down
13 changes: 10 additions & 3 deletions scrapple/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
"""

from __future__ import print_function

import os
from colorama import init, Fore, Back

from colorama import Back, Fore, init

from scrapple.commands import command
from scrapple.selectors import xpath, css
from scrapple.utils.config import traverse_next, extract_fieldnames
from scrapple.selectors import css, xpath
from scrapple.utils.config import (InvalidConfigException, extract_fieldnames,
traverse_next, validate_config)


class RunCommand(command.Command):
"""
Expand Down Expand Up @@ -57,13 +61,16 @@ def execute_command(self):
import json
with open(self.args['<projectname>'] + '.json', 'r') as f:
self.config = json.load(f)
validate_config(self.config)
self.run()
except ValueError:
print(Back.WHITE + Fore.RED + "Use 0, 1 or 2 for verbosity." \
+ Back.RESET + Fore.RESET, sep="")
except IOError:
print(Back.WHITE + Fore.RED + self.args['<projectname>'], ".json does not ", \
"exist. Use ``scrapple genconfig``." + Back.RESET + Fore.RESET, sep="")
except InvalidConfigException as e:
print(Back.WHITE + Fore.RED + e + Back.RESET + Fore.RESET, sep="")


def run(self):
Expand Down
7 changes: 5 additions & 2 deletions scrapple/commands/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@
"""

from __future__ import print_function
from flask import Flask, render_template, request

import webbrowser
from multiprocessing import Process
from colorama import init, Fore, Back

from colorama import Back, Fore, init
from flask import Flask, render_template, request

from scrapple.commands import command
from scrapple.utils.form import form_to_json


class WebCommand(command.Command):
"""
Defines the execution of :ref:`web <command-web>`
Expand Down
8 changes: 5 additions & 3 deletions scrapple/selectors/css.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@
"""

from __future__ import print_function

from lxml import cssselect

from scrapple.selectors.selector import Selector
from scrapple.utils.text import make_ascii

try:
from urlparse import urljoin
except ImportError:
from urllib.parse import urljoin

from scrapple.selectors.selector import Selector
from scrapple.utils.text import make_ascii


class CssSelector(Selector):
"""
Expand Down
4 changes: 3 additions & 1 deletion scrapple/selectors/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
"""

from __future__ import print_function

import random

import requests
from lxml import etree
import random

requests.warnings.filterwarnings('ignore')

Expand Down
9 changes: 5 additions & 4 deletions scrapple/selectors/xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@

from __future__ import print_function

from lxml.etree import XPathError

from scrapple.selectors.selector import Selector
from scrapple.utils.text import make_ascii

try:
from urlparse import urljoin
except ImportError:
from urllib.parse import urljoin

from lxml.etree import XPathError
from scrapple.selectors.selector import Selector
from scrapple.utils.text import make_ascii


class XpathSelector(Selector):
"""
Expand Down
24 changes: 23 additions & 1 deletion scrapple/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@
"""

from __future__ import print_function
from colorama import init, Fore, Back

from colorama import Back, Fore, init

init()


class InvalidConfigException(Exception):
    """Raised when the config file is invalid, e.g. it has duplicate field names."""


def traverse_next(page, nextx, results, tabular_data_headers=[], verbosity=0):
"""
Recursive generator to traverse through the next attribute and \
Expand Down Expand Up @@ -52,6 +58,22 @@ def traverse_next(page, nextx, results, tabular_data_headers=[], verbosity=0):
yield (tdh, result)


def validate_config(config):
    """
    Validates the extractor configuration file. Ensures that there are no duplicate field names, etc.

    :param config: The configuration file that contains the specification of the extractor
    :return: True if config is valid, else raises an exception that specifies the correction to be made
    :raises InvalidConfigException: if the same field name appears more than once
    """
    fields = list(get_fields(config))
    # Compute the count before formatting: `%` binds tighter than `-`, so
    # `"...%d" % a - b` would try to subtract an int from the formatted
    # string and raise TypeError instead of the intended exception.
    duplicate_count = len(fields) - len(set(fields))
    if duplicate_count:
        raise InvalidConfigException(
            "Invalid configuration file - %d duplicate field names" % duplicate_count
        )
    return True


def get_fields(config):
"""
Recursive generator that yields the field names in the config file
Expand Down
1 change: 1 addition & 0 deletions scrapple/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import re


class InvalidType(ValueError):
"""Exception class for invalid type in arguments."""
pass
Expand Down
4 changes: 2 additions & 2 deletions scrapple/utils/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
Functions related to form handling.
"""

import os
import json
import itertools
import json
import os


def form_to_json(form):
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from __future__ import print_function

from subprocess import call

try:
from setuptools import setup, find_packages
except ImportError:
Expand Down Expand Up @@ -54,7 +56,6 @@
tests_require=test_requirements
)

from subprocess import call
print("Setting up argument completion")
x = call(["bash", "scrapple.sh"])

Expand Down
2 changes: 1 addition & 1 deletion tests/test_cmd.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from nose.tools import assert_equals
from docopt import docopt
from nose.tools import assert_equals

from scrapple import cmd

Expand Down
5 changes: 3 additions & 2 deletions tests/test_genconfig.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from nose.tools import assert_equals, assert_list_equal, assert_is_instance
from docopt import docopt
import json
import os

from docopt import docopt
from nose.tools import assert_equals, assert_is_instance, assert_list_equal

from scrapple import cmd
from scrapple.commands import genconfig

Expand Down
9 changes: 5 additions & 4 deletions tests/test_generate.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from nose.tools import assert_is_instance, assert_in
from docopt import docopt
import os
import sys
from contextlib import contextmanager

from docopt import docopt
from nose.tools import assert_in, assert_is_instance

from scrapple import cmd
from scrapple.commands import generate

doc = cmd.__doc__


import sys
from contextlib import contextmanager
try:
from StringIO import StringIO
except ImportError:
Expand Down
9 changes: 5 additions & 4 deletions tests/test_run.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from nose.tools import assert_is_instance, assert_in, assert_dict_equal
from docopt import docopt
import json
import os
import sys
from contextlib import contextmanager

from docopt import docopt
from nose.tools import assert_dict_equal, assert_in, assert_is_instance

from scrapple import cmd
from scrapple.commands import run

doc = cmd.__doc__


import sys
from contextlib import contextmanager
try:
from StringIO import StringIO
except ImportError:
Expand Down

0 comments on commit 6624a92

Please sign in to comment.