Skip to content

Commit

Permalink
Massive improvements to testing infrastructure
Browse files Browse the repository at this point in the history
  • Loading branch information
Sterling Paramore committed Sep 2, 2017
1 parent b827cf2 commit 27c458a
Show file tree
Hide file tree
Showing 14 changed files with 1,753 additions and 691 deletions.
545 changes: 545 additions & 0 deletions CasesAndScenarios.ipynb

Large diffs are not rendered by default.

111 changes: 111 additions & 0 deletions MarkdownTable.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "AssertionError",
"evalue": "DataFrame are different\n\nDataFrame shape mismatch\n[left]: (2, 4)\n[right]: (1, 4)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-0d5fbcebcabc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m \u001b[0massert_frame_equal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpected\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mactual\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda3/envs/pemi/lib/python3.6/site-packages/pandas/util/testing.py\u001b[0m in \u001b[0;36massert_frame_equal\u001b[0;34m(left, right, check_dtype, check_index_type, check_column_type, check_frame_type, check_less_precise, check_names, by_blocks, check_exact, check_datetimelike_compat, check_categorical, check_like, obj)\u001b[0m\n\u001b[1;32m 1397\u001b[0m \u001b[0;34m'DataFrame shape mismatch'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1398\u001b[0m \u001b[0;34m'({0}, {1})'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1399\u001b[0;31m '({0}, {1})'.format(*right.shape))\n\u001b[0m\u001b[1;32m 1400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1401\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcheck_like\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/pemi/lib/python3.6/site-packages/pandas/util/testing.py\u001b[0m in \u001b[0;36mraise_assert_detail\u001b[0;34m(obj, message, left, right, diff)\u001b[0m\n\u001b[1;32m 1147\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"\\n[diff]: {diff}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdiff\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdiff\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1149\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1151\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAssertionError\u001b[0m: DataFrame are different\n\nDataFrame shape mismatch\n[left]: (2, 4)\n[right]: (1, 4)"
]
}
],
"source": [
"import importlib\n",
"import pemi.testing\n",
"importlib.reload(pemi.testing)\n",
"\n",
"from pandas.util.testing import assert_frame_equal\n",
"\n",
"schema = {\n",
" 'alpha': { 'type': 'string' },\n",
" 'beta': { 'type': 'integer' },\n",
" 'gamma': { 'type': 'date', 'in_format': '%m/%d/%Y' }\n",
"}\n",
"\n",
"expected = pemi.testing.MarkdownTable(\n",
" '''\n",
" | alpha | beta | gamma | delta |\n",
" | - | - | - | - |\n",
" | one | 2 | 5/3/2017 | purple |\n",
" | one | 2 | 5/3/2017 | purple |\n",
" ''',\n",
" schema\n",
").df\n",
"\n",
"actual = pemi.testing.MarkdownTable(\n",
" '''\n",
" | alpha | beta | gamma | delta |\n",
" | - | - | - | - |\n",
" | one | 2 | 5/3/2017 | purple |\n",
" ''',\n",
" schema\n",
").df\n",
"\n",
"\n",
"assert_frame_equal(expected, actual)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"2\n",
"3\n"
]
}
],
"source": [
"class Mofo():\n",
" def __init__(self):\n",
" self.x = 0\n",
" \n",
" @property\n",
" def happypants(self):\n",
" self.x += 1\n",
" print(self.x)\n",
" \n",
"m = Mofo()\n",
"m.happypants\n",
"m.happypants\n",
"m.happypants "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
12 changes: 12 additions & 0 deletions beer_sales.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
beer_id,sold_at,quantity,id,name,style_id,abv,price,style
1,2017-01-01,3,1,SpinCycle,1,7.2,4.99,IPA
1,2017-01-04,2,1,SpinCycle,1,7.2,4.99,IPA
1,2017-01-16,4,1,SpinCycle,1,7.2,4.99,IPA
1,2017-01-20,2,1,SpinCycle,1,7.2,4.99,IPA
2,2017-01-06,7,2,OldStyle,2,4.2,3.99,Pale
2,2017-01-16,3,2,OldStyle,2,4.2,3.99,Pale
4,2017-01-06,8,4,AbstRedRibbon,7,3.2,1.99,Unknown id 7
4,2017-01-17,5,4,AbstRedRibbon,7,3.2,1.99,Unknown id 7
3,2017-01-14,1,3,Pipewrench,1,8.3,10.99,IPA
3,2017-01-15,3,3,Pipewrench,1,8.3,10.99,IPA
3,2017-01-20,1,3,Pipewrench,1,8.3,10.99,IPA
5 changes: 5 additions & 0 deletions jobs/fixtures/beers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id|name|style_id|abv|price
1|SpinCycle|1|7.2|4.99
2|OldStyle|2|4.2|3.99
3|Pipewrench|1|8.3|10.99
4|AbstRedRibbon|7|3.2|1.99
12 changes: 12 additions & 0 deletions jobs/fixtures/sales.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
beer_id|sold_at|quantity
1|01/01/2017|3
1|01/04/2017|2
2|01/06/2017|7
4|01/06/2017|8
3|01/14/2017|1
3|01/15/2017|3
2|01/16/2017|3
1|01/16/2017|4
4|01/17/2017|5
1|01/20/2017|2
3|01/20/2017|1
16 changes: 12 additions & 4 deletions jobs/simple_job.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import pandas as pd

import pemi
Expand Down Expand Up @@ -129,7 +131,7 @@ def config(self):
'sources': {
'sales_file': {
'beer_id': {'type': 'integer', 'required': True},
'sold_at': {'type': 'date', 'in_format': '%d/%m/%Y', 'required': True},
'sold_at': {'type': 'date', 'in_format': '%m/%d/%Y', 'required': True},
'quantity': {'type': 'integer', 'required': True}
},
'beers_file': {
Expand All @@ -145,7 +147,7 @@ def config(self):
'beer_id': {'type': 'integer', 'required': True},
'name': {'type': 'string', 'required': True},
'style': {'type': 'string'},
'sold_at': {'type': 'date', 'in_format': '%d/%m/%Y', 'required': True},
'sold_at': {'type': 'date', 'in_format': '%m/%d/%Y', 'required': True},
'quantity': {'type': 'integer', 'required': True},
'unit_price': {'type': 'decimal', 'precision': 16, 'scale': 2},
'sell_price': {'type': 'decimal', 'precision': 16, 'scale': 2}
Expand All @@ -167,15 +169,21 @@ def config(self):
name='sales_file',
pipe=pemi.pipes.csv.LocalCsvFileSourcePipe(
schema=self.schemas['sources']['sales_file'],
paths=['sales.csv']
paths=[Path(__file__).parent / Path('fixtures') / Path('sales.csv')],
csv_opts={
'sep': '|'
}
)
)

self.pipe(
name='beers_file',
pipe=pemi.pipes.csv.LocalCsvFileSourcePipe(
schema=self.schemas['sources']['beers_file'],
paths=['beers.csv']
paths=[Path(__file__).parent / Path('fixtures') / Path('beers.csv')],
csv_opts={
'sep': '|'
}
)
)

Expand Down
1 change: 1 addition & 0 deletions packages.info
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ jupyter
nose2(pip)
dask
graphviz pip
faker(pip)
21 changes: 13 additions & 8 deletions pemi/data_subject.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,29 @@

# NOTE(review): this listing looks like a rendered diff with indentation
# stripped: several statements below appear twice in a row (an earlier form
# followed by its replacement). Confirm against the repository before editing.
class DataSubject():
'''
A data subject is just a schema and a generic data object
A data subject is mostly just a schema and a generic data object
'''

# The second signature is the first one plus a trailing `pipe=None` parameter.
def __init__(self, schema=pemi.Schema(), data=None, name=None, stype=None):
def __init__(self, schema=pemi.Schema(), data=None, name=None, stype=None, pipe=None):
# The argument is passed through pemi.Schema(...) instead of being stored as-is.
self.schema = pemi.Schema(schema)
self.data = data
self.name = name
# stype is set to 'source' / 'target' by the DataSource / DataTarget subclasses.
self.stype = stype

# Whatever was given as `pipe`; __str__ uses it as a prefix when it is truthy.
self.pipe = pipe

def __str__(self):
# Earlier form: '<ClassName(name)>'.
return '<{}({})>'.format(self.__class__.__name__, self.name)
# Later form: '<ClassName(name) id>' where the last field is id(self).
subject_str = '<{}({}) {}>'.format(self.__class__.__name__, self.name, id(self))
if self.pipe:
# Prefix with str(self.pipe) and a dot.
return '{}.{}'.format(self.pipe, subject_str)
else:
return subject_str


# DataSubject whose stype is fixed to 'source'.
# NOTE(review): two __init__ definitions appear in sequence; this looks like a
# rendered diff (explicit schema/data/name parameters, then a **kwargs
# pass-through) -- confirm which one is current.
class DataSource(DataSubject):
def __init__(self, schema=pemi.Schema(), data=None, name=None):
super().__init__(schema=schema, data=data, name=name, stype='source')
# Forwards every keyword argument unchanged and adds stype='source'.
def __init__(self, **kwargs):
super().__init__(stype='source', **kwargs)


# DataSubject whose stype is fixed to 'target'.
# NOTE(review): two __init__ definitions appear in sequence; this looks like a
# rendered diff (explicit schema/data/name parameters, then a **kwargs
# pass-through) -- confirm which one is current.
class DataTarget(DataSubject):
def __init__(self, schema=pemi.Schema(), data=None, name=None):
super().__init__(schema=schema, data=data, name=name, stype='target')
# Forwards every keyword argument unchanged and adds stype='target'.
def __init__(self, **kwargs):
super().__init__(stype='target', **kwargs)
6 changes: 3 additions & 3 deletions pemi/pipes/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __get_target(daskpipe):
# Build the dict entries contributed by one connection `conn`.
# Keys are strings of the form '<pipe>.targets', '<pipe>.targets[<subject>]'
# and '<pipe>.sources[<subject>]'; each value is a tuple whose first element is
# an object built here and whose second element names the key(s) it refers to.
# NOTE(review): presumably a dask task-graph fragment -- confirm with the caller.
def _node_edge(self, conn):
return {
'{}.targets'.format(conn.from_pipe.name): (DaskPipe(conn.from_pipe), []),
# NOTE(review): the next three entries use conn.from_subject / conn.to_subject
# directly as the key component and lookup index.
'{}.targets[{}]'.format(conn.from_pipe.name, conn.from_subject): (self._get_target(conn.from_subject), '{}.targets'.format(conn.from_pipe.name)),
'{}.sources[{}]'.format(conn.to_pipe.name, conn.to_subject): (self._connect_to(conn.to_pipe.sources[conn.to_subject], conn), '{}.targets[{}]'.format(conn.from_pipe.name, conn.from_subject)),
'{}.targets'.format(conn.to_pipe.name): (DaskPipe(conn.to_pipe), ['{}.sources[{}]'.format(conn.to_pipe.name, conn.to_subject)])
# NOTE(review): the same three entries again, using the subjects' `.name`
# attribute instead; this looks like the replacement side of a rendered diff.
'{}.targets[{}]'.format(conn.from_pipe.name, conn.from_subject.name): (self._get_target(conn.from_subject.name), '{}.targets'.format(conn.from_pipe.name)),
'{}.sources[{}]'.format(conn.to_pipe.name, conn.to_subject.name): (self._connect_to(conn.to_pipe.sources[conn.to_subject.name], conn), '{}.targets[{}]'.format(conn.from_pipe.name, conn.from_subject.name)),
'{}.targets'.format(conn.to_pipe.name): (DaskPipe(conn.to_pipe), ['{}.sources[{}]'.format(conn.to_pipe.name, conn.to_subject.name)])
}
52 changes: 36 additions & 16 deletions pemi/pipes/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,41 @@
from collections import OrderedDict

class PipeConnection():
# NOTE(review): this span interleaves two versions of __init__ and to()
# (rendered diff, indentation stripped); the single `return self` at the end
# is shared by both versions of to(). Confirm against the repository.
# First version: stores the pipe and target passed in.
def __init__(self, pipe, target):
self.from_pipe = pipe
self.from_subject = target

# First version of to(): stores the destination pipe and source passed in.
def to(self, pipe, source):
self.to_pipe = pipe
self.to_subject = source
# Second version: keeps the parent plus the *names* of the originating pipe
# and subject, read from from_subject.pipe.name and from_subject.name.
def __init__(self, parent, from_subject):
self.parent = parent
self.from_pipe_name = from_subject.pipe.name
self.from_subject_name = from_subject.name

# Second version of to(): records the destination pipe/subject names taken
# from to_subject, then returns self.
def to(self, to_subject):
self.to_pipe_name = to_subject.pipe.name
self.to_subject_name = to_subject.name
return self

# Looks up the originating pipe by its stored name in self.parent.pipes.
@property
def from_pipe(self):
return self.parent.pipes[self.from_pipe_name]

# Looks up the destination pipe by its stored name in self.parent.pipes.
@property
def to_pipe(self):
return self.parent.pipes[self.to_pipe_name]

# Looks up the originating subject by name in from_pipe.targets.
@property
def from_subject(self):
return self.from_pipe.targets[self.from_subject_name]

# Looks up the destination subject by name in to_pipe.sources.
@property
def to_subject(self):
return self.to_pipe.sources[self.to_subject_name]

# NOTE(review): two alternative bodies appear in sequence (rendered diff) --
# confirm which one is current.
def connect(self):
# Earlier form: replaces the entry in to_pipe.sources with the target object itself.
self.to_pipe.sources[self.to_subject] = self.from_pipe.targets[self.from_subject]
# Later form: copies only the `data` attribute from from_subject to to_subject.
self.to_subject.data = self.from_subject.data

# Formats the connection as 'PipeConnection: <from> -> <to>'.
# NOTE(review): two alternative return statements are interleaved below
# (rendered diff); the closing parenthesis is shared by both.
def __str__(self):
# Earlier form: named fields, rendered as pipe[subject].
return 'PipeConnection: {from_pipe}[{from_subject}] -> {to_pipe}[{to_subject}]'.format(
from_pipe=self.from_pipe,
from_subject=self.from_subject,
to_pipe=self.to_pipe,
to_subject=self.to_subject
# Later form: positional fields, rendered as pipe.subject.
return 'PipeConnection: {}.{} -> {}.{}'.format(
self.from_pipe,
self.from_subject,
self.to_pipe,
self.to_subject
)

def __repr__(self):
Expand All @@ -47,12 +64,14 @@ def config(self):

# Registers a pemi.DataSource under self.sources[name], passing this pipe
# as `pipe` together with the given name and schema.
def source(self, name, schema=pemi.Schema()):
self.sources[name] = pemi.DataSource(
pipe=self,
name=name,
schema=schema
)

# Registers a pemi.DataTarget under self.targets[name], passing this pipe
# as `pipe` together with the given name and schema.
def target(self, name, schema=pemi.Schema()):
self.targets[name] = pemi.DataTarget(
pipe=self,
name=name,
schema=schema
)
Expand All @@ -62,8 +81,9 @@ def pipe(self, name, pipe):
self.pipes[name] = pipe


# Creates a PipeConnection, appends it to self.connections and returns it.
# NOTE(review): two signatures/bodies appear in sequence (rendered diff); the
# append/return lines are shared. Confirm which signature is current.
# Earlier form: takes a pipe and a target and passes both to PipeConnection.
def connect(self, pipe, target):
conn = PipeConnection(pipe, target)
# Later form: takes one `connect_from` argument and passes self as the first
# PipeConnection argument.
def connect(self, connect_from):
conn = PipeConnection(self, connect_from)

self.connections.append(conn)
return conn

Expand All @@ -72,7 +92,7 @@ def flow(self):
raise NotImplementedError

# NOTE(review): two alternative return statements appear in sequence
# (rendered diff) -- confirm which one is current.
def __str__(self):
# Earlier form: '<ClassName(name)>'.
return "<{}({})>".format(self.__class__.__name__, self.name)
# Later form: adds id(self) after the name.
return "<{}({}) {}>".format(self.__class__.__name__, self.name, id(self))


class SourcePipe(Pipe):
Expand Down
Loading

0 comments on commit 27c458a

Please sign in to comment.