Skip to content

Commit

Permalink
Merge pull request #108 from IBM/pythoncall_transition
Browse files Browse the repository at this point in the history
Pythoncall transition
  • Loading branch information
ppalmes authored Jun 24, 2022
2 parents 9ff9510 + 297f19c commit 20d00bd
Show file tree
Hide file tree
Showing 11 changed files with 133 additions and 150 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*.swp
.CondaPkg
.DS_Store
.ipynb_checkpoints
*.serialized
Expand Down
5 changes: 5 additions & 0 deletions CondaPkg.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels = ["mkl", "scikit-learn"]

[deps]
scikit-learn = ""
python = ""
14 changes: 7 additions & 7 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
name = "AutoMLPipeline"
uuid = "08437348-eef5-4817-bc1b-d4e9459680d6"
authors = ["Paulito Palmes <[email protected]>"]
version = "0.3.6"
version = "0.4.0"

[deps]
AMLPipelineBase = "e3c3008a-8869-4d53-9f34-c96f99c8a2b6"
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
AMLPipelineBase = "0.1"
Conda = "1.0, 1.1, 1.2, 1.3, 1.4, 1.5"
CondaPkg = "0.2"
DataFrames = "0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 1.0, 1.1"
PyCall = "1.90, 1.91, 1.92"
PythonCall = "0.9"
julia = "1"

[extras]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Dates", "Test", "Random", "PyCall", "Statistics"]
test = ["Dates", "Test", "Random", "PythonCall", "Statistics"]
48 changes: 0 additions & 48 deletions deps/build.jl

This file was deleted.

69 changes: 37 additions & 32 deletions src/skcrossvalidator.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module SKCrossValidators

using PyCall
import PythonCall
const PYC=PythonCall

# standard included modules
using DataFrames
Expand All @@ -11,11 +12,11 @@ using ..Utils
import ..CrossValidators: crossvalidate
export crossvalidate

const metric_dict = Dict{String,PyObject}()
const SKM = PyNULL()
const metric_dict = Dict{String,PYC.Py}()
const SKM = PYC.pynew()

function __init__()
copy!(SKM, pyimport_conda("sklearn.metrics","scikit-learn"))
PYC.pycopy!(SKM, PYC.pyimport("sklearn.metrics"))

metric_dict["roc_auc_score"] = SKM.roc_auc_score
metric_dict["accuracy_score"] = SKM.accuracy_score
Expand Down Expand Up @@ -67,53 +68,57 @@ end
Runs K-fold cross-validation using balanced accuracy as the default. It supports the
following metrics for classification:
- accuracy_score
- balanced_accuracy_score
- cohen_kappa_score
- jaccard_score
- matthews_corrcoef
- hamming_loss
- zero_one_loss
- f1_score
- precision_score
- recall_score
- "accuracy_score"
- "balanced_accuracy_score"
- "cohen_kappa_score"
- "jaccard_score"
- "matthews_corrcoef"
- "hamming_loss"
- "zero_one_loss"
- "f1_score"
- "precision_score"
- "recall_score"
and the following metrics for regression:
- mean_squared_error
- mean_squared_log_error
- median_absolute_error
- r2_score
- max_error
- explained_variance_score
- "mean_squared_error"
- "mean_squared_log_error"
- "median_absolute_error"
- "r2_score"
- "max_error"
- "explained_variance_score"
"""
function crossvalidate(pl::Machine,X::DataFrame,Y::Vector,
sfunc::String; nfolds=10,verbose::Bool=true)

YC=Y
if !(eltype(YC) <: Real)
YC = Y |> Vector{String}
end

checkfun(sfunc)
pfunc = metric_dict[sfunc]
metric(a,b) = pfunc(a,b)
crossvalidate(pl,X,Y,metric,nfolds,verbose)
metric(a,b) = pfunc(a,b) |> (x -> PYC.pyconvert(Float64,x))
crossvalidate(pl,X,YC,metric,nfolds,verbose)
end

function crossvalidate(pl::Machine,X::DataFrame,Y::Vector,sfunc::String,folds::Int)
crossvalidate(pl,X,Y,sfunc,nfolds=folds)
function crossvalidate(pl::Machine,X::DataFrame,Y::Vector,sfunc::String,nfolds::Int)
crossvalidate(pl,X,Y,sfunc; nfolds)
end

function crossvalidate(pl::Machine,X::DataFrame,Y::Vector,sfunc::String,verby::Bool)
crossvalidate(pl,X,Y,sfunc,verbose=verby)
function crossvalidate(pl::Machine,X::DataFrame,Y::Vector,sfunc::String,verbose::Bool)
crossvalidate(pl,X,Y,sfunc; verbose)
end

function crossvalidate(pl::Machine,X::DataFrame,Y::Vector,
sfunc::String, folds::Int,verby::Bool)
crossvalidate(pl,X,Y,sfunc,nfolds=folds,verbose=verby)
sfunc::String, nfolds::Int,verbose::Bool)
crossvalidate(pl,X,Y,sfunc; nfolds,verbose)
end



function crossvalidate(pl::Machine,X::DataFrame,Y::Vector,
sfunc::String,averagetype::String,nfolds=10,verbose::Bool=true)
sfunc::String,averagetype::String;nfolds=10,verbose::Bool=true)
checkfun(sfunc)
pfunc = metric_dict[sfunc]
metric(a,b) = pfunc(a,b,average=averagetype)
metric(a,b) = pfunc(a,b,average=averagetype) |> (x -> PYC.pyconvert(Float64,x))
crossvalidate(pl,X,Y,metric,nfolds,verbose)
end

Expand Down
69 changes: 42 additions & 27 deletions src/sklearners.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module SKLearners

using PyCall
import PythonCall
const PYC=PythonCall

# standard included modules
using DataFrames
Expand All @@ -12,31 +13,31 @@ import ..AbsTypes: fit, fit!, transform, transform!
export fit, fit!, transform, transform!
export SKLearner, sklearners

const learner_dict = Dict{String,PyObject}()
const ENS = PyNULL()
const LM = PyNULL()
const DA = PyNULL()
const NN = PyNULL()
const SVM = PyNULL()
const TREE = PyNULL()
const ANN = PyNULL()
const GP = PyNULL()
const KR = PyNULL()
const NB = PyNULL()
const ISO = PyNULL()
const learner_dict = Dict{String,PYC.Py}()
const ENS = PYC.pynew()
const LM = PYC.pynew()
const DA = PYC.pynew()
const NN = PYC.pynew()
const SVM = PYC.pynew()
const TREE = PYC.pynew()
const ANN = PYC.pynew()
const GP = PYC.pynew()
const KR = PYC.pynew()
const NB = PYC.pynew()
const ISO = PYC.pynew()

function __init__()
copy!(ENS , pyimport_conda("sklearn.ensemble","scikit-learn"))
copy!(LM , pyimport_conda("sklearn.linear_model","scikit-learn"))
copy!(DA , pyimport_conda("sklearn.discriminant_analysis","scikit-learn"))
copy!(NN , pyimport_conda("sklearn.neighbors","scikit-learn"))
copy!(SVM , pyimport_conda("sklearn.svm","scikit-learn"))
copy!(TREE, pyimport_conda("sklearn.tree","scikit-learn"))
copy!(ANN , pyimport_conda("sklearn.neural_network","scikit-learn"))
copy!(GP , pyimport_conda("sklearn.gaussian_process","scikit-learn"))
copy!(KR , pyimport_conda("sklearn.kernel_ridge","scikit-learn"))
copy!(NB , pyimport_conda("sklearn.naive_bayes","scikit-learn"))
copy!(ISO , pyimport_conda("sklearn.isotonic","scikit-learn"))
PYC.pycopy!(ENS , PYC.pyimport("sklearn.ensemble"))
PYC.pycopy!(LM , PYC.pyimport("sklearn.linear_model"))
PYC.pycopy!(DA , PYC.pyimport("sklearn.discriminant_analysis"))
PYC.pycopy!(NN , PYC.pyimport("sklearn.neighbors"))
PYC.pycopy!(SVM , PYC.pyimport("sklearn.svm"))
PYC.pycopy!(TREE, PYC.pyimport("sklearn.tree"))
PYC.pycopy!(ANN , PYC.pyimport("sklearn.neural_network"))
PYC.pycopy!(GP , PYC.pyimport("sklearn.gaussian_process"))
PYC.pycopy!(KR , PYC.pyimport("sklearn.kernel_ridge"))
PYC.pycopy!(NB , PYC.pyimport("sklearn.naive_bayes"))
PYC.pycopy!(ISO , PYC.pyimport("sklearn.isotonic"))

# Available scikit-learn learners.
learner_dict["AdaBoostClassifier"] = ENS
Expand Down Expand Up @@ -157,8 +158,16 @@ function sklearners()
println("Note: Consult Scikitlearn's online help for more details about the learner's arguments.")
end

function fit!(skl::SKLearner, xx::DataFrame, y::Vector)::Nothing
function fit!(skl::SKLearner, xx::DataFrame, yy::Vector)::Nothing
# normalize inputs
x = xx |> Array
y = yy
skl.model[:predtype] = :numeric
if !(eltype(yy) <: Real)
y = yy |> Vector{String}
skl.model[:predtype] = :alpha
end

impl_args = copy(skl.model[:impl_args])
learner = skl.model[:learner]
py_learner = getproperty(learner_dict[learner],learner)
Expand All @@ -185,9 +194,15 @@ end

function transform!(skl::SKLearner, xx::DataFrame)::Vector
x = deepcopy(xx) |> Array
#return collect(skl.model[:predict](x))
sklearner = skl.model[:sklearner]
return collect(sklearner.predict(x))
res = sklearner.predict(x)
if skl.model[:predtype] == :numeric
predn = PYC.pyconvert(Vector{Float64},res)
return predn
else
predc = PYC.pyconvert(Vector{String},res)
return predc
end
end

# Convenience method for SKLearner: delegates to `transform!(skl, xx)` with the
# same arguments; the `::Vector` annotation constrains the returned value.
transform(skl::SKLearner, xx::DataFrame)::Vector = transform!(skl,xx)
Expand Down
31 changes: 19 additions & 12 deletions src/skpreprocessor.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module SKPreprocessors

using PyCall
import PythonCall
const PYC=PythonCall

# standard included modules
using DataFrames
Expand All @@ -12,18 +13,18 @@ import ..AbsTypes: fit, fit!, transform, transform!
export fit, fit!, transform, transform!
export SKPreprocessor, skpreprocessors

const preprocessor_dict = Dict{String,PyObject}()
const DEC = PyNULL()
const FS = PyNULL()
const IMP = PyNULL()
const PREP = PyNULL()
const preprocessor_dict = Dict{String,PYC.Py}()
const DEC = PYC.pynew()
const FS = PYC.pynew()
const IMP = PYC.pynew()
const PREP = PYC.pynew()


function __init__()
copy!(DEC , pyimport_conda("sklearn.decomposition","scikit-learn"))
copy!(FS , pyimport_conda("sklearn.feature_selection","scikit-learn"))
copy!(IMP , pyimport_conda("sklearn.impute","scikit-learn"))
copy!(PREP, pyimport_conda("sklearn.preprocessing","scikit-learn"))
PYC.pycopy!(DEC , PYC.pyimport("sklearn.decomposition"))
PYC.pycopy!(FS , PYC.pyimport("sklearn.feature_selection",))
PYC.pycopy!(IMP , PYC.pyimport("sklearn.impute"))
PYC.pycopy!(PREP, PYC.pyimport("sklearn.preprocessing"))

# Available scikit-learn preprocessors.
preprocessor_dict["DictionaryLearning"] = DEC
Expand Down Expand Up @@ -155,8 +156,13 @@ function skpreprocessors()
println("Note: Please consult Scikitlearn's online help for more details about the preprocessor's arguments.")
end

function fit!(skp::SKPreprocessor, x::DataFrame, y::Vector=[])::Nothing
function fit!(skp::SKPreprocessor, x::DataFrame, yc::Vector=[])::Nothing
features = x |> Array
y = yc
if !(eltype(yc) <: Real)
y = yc |> Vector{String}
end

impl_args = copy(skp.model[:impl_args])
autocomp = skp.model[:autocomponent]
if autocomp == true
Expand Down Expand Up @@ -186,7 +192,8 @@ end
function transform!(skp::SKPreprocessor, x::DataFrame)::DataFrame
features = deepcopy(x) |> Array
model=skp.model[:skpreprocessor]
return collect(model.transform(features)) |> x->DataFrame(x,:auto)
res = (model.transform(features))
PYC.pyconvert(Matrix,res) |> x->DataFrame(x,:auto)
end

# Convenience method for SKPreprocessor: delegates to `transform!(skp, x)` with the
# same arguments; the `::DataFrame` annotation constrains the returned value.
transform(skp::SKPreprocessor, x::DataFrame)::DataFrame = transform!(skp,x)
Expand Down
8 changes: 4 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
module TestAutoMLPipeline
using Test

# suppress warnings
@info "suppressing PyCall warnings"
using PyCall
warnings = pyimport("warnings")
# @info "suppressing Python warnings"
import PythonCall
const PYC=PythonCall
warnings = PYC.pyimport("warnings")
warnings.filterwarnings("ignore")

include("test_skpreprocessing.jl")
Expand Down
Loading

2 comments on commit 20d00bd

@ppalmes
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/63045

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.0 -m "<description of version>" 20d00bde26d36aaf4e009f86be3952543472621d
git push origin v0.4.0

Please sign in to comment.