Revert "Revert "Move feature weight to skl parameters.""
This reverts commit 1749338.
trivialfis committed Oct 9, 2023
1 parent 1749338 commit 5e70ba7
Showing 2 changed files with 42 additions and 70 deletions.
104 changes: 36 additions & 68 deletions python-package/xgboost/sklearn.py
@@ -304,6 +304,12 @@ def task(i: int) -> float:
Used for specifying feature types without constructing a dataframe. See
:py:class:`DMatrix` for details.
feature_weights : Optional[ArrayLike]
Weight for each feature, defines the probability of each feature being selected
when colsample is being used. All values must be greater than 0, otherwise a
`ValueError` is thrown.
max_cat_to_onehot : Optional[int]
.. versionadded:: 1.6.0
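
The docstring above describes the new estimator-level `feature_weights` parameter. A minimal sketch of how it might be used once this change lands (the synthetic data and the specific weights are illustrative, not taken from the diff):

```python
import numpy as np
from xgboost import XGBRegressor

# Synthetic data purely for illustration.
rng = np.random.default_rng(0)
X = rng.standard_normal((256, 4))
y = X @ np.array([1.0, 0.5, 0.25, 0.125]) + 0.1 * rng.standard_normal(256)

# All weights must be > 0; a larger weight raises the chance of that feature
# being drawn whenever a colsample_* parameter is below 1.0.
fw = np.array([4.0, 2.0, 1.0, 1.0])

reg = XGBRegressor(
    tree_method="hist",
    colsample_bynode=0.5,  # feature sampling must be active for the weights to matter
    feature_weights=fw,    # estimator-level parameter introduced by this change
    n_estimators=10,
)
reg.fit(X, y)
```
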
@@ -519,7 +525,7 @@ def _wrap_evaluation_matrices(
qid: Optional[Any],
sample_weight: Optional[Any],
base_margin: Optional[Any],
feature_weights: Optional[Any],
feature_weights: Optional[ArrayLike],
eval_set: Optional[Sequence[Tuple[Any, Any]]],
sample_weight_eval_set: Optional[Sequence[Any]],
base_margin_eval_set: Optional[Sequence[Any]],
@@ -661,6 +667,7 @@ def __init__(
validate_parameters: Optional[bool] = None,
enable_categorical: bool = False,
feature_types: Optional[FeatureTypes] = None,
feature_weights: Optional[ArrayLike] = None,
max_cat_to_onehot: Optional[int] = None,
max_cat_threshold: Optional[int] = None,
multi_strategy: Optional[str] = None,
@@ -707,6 +714,7 @@ def __init__(
self.validate_parameters = validate_parameters
self.enable_categorical = enable_categorical
self.feature_types = feature_types
self.feature_weights = feature_weights
self.max_cat_to_onehot = max_cat_to_onehot
self.max_cat_threshold = max_cat_threshold
self.multi_strategy = multi_strategy
@@ -864,16 +872,13 @@ def load_model(self, fname: ModelIn) -> None:
def _configure_fit(
self,
booster: Optional[Union[Booster, "XGBModel", str]],
eval_metric: Optional[Union[Callable, str, Sequence[str]]],
params: Dict[str, Any],
early_stopping_rounds: Optional[int],
callbacks: Optional[Sequence[TrainingCallback]],
feature_weights: Optional[ArrayLike],
) -> Tuple[
Optional[Union[Booster, str, "XGBModel"]],
Optional[Metric],
Dict[str, Any],
Optional[int],
Optional[Sequence[TrainingCallback]],
Optional[ArrayLike],
]:
"""Configure parameters for :py:meth:`fit`."""
if isinstance(booster, XGBModel):
@@ -896,56 +901,35 @@ def _duplicated(parameter: str) -> None:
)

# Configure evaluation metric.
if eval_metric is not None:
_deprecated("eval_metric")
if self.eval_metric is not None and eval_metric is not None:
_duplicated("eval_metric")
# - track where does the evaluation metric come from
if self.eval_metric is not None:
from_fit = False
eval_metric = self.eval_metric
else:
from_fit = True
eval_metric = self.eval_metric
# - configure callable evaluation metric
metric: Optional[Metric] = None
if eval_metric is not None:
if callable(eval_metric) and from_fit:
# No need to wrap the evaluation function for old parameter.
metric = eval_metric
elif callable(eval_metric):
# Parameter from constructor or set_params
if callable(eval_metric):
if self._get_type() == "ranker":
metric = ltr_metric_decorator(eval_metric, self.n_jobs)
else:
metric = _metric_decorator(eval_metric)
else:
params.update({"eval_metric": eval_metric})

# Configure early_stopping_rounds
if early_stopping_rounds is not None:
_deprecated("early_stopping_rounds")
if early_stopping_rounds is not None and self.early_stopping_rounds is not None:
_duplicated("early_stopping_rounds")
early_stopping_rounds = (
self.early_stopping_rounds
if self.early_stopping_rounds is not None
else early_stopping_rounds
if feature_weights is not None:
_deprecated("feature_weights")
if feature_weights is not None and self.feature_weights is not None:
_duplicated("feature_weights")
feature_weights = (
self.feature_weights
if self.feature_weights is not None
else feature_weights
)

# Configure callbacks
if callbacks is not None:
_deprecated("callbacks")
if callbacks is not None and self.callbacks is not None:
_duplicated("callbacks")
callbacks = self.callbacks if self.callbacks is not None else callbacks

tree_method = params.get("tree_method", None)
if self.enable_categorical and tree_method == "exact":
raise ValueError(
"Experimental support for categorical data is not implemented for"
" current tree method yet."
)
return model, metric, params, early_stopping_rounds, callbacks
return model, metric, params, feature_weights

def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
# Use `QuantileDMatrix` to save memory.
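
In the new `_configure_fit` above, a `feature_weights` value passed to `fit` is flagged as deprecated, supplying it both to `fit` and on the estimator is reported as a duplicated parameter, and otherwise whichever of the two is set gets forwarded. A standalone sketch of that resolution rule (illustrative only; the warning and error text below are placeholders, the real messages come from `_deprecated` and `_duplicated`):

```python
import warnings
from typing import Optional

import numpy as np


def resolve_feature_weights(
    estimator_value: Optional[np.ndarray], fit_value: Optional[np.ndarray]
) -> Optional[np.ndarray]:
    """Illustrative mirror of the feature_weights handling in _configure_fit."""
    if fit_value is not None:
        # `_deprecated("feature_weights")` in the diff.
        warnings.warn(
            "`feature_weights` in fit() is deprecated; set it via __init__ or "
            "set_params instead.",
            FutureWarning,
        )
    if fit_value is not None and estimator_value is not None:
        # `_duplicated("feature_weights")` in the diff.
        raise ValueError("feature_weights supplied both to fit() and the estimator.")
    # Otherwise, use whichever of the two was provided.
    return estimator_value if estimator_value is not None else fit_value
```
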
@@ -1037,16 +1021,23 @@ def fit(
A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like
object storing base margin for the i-th validation set.
feature_weights :
Weight for each feature, defines the probability of each feature being
selected when colsample is being used. All values must be greater than 0,
otherwise a `ValueError` is thrown.
.. deprecated:: 1.6.0
Use `feature_weights` in :py:meth:`__init__` or :py:meth:`set_params`
instead.
callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
"""
with config_context(verbosity=self.verbosity):
params = self.get_xgb_params()

model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)

evals_result: TrainingCallback.EvalsLog = {}
train_dmatrix, evals = _wrap_evaluation_matrices(
missing=self.missing,
@@ -1066,23 +1057,13 @@ def fit(
enable_categorical=self.enable_categorical,
feature_types=self.feature_types,
)
params = self.get_xgb_params()

if callable(self.objective):
obj: Optional[Objective] = _objective_decorator(self.objective)
params["objective"] = "reg:squarederror"
else:
obj = None

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
)
self._Booster = train(
params,
train_dmatrix,
@@ -1484,14 +1465,8 @@ def fit(
params["objective"] = "multi:softprob"
params["num_class"] = self.n_classes_

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)
train_dmatrix, evals = _wrap_evaluation_matrices(
missing=self.missing,
@@ -2020,16 +1995,9 @@ def fit(
evals_result: TrainingCallback.EvalsLog = {}
params = self.get_xgb_params()

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)

self._Booster = train(
params,
train_dmatrix,
8 changes: 6 additions & 2 deletions python-package/xgboost/testing/shared.py
@@ -61,9 +61,13 @@ def get_feature_weights(
"""Get feature weights using the demo parser."""
with tempfile.TemporaryDirectory() as tmpdir:
colsample_bynode = 0.5
reg = model(tree_method=tree_method, colsample_bynode=colsample_bynode)
reg = model(
tree_method=tree_method,
colsample_bynode=colsample_bynode,
feature_weights=fw,
)

reg.fit(X, y, feature_weights=fw)
reg.fit(X, y)
model_path = os.path.join(tmpdir, "model.json")
reg.save_model(model_path)
with open(model_path, "r", encoding="utf-8") as fd:
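
The test-helper change above shows the migration in miniature: the weights move from the `fit` call to the constructor. A self-contained before/after sketch under assumed toy data (the helper's real inputs and its demo-parser step are not reproduced here):

```python
import numpy as np
from xgboost import XGBRegressor

rng = np.random.default_rng(1994)
X = rng.standard_normal((128, 8))
y = X.sum(axis=1)
fw = np.arange(1.0, X.shape[1] + 1.0)  # strictly positive, increasing weights

# Before this change the helper did roughly:
#   reg = XGBRegressor(tree_method="hist", colsample_bynode=0.5)
#   reg.fit(X, y, feature_weights=fw)
# Afterwards the weights travel with the estimator:
reg = XGBRegressor(tree_method="hist", colsample_bynode=0.5, feature_weights=fw)
reg.fit(X, y)
reg.save_model("model.json")  # the helper then inspects the saved JSON with the demo parser
```
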
