Revert "Revert "Move feature weight to skl parameters.""
This reverts commit 1749338.
trivialfis committed Oct 9, 2023
1 parent 1749338 commit 5e70ba7
Showing 2 changed files with 42 additions and 70 deletions.
104 changes: 36 additions & 68 deletions python-package/xgboost/sklearn.py
@@ -304,6 +304,12 @@ def task(i: int) -> float:
Used for specifying feature types without constructing a dataframe. See
:py:class:`DMatrix` for details.
feature_weights : Optional[ArrayLike]
Weight for each feature, defines the probability of each feature being selected
when colsample is being used. All values must be greater than 0, otherwise a
`ValueError` is thrown.
max_cat_to_onehot : Optional[int]
.. versionadded:: 1.6.0
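
The docstring above describes the new estimator-level `feature_weights` parameter. A minimal sketch of how it might be used once this change lands (the synthetic data and the specific weights are illustrative, not taken from the diff):

```python
import numpy as np
from xgboost import XGBRegressor

# Synthetic data purely for illustration.
rng = np.random.default_rng(0)
X = rng.standard_normal((256, 4))
y = X @ np.array([1.0, 0.5, 0.25, 0.125]) + 0.1 * rng.standard_normal(256)

# All weights must be > 0; a larger weight raises the chance of that feature
# being drawn whenever a colsample_* parameter is below 1.0.
fw = np.array([4.0, 2.0, 1.0, 1.0])

reg = XGBRegressor(
    tree_method="hist",
    colsample_bynode=0.5,  # feature sampling must be active for the weights to matter
    feature_weights=fw,    # estimator-level parameter introduced by this change
    n_estimators=10,
)
reg.fit(X, y)
```
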
@@ -519,7 +525,7 @@ def _wrap_evaluation_matrices(
qid: Optional[Any],
sample_weight: Optional[Any],
base_margin: Optional[Any],
feature_weights: Optional[Any],
feature_weights: Optional[ArrayLike],
eval_set: Optional[Sequence[Tuple[Any, Any]]],
sample_weight_eval_set: Optional[Sequence[Any]],
base_margin_eval_set: Optional[Sequence[Any]],
@@ -661,6 +667,7 @@ def __init__(
validate_parameters: Optional[bool] = None,
enable_categorical: bool = False,
feature_types: Optional[FeatureTypes] = None,
feature_weights: Optional[ArrayLike] = None,
max_cat_to_onehot: Optional[int] = None,
max_cat_threshold: Optional[int] = None,
multi_strategy: Optional[str] = None,
@@ -707,6 +714,7 @@ def __init__(
self.validate_parameters = validate_parameters
self.enable_categorical = enable_categorical
self.feature_types = feature_types
self.feature_weights = feature_weights
self.max_cat_to_onehot = max_cat_to_onehot
self.max_cat_threshold = max_cat_threshold
self.multi_strategy = multi_strategy
@@ -864,16 +872,13 @@ def load_model(self, fname: ModelIn) -> None:
def _configure_fit(
self,
booster: Optional[Union[Booster, "XGBModel", str]],
eval_metric: Optional[Union[Callable, str, Sequence[str]]],
params: Dict[str, Any],
early_stopping_rounds: Optional[int],
callbacks: Optional[Sequence[TrainingCallback]],
feature_weights: Optional[ArrayLike],
) -> Tuple[
Optional[Union[Booster, str, "XGBModel"]],
Optional[Metric],
Dict[str, Any],
Optional[int],
Optional[Sequence[TrainingCallback]],
Optional[ArrayLike],
]:
"""Configure parameters for :py:meth:`fit`."""
if isinstance(booster, XGBModel):
@@ -896,56 +901,35 @@ def _duplicated(parameter: str) -> None:
)

# Configure evaluation metric.
if eval_metric is not None:
_deprecated("eval_metric")
if self.eval_metric is not None and eval_metric is not None:
_duplicated("eval_metric")
# - track where does the evaluation metric come from
if self.eval_metric is not None:
from_fit = False
eval_metric = self.eval_metric
else:
from_fit = True
eval_metric = self.eval_metric
# - configure callable evaluation metric
metric: Optional[Metric] = None
if eval_metric is not None:
if callable(eval_metric) and from_fit:
# No need to wrap the evaluation function for old parameter.
metric = eval_metric
elif callable(eval_metric):
# Parameter from constructor or set_params
if callable(eval_metric):
if self._get_type() == "ranker":
metric = ltr_metric_decorator(eval_metric, self.n_jobs)
else:
metric = _metric_decorator(eval_metric)
else:
params.update({"eval_metric": eval_metric})

# Configure early_stopping_rounds
if early_stopping_rounds is not None:
_deprecated("early_stopping_rounds")
if early_stopping_rounds is not None and self.early_stopping_rounds is not None:
_duplicated("early_stopping_rounds")
early_stopping_rounds = (
self.early_stopping_rounds
if self.early_stopping_rounds is not None
else early_stopping_rounds
if feature_weights is not None:
_deprecated("feature_weights")
if feature_weights is not None and self.feature_weights is not None:
_duplicated("feature_weights")
feature_weights = (
self.feature_weights
if self.feature_weights is not None
else feature_weights
)

# Configure callbacks
if callbacks is not None:
_deprecated("callbacks")
if callbacks is not None and self.callbacks is not None:
_duplicated("callbacks")
callbacks = self.callbacks if self.callbacks is not None else callbacks

tree_method = params.get("tree_method", None)
if self.enable_categorical and tree_method == "exact":
raise ValueError(
"Experimental support for categorical data is not implemented for"
" current tree method yet."
)
return model, metric, params, early_stopping_rounds, callbacks
return model, metric, params, feature_weights

def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
# Use `QuantileDMatrix` to save memory.
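
In the new `_configure_fit` above, a `feature_weights` value passed to `fit` is flagged as deprecated, supplying it both to `fit` and on the estimator is reported as a duplicated parameter, and otherwise whichever of the two is set gets forwarded. A standalone sketch of that resolution rule (illustrative only; the warning and error text below are placeholders, the real messages come from `_deprecated` and `_duplicated`):

```python
import warnings
from typing import Optional

import numpy as np


def resolve_feature_weights(
    estimator_value: Optional[np.ndarray], fit_value: Optional[np.ndarray]
) -> Optional[np.ndarray]:
    """Illustrative mirror of the feature_weights handling in _configure_fit."""
    if fit_value is not None:
        # `_deprecated("feature_weights")` in the diff.
        warnings.warn(
            "`feature_weights` in fit() is deprecated; set it via __init__ or "
            "set_params instead.",
            FutureWarning,
        )
    if fit_value is not None and estimator_value is not None:
        # `_duplicated("feature_weights")` in the diff.
        raise ValueError("feature_weights supplied both to fit() and the estimator.")
    # Otherwise, use whichever of the two was provided.
    return estimator_value if estimator_value is not None else fit_value
```
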
@@ -1037,16 +1021,23 @@ def fit(
A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like
object storing base margin for the i-th validation set.
feature_weights :
Weight for each feature, defines the probability of each feature being
selected when colsample is being used. All values must be greater than 0,
otherwise a `ValueError` is thrown.
.. deprecated:: 1.6.0
Use `feature_weights` in :py:meth:`__init__` or :py:meth:`set_params`
instead.
callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
"""
with config_context(verbosity=self.verbosity):
params = self.get_xgb_params()

model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)

evals_result: TrainingCallback.EvalsLog = {}
train_dmatrix, evals = _wrap_evaluation_matrices(
missing=self.missing,
@@ -1066,23 +1057,13 @@ def fit(
enable_categorical=self.enable_categorical,
feature_types=self.feature_types,
)
params = self.get_xgb_params()

if callable(self.objective):
obj: Optional[Objective] = _objective_decorator(self.objective)
params["objective"] = "reg:squarederror"
else:
obj = None

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
)
self._Booster = train(
params,
train_dmatrix,
@@ -1484,14 +1465,8 @@ def fit(
params["objective"] = "multi:softprob"
params["num_class"] = self.n_classes_

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)
train_dmatrix, evals = _wrap_evaluation_matrices(
missing=self.missing,
@@ -2020,16 +1995,9 @@ def fit(
evals_result: TrainingCallback.EvalsLog = {}
params = self.get_xgb_params()

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)

self._Booster = train(
params,
train_dmatrix,
8 changes: 6 additions & 2 deletions python-package/xgboost/testing/shared.py
@@ -61,9 +61,13 @@ def get_feature_weights(
"""Get feature weights using the demo parser."""
with tempfile.TemporaryDirectory() as tmpdir:
colsample_bynode = 0.5
reg = model(tree_method=tree_method, colsample_bynode=colsample_bynode)
reg = model(
tree_method=tree_method,
colsample_bynode=colsample_bynode,
feature_weights=fw,
)

reg.fit(X, y, feature_weights=fw)
reg.fit(X, y)
model_path = os.path.join(tmpdir, "model.json")
reg.save_model(model_path)
with open(model_path, "r", encoding="utf-8") as fd:
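
The test-helper change above shows the migration in miniature: the weights move from the `fit` call to the constructor. A self-contained before/after sketch under assumed toy data (the helper's real inputs and its demo-parser step are not reproduced here):

```python
import numpy as np
from xgboost import XGBRegressor

rng = np.random.default_rng(1994)
X = rng.standard_normal((128, 8))
y = X.sum(axis=1)
fw = np.arange(1.0, X.shape[1] + 1.0)  # strictly positive, increasing weights

# Before this change the helper did roughly:
#   reg = XGBRegressor(tree_method="hist", colsample_bynode=0.5)
#   reg.fit(X, y, feature_weights=fw)
# Afterwards the weights travel with the estimator:
reg = XGBRegressor(tree_method="hist", colsample_bynode=0.5, feature_weights=fw)
reg.fit(X, y)
reg.save_model("model.json")  # the helper then inspects the saved JSON with the demo parser
```
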
