This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Commit

Move TensorBlock -> array conversion into _solver (#22)
PicoCentauri authored Mar 1, 2023
1 parent 9b4cfd4 commit d0d5c03
Showing 1 changed file with 65 additions and 41 deletions.
106 changes: 65 additions & 41 deletions src/equisolve/numpy/models/linear_model.py
@@ -118,28 +118,65 @@ def _validate_params(
"properties."
)

def _numpy_lstsq_solver(self, X, y, sample_weights, alphas, rcond):
def _solver(
self,
X: TensorBlock,
y: TensorBlock,
alpha: TensorBlock,
sample_weight: TensorBlock,
rcond: float,
) -> TensorBlock:
"""A regularized solver using ``np.linalg.lstsq``."""

# Convert TensorBlocks into arrays for processing them with NumPy.

# X_arr has shape (n_targets, n_properties)
X_arr = block_to_array(X, self.parameter_keys)

# y_arr has length n_targets
y_arr = block_to_array(y, self.parameter_keys)

# sw_arr has shape (n_samples, 1)
sw_arr = block_to_array(sample_weight, self.parameter_keys)

# alpha_arr has shape (1, n_properties)
alpha_arr = block_to_array(alpha, ["values"])

# Flatten into 1d arrays
y_arr = y_arr.ravel()
sw_arr = sw_arr.ravel()
alpha_arr = alpha_arr.ravel()

# Convert the problem with a regularization term into an equivalent
# problem without the regularization term
num_properties = X.shape[1]
num_properties = X_arr.shape[1]

regularization_all = np.hstack((sample_weights, alphas))
regularization_all = np.hstack((sw_arr, alpha_arr))
regularization_eff = np.diag(np.sqrt(regularization_all))

X_eff = regularization_eff @ np.vstack((X, np.eye(num_properties)))
y_eff = regularization_eff @ np.hstack((y, np.zeros(num_properties)))
X_eff = regularization_eff @ np.vstack((X_arr, np.eye(num_properties)))
y_eff = regularization_eff @ np.hstack((y_arr, np.zeros(num_properties)))

w = np.linalg.lstsq(X_eff, y_eff, rcond=rcond)[0]

return np.linalg.lstsq(X_eff, y_eff, rcond=rcond)[0]
weights_block = TensorBlock(
values=w.reshape(1, -1),
samples=y.properties,
components=[],
properties=X.properties,
)

return weights_block
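
The augmentation trick used by `_solver` can be checked outside of the TensorBlock machinery. Below is a minimal standalone NumPy sketch (all names are illustrative, not part of equisolve): each row of X is scaled by the square root of its sample weight, and one extra row per property pulls the corresponding weight towards zero, so a single ordinary `np.linalg.lstsq` call solves the weighted, per-property-regularized problem. The result is cross-checked against the closed-form ridge solution.

    import numpy as np

    rng = np.random.default_rng(0)
    n_samples, n_properties = 50, 5

    X = rng.normal(size=(n_samples, n_properties))
    y = rng.normal(size=n_samples)
    sw = rng.uniform(0.5, 2.0, size=n_samples)        # per-sample weights
    alpha = rng.uniform(0.1, 1.0, size=n_properties)  # per-property regularization

    # Fold weights and regularization into one unweighted lstsq problem:
    # rows of X are scaled by sqrt(sw_i); the appended identity rows become
    # sqrt(alpha_j) * e_j and penalize each weight individually.
    reg = np.diag(np.sqrt(np.hstack((sw, alpha))))
    X_eff = reg @ np.vstack((X, np.eye(n_properties)))
    y_eff = reg @ np.hstack((y, np.zeros(n_properties)))
    w = np.linalg.lstsq(X_eff, y_eff, rcond=None)[0]

    # Closed-form weighted ridge: w = (X^T S X + diag(alpha))^-1 X^T S y,
    # with S = diag(sw).
    w_ref = np.linalg.solve(X.T @ (sw[:, None] * X) + np.diag(alpha),
                            X.T @ (sw * y))
    assert np.allclose(w, w_ref)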

def fit(
self,
X: TensorMap,
y: TensorMap,
alpha: Union[float, TensorMap] = 1.0,
sample_weight: Optional[TensorMap] = None,
sample_weight: Union[float, TensorMap] = 1.0,
rcond: float = 1e-13,
) -> None:
"""Fit Ridge regression model to each block in X.
"""Fit a regression model to each block in `X`.
:param X:
training data
@@ -148,13 +148,13 @@ def fit(
:param alpha:
Constant α that multiplies the L2 term, controlling regularization strength.
Values must be non-negative floats, i.e. in [0, inf). α can be different for
each column in ``X`` to regularize each property differently.
each column in `X` to regularize each property differently.
:param sample_weight:
sample weights
:param rcond:
Cut-off ratio for small singular values during the fit. For the purposes of
rank determination, singular values are treated as zero if they are smaller
than ``rcond`` times the largest singular value in "weightsficient" matrix.
than `rcond` times the largest singular value in "weights" matrix.
"""

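The role of `rcond` described in the docstring above can be illustrated on a nearly rank-deficient design. A small sketch (values chosen purely for illustration): with two almost-identical columns, a strict cut-off keeps the tiny singular value and resolves the ill-conditioned direction exactly, while a looser cut-off discards it and falls back to the minimum-norm solution.

    import numpy as np

    # Two nearly identical columns: singular values of order 1 and 1e-10.
    X = np.array([[1.0, 1.0], [1.0, 1.0 + 1e-10], [2.0, 2.0]])
    y = np.array([1.0, 1.0, 2.0])

    # Tiny singular value kept: the exact solution w = [1, 0] is recovered.
    w_strict = np.linalg.lstsq(X, y, rcond=1e-15)[0]

    # Tiny singular value treated as zero: minimum-norm w ~ [0.5, 0.5].
    w_loose = np.linalg.lstsq(X, y, rcond=1e-6)[0]
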
if type(alpha) is float:
@@ -167,49 +167,36 @@ def fit(

alpha_tensor = slice(alpha_tensor, samples=samples)
alpha = multiply(alpha_tensor, alpha)

if type(alpha) is not TensorMap:
elif type(alpha) is not TensorMap:
raise ValueError("alpha must either be a float or a TensorMap")

if type(sample_weight) is float:
sw_tensor = ones_like(X)

properties = Labels(
names=X.property_names,
values=np.zeros([1, len(X.property_names)], dtype=int),
)

sw_tensor = slice(sw_tensor, properties=properties)
sample_weight = multiply(sw_tensor, sample_weight)
elif type(sample_weight) is not TensorMap:
raise ValueError("sample_weight must either be a float or a TensorMap")

self._validate_data(X, y)
self._validate_params(X, alpha, sample_weight)

weights_blocks = []
for key, X_block in X:
y_block = y.block(key)
alpha_block = alpha.block(key)
sw_block = sample_weight.block(key)

# X_arr has shape of (n_targets, n_properties)
X_arr = block_to_array(X_block, self.parameter_keys)
weight_block = self._solver(X_block, y_block, alpha_block, sw_block, rcond)

# y_arr has length n_targets
y_arr = block_to_array(y_block, self.parameter_keys)[:, 0]

# alpha_arr has length of n_properties
alpha_arr = alpha_block.values[0]

# Sample weights
if sample_weight is not None:
sw_block = sample_weight.block(key)
# sw_arr has length of n_targets
sw_arr = block_to_array(sw_block, self.parameter_keys)[:, 0]
assert (
sw_arr.shape == y_arr.shape
), f"shapes = {sw_arr.shape} and {y_arr.shape}"
else:
sw_arr = np.ones((len(y_arr),))

w = self._numpy_lstsq_solver(X_arr, y_arr, sw_arr, alpha_arr, rcond)

weights_block = TensorBlock(
values=w.reshape(1, -1),
samples=y_block.properties,
components=[],
properties=X_block.properties,
)
weights_blocks.append(weights_block)
weights_blocks.append(weight_block)

# convert weightsficients to a dictionary allowing pickle dump of an instance
# convert weights to a dictionary allowing pickle dump of an instance
self._weights = tensor_map_to_dict(TensorMap(X.keys, weights_blocks))

return self
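
The per-key loop above has a direct plain-NumPy analogue. Here is a minimal sketch (dicts of arrays stand in for TensorMaps, and `solve_block` for `_solver`; all names are illustrative, not equisolve API). It also shows why converting the weights to a plain dict helps pickling: the fitted result is a structure with no custom types.

    import numpy as np

    def solve_block(X, y, alpha, sw, rcond=1e-13):
        # Same augmented-lstsq trick as _solver, on plain arrays.
        n_prop = X.shape[1]
        reg = np.diag(np.sqrt(np.hstack((sw, alpha))))
        X_eff = reg @ np.vstack((X, np.eye(n_prop)))
        y_eff = reg @ np.hstack((y, np.zeros(n_prop)))
        return np.linalg.lstsq(X_eff, y_eff, rcond=rcond)[0]

    def fit_blocks(X_blocks, y_blocks, alpha=1.0, rcond=1e-13):
        # One independent solve per key, mirroring the loop in fit().
        weights = {}
        for key, X in X_blocks.items():
            y = y_blocks[key]
            alphas = alpha * np.ones(X.shape[1])  # scalar alpha, broadcast
            sw = np.ones(len(y))                  # default: uniform weights
            weights[key] = solve_block(X, y, alphas, sw, rcond)
        return weights  # a plain dict of arrays, trivially picklable

    rng = np.random.default_rng(0)
    X_blocks = {"block_1": rng.normal(size=(20, 3))}
    y_blocks = {"block_1": rng.normal(size=20)}
    weights = fit_blocks(X_blocks, y_blocks, alpha=0.1)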
