From b97f3609e0a911abb53231eb87dcc8a34679a33d Mon Sep 17 00:00:00 2001
From: Yunbo Ouyang <youyang@linkedin.com>
Date: Wed, 23 Oct 2019 18:51:53 -0700
Subject: [PATCH 1/7] Add incremental training and related unit tests

---
 .../CoordinateFactoryIntegTest.scala          |   6 +
 .../ml/algorithm/CoordinateFactory.scala      |  36 ++-
 .../ml/algorithm/RandomEffectCoordinate.scala |  12 +-
 .../photon/ml/estimators/GameEstimator.scala  | 140 ++++++++--
 .../ml/function/ObjectiveFunctionHelper.scala |   7 +-
 .../glm/DistributedGLMLossFunction.scala      |  41 ++-
 .../ml/function/glm/GLMLossFunction.scala     |  31 ++-
 .../glm/SingleNodeGLMLossFunction.scala       |  41 ++-
 .../svm/SmoothedHingeLossFunction.scala       |  15 +-
 .../DistributedOptimizationProblem.scala      |  11 +-
 .../SingleNodeOptimizationProblem.scala       |   8 +-
 .../RandomEffectOptimizationProblem.scala     |  37 ++-
 .../ObjectiveFunctionHelperTest.scala         |  16 +-
 .../ml/function/glm/GLMLossFunctionTest.scala |  10 +-
 .../svm/SmoothedHingeLossFunctionTest.scala   |   9 +-
 .../photon/ml/util/GameTestUtils.scala        |   2 +-
 .../game/training/GameTrainingDriver.scala    |  26 +-
 .../photon/ml/data/avro/AvroUtils.scala       |   9 +-
 .../ml/function/PriorDistribution.scala       | 250 ++++++++++++++++++
 .../photon/ml/model/Coefficients.scala        |  46 ++--
 .../linkedin/photon/ml/model/GameModel.scala  |  13 +-
 .../linkedin/photon/ml/util/MathUtils.scala   |  10 +
 .../linkedin/photon/ml/util/VectorUtils.scala |  10 +
 .../ml/function/PriorDistributionTest.scala   |  77 ++++++
 24 files changed, 715 insertions(+), 148 deletions(-)
 create mode 100644 photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
 create mode 100644 photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala

diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala
index 0015ecdb..c969f180 100644
--- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala
+++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala
@@ -23,6 +23,7 @@ import com.linkedin.photon.ml.TaskType
 import com.linkedin.photon.ml.Types.REId
 import com.linkedin.photon.ml.data.{FixedEffectDataset, LocalDataset, RandomEffectDataset}
 import com.linkedin.photon.ml.function.{DistributedObjectiveFunction, ObjectiveFunctionHelper, SingleNodeObjectiveFunction}
+import com.linkedin.photon.ml.model.{FixedEffectModel, RandomEffectModel}
 import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.optimization.game.{FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
 import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType, SingleNodeOptimizationProblem, VarianceComputationType}
@@ -46,6 +47,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
 
     val mockDataset = mock(classOf[FixedEffectDataset])
     val optimizationConfiguration = FixedEffectOptimizationConfiguration(OPTIMIZER_CONFIG)
+    val priorModelOpt: Option[FixedEffectModel] = None
 
     doReturn(sc).when(mockDataset).sparkContext
 
@@ -57,6 +59,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
       DOWN_SAMPLER_FACTORY,
       MOCK_NORMALIZATION,
       VARIANCE_COMPUTATION_TYPE,
+      priorModelOpt,
       INTERCEPT_INDEX)
 
     coordinate match {
@@ -78,6 +81,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
     val mockProjectorsRDD = mock(classOf[RDD[(REId, LinearSubspaceProjector)]])
     val mockProblemsRDD = mock(classOf[RDD[(REId, SingleNodeOptimizationProblem[SingleNodeObjectiveFunction])]])
     val optimizationConfiguration = RandomEffectOptimizationConfiguration(OPTIMIZER_CONFIG)
+    val priorModelOpt: Option[RandomEffectModel] = None
 
     doReturn(sc).when(mockDataset).sparkContext
     doReturn(mockDataRDD).when(mockDataset).activeData
@@ -97,6 +101,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
       DOWN_SAMPLER_FACTORY,
       MOCK_NORMALIZATION,
       VARIANCE_COMPUTATION_TYPE,
+      priorModelOpt,
       INTERCEPT_INDEX)
 
     coordinate match {
@@ -124,6 +129,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
       DOWN_SAMPLER_FACTORY,
       MOCK_NORMALIZATION,
       VARIANCE_COMPUTATION_TYPE,
+      None,
       INTERCEPT_INDEX)
   }
 }
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/CoordinateFactory.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/CoordinateFactory.scala
index 95d8bb28..4e2f2dcc 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/CoordinateFactory.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/CoordinateFactory.scala
@@ -17,7 +17,7 @@ package com.linkedin.photon.ml.algorithm
 import com.linkedin.photon.ml.data.{Dataset, FixedEffectDataset, RandomEffectDataset}
 import com.linkedin.photon.ml.function.ObjectiveFunctionHelper.{DistributedObjectiveFunctionFactory, ObjectiveFunctionFactoryFactory, SingleNodeObjectiveFunctionFactory}
 import com.linkedin.photon.ml.function.ObjectiveFunction
-import com.linkedin.photon.ml.model.Coefficients
+import com.linkedin.photon.ml.model.{Coefficients, DatumScoringModel, FixedEffectModel, RandomEffectModel}
 import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.optimization.DistributedOptimizationProblem
 import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType
@@ -45,7 +45,9 @@ object CoordinateFactory {
    * @param downSamplerFactory A factory function for the [[DownSampler]] (if down-sampling is enabled)
    * @param normalizationContext The [[NormalizationContext]]
    * @param varianceComputationType Should the trained coefficient variances be computed in addition to the means?
+   * @param priorModelOpt The prior model for warm-start and incremental training
    * @param interceptIndexOpt The index of the intercept, if one is present
+   * @param isIncrementalTraining The index of the intercept, if one is present
    * @return A [[Coordinate]] for the [[Dataset]] of type [[D]]
    */
   def build[D <: Dataset[D]](
@@ -56,15 +58,18 @@ object CoordinateFactory {
       downSamplerFactory: DownSamplerFactory,
       normalizationContext: NormalizationContext,
       varianceComputationType: VarianceComputationType,
-      interceptIndexOpt: Option[Int]): Coordinate[D] = {
+      priorModelOpt: Option[DatumScoringModel],
+      interceptIndexOpt: Option[Int],
+      isIncrementalTraining: Boolean = false): Coordinate[D] = {
 
-    val lossFunctionFactory = lossFunctionFactoryConstructor(coordinateOptConfig)
+    val lossFunctionFactory = lossFunctionFactoryConstructor(coordinateOptConfig, isIncrementalTraining)
 
-    (dataset, coordinateOptConfig, lossFunctionFactory) match {
+    (dataset, coordinateOptConfig, lossFunctionFactory, priorModelOpt) match {
       case (
-          fEDataset: FixedEffectDataset,
-          fEOptConfig: FixedEffectOptimizationConfiguration,
-          distributedLossFunctionFactory: DistributedObjectiveFunctionFactory) =>
+        fEDataset: FixedEffectDataset,
+        fEOptConfig: FixedEffectOptimizationConfiguration,
+        distributedLossFunctionFactory: DistributedObjectiveFunctionFactory,
+        fixedEffectModelOpt: Option[FixedEffectModel]) =>
 
         val downSamplerOpt = if (DownSampler.isValidDownSamplingRate(fEOptConfig.downSamplingRate)) {
           Some(downSamplerFactory(fEOptConfig.downSamplingRate))
@@ -77,21 +82,23 @@ object CoordinateFactory {
           fEDataset,
           DistributedOptimizationProblem(
             fEOptConfig,
-            distributedLossFunctionFactory(interceptIndexOpt),
+            distributedLossFunctionFactory(fixedEffectModelOpt.map(_.model), interceptIndexOpt),
             downSamplerOpt,
             glmConstructor,
             normalizationPhotonBroadcast,
             varianceComputationType)).asInstanceOf[Coordinate[D]]
 
       case (
-          rEDataset: RandomEffectDataset,
-          rEOptConfig: RandomEffectOptimizationConfiguration,
-          singleNodeLossFunctionFactory: SingleNodeObjectiveFunctionFactory) =>
+        rEDataset: RandomEffectDataset,
+        rEOptConfig: RandomEffectOptimizationConfiguration,
+        singleNodeLossFunctionFactory: SingleNodeObjectiveFunctionFactory,
+        randomEffectModelOpt: Option[RandomEffectModel]) =>
 
         RandomEffectCoordinate(
           rEDataset,
           rEOptConfig,
           singleNodeLossFunctionFactory,
+          randomEffectModelOpt,
           glmConstructor,
           normalizationContext,
           varianceComputationType,
@@ -100,9 +107,10 @@ object CoordinateFactory {
       case _ =>
         throw new UnsupportedOperationException(
           s"""Cannot build coordinate for the following input class combination:
-          |  ${dataset.getClass.getName}
-          |  ${coordinateOptConfig.getClass.getName}
-          |  ${lossFunctionFactory.getClass.getName}""".stripMargin)
+             |  ${dataset.getClass.getName}
+             |  ${coordinateOptConfig.getClass.getName}
+             |  ${lossFunctionFactory.getClass.getName}
+             |  ${priorModelOpt.getClass.getName}""".stripMargin)
     }
   }
 }
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
index ab1393cc..7efcc5b0 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
@@ -78,8 +78,7 @@ protected[ml] class RandomEffectCoordinate[Objective <: SingleNodeObjectiveFunct
    * @param model The model to use as a starting point
    * @return A (updated model, optional optimization tracking information) tuple
    */
-  override protected[algorithm] def trainModel(
-      model: DatumScoringModel): (DatumScoringModel, OptimizationTracker) =
+  override protected[algorithm] def trainModel(model: DatumScoringModel): (DatumScoringModel, OptimizationTracker) =
 
     model match {
       case randomEffectModel: RandomEffectModel =>
@@ -184,17 +183,19 @@ object RandomEffectCoordinate {
    *                               problems
    * @param randomEffectDataset The data on which to run the optimization algorithm
    * @param configuration The optimization problem configuration
-   * @param objectiveFunctionFactory The objective function to optimize
+   * @param objectiveFunctionFactory The objective function factory option
+   * @param priorRandomEffectModelOpt The prior randomEffectModel option
    * @param glmConstructor The function to use for producing GLMs from trained coefficients
    * @param normalizationContext The normalization context
    * @param varianceComputationType If and how coefficient variances should be computed
    * @param interceptIndexOpt The index of the intercept, if there is one
-   * @return A new [[RandomEffectCoordinate]] object
+   * @return A new [[RandomEffectCoordinate]]
    */
   protected[ml] def apply[RandomEffectObjective <: SingleNodeObjectiveFunction](
       randomEffectDataset: RandomEffectDataset,
       configuration: RandomEffectOptimizationConfiguration,
-      objectiveFunctionFactory: Option[Int] => RandomEffectObjective,
+      objectiveFunctionFactory: (Option[GeneralizedLinearModel], Option[Int]) => RandomEffectObjective,
+      priorRandomEffectModelOpt: Option[RandomEffectModel],
       glmConstructor: Coefficients => GeneralizedLinearModel,
       normalizationContext: NormalizationContext,
       varianceComputationType: VarianceComputationType = VarianceComputationType.NONE,
@@ -205,6 +206,7 @@ object RandomEffectCoordinate {
       randomEffectDataset.projectors,
       configuration,
       objectiveFunctionFactory,
+      priorRandomEffectModelOpt,
       glmConstructor,
       normalizationContext,
       varianceComputationType,
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
index c78d51d3..2990fb8e 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
@@ -14,6 +14,8 @@
  */
 package com.linkedin.photon.ml.estimators
 
+import java.security.InvalidParameterException
+
 import scala.language.existentials
 
 import org.apache.commons.cli.MissingArgumentException
@@ -33,7 +35,7 @@ import com.linkedin.photon.ml.data._
 import com.linkedin.photon.ml.evaluation._
 import com.linkedin.photon.ml.function.ObjectiveFunctionHelper
 import com.linkedin.photon.ml.function.glm._
-import com.linkedin.photon.ml.model.{GameModel, RandomEffectModel}
+import com.linkedin.photon.ml.model.{FixedEffectModel, GameModel, RandomEffectModel}
 import com.linkedin.photon.ml.normalization._
 import com.linkedin.photon.ml.optimization.VarianceComputationType
 import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType
@@ -122,14 +124,18 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
 
   val validationEvaluators: Param[Seq[EvaluatorType]] = ParamUtils.createParam(
     "validation evaluators",
-    "A list of evaluators used to validate computed scores (Note: the first evaluator in the list is the one used " +
-      "for model selection)",
+    "A list of evaluators used to validate computed scores (Note: the first evaluator in the list is the one " +
+      "used for model selection)",
     PhotonParamValidators.nonEmpty[Seq, EvaluatorType])
 
   val ignoreThresholdForNewModels: Param[Boolean] = ParamUtils.createParam[Boolean](
     "ignore threshold for new models",
-    "Flag to ignore the random effect samples lower bound when encountering a random effect ID without an existing " +
-      "model during warm-start training.")
+    "Flag to ignore the random effect samples lower bound when encountering a random effect ID without an " +
+      "existing model during warm-start training.")
+
+  val incrementalTraining: Param[Boolean] = ParamUtils.createParam[Boolean](
+    "incremental training",
+    "Flag to enable incremental training.")
 
   val useWarmStart: Param[Boolean] = ParamUtils.createParam[Boolean](
     "use warm start",
@@ -177,6 +183,8 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
 
   def setUseWarmStart(value: Boolean): this.type = set(useWarmStart, value)
 
+  def setIncrementalTraining(value: Boolean): this.type = set(incrementalTraining, value)
+
   //
   // Params trait extensions
   //
@@ -209,6 +217,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
     setDefault(treeAggregateDepth, DEFAULT_TREE_AGGREGATE_DEPTH)
     setDefault(ignoreThresholdForNewModels, false)
     setDefault(useWarmStart, true)
+    setDefault(incrementalTraining, false)
   }
 
   /**
@@ -229,10 +238,11 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
     val updateSequence = getRequiredParam(coordinateUpdateSequence)
     val dataConfigs = getRequiredParam(coordinateDataConfigurations)
     val initialModelOpt = get(initialModel)
-    val retrainModelCoordsOpt = get(partialRetrainLockedCoordinates)
+    val lockedModelCoordsOpt = get(partialRetrainLockedCoordinates)
     val normalizationContextsOpt = get(coordinateNormalizationContexts)
     val ignoreThreshold = getOrDefault(ignoreThresholdForNewModels)
     val numUniqueCoordinates = updateSequence.toSet.size
+    val isIncrementalTraining = getOrDefault(incrementalTraining)
 
     // Cannot have coordinates repeat in the update sequence
     require(
@@ -244,39 +254,106 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
       !ignoreThreshold || initialModelOpt.isDefined,
       "'Ignore threshold for new models' flag set but no initial model provided for warm-start")
 
-    // Partial retraining and warm-start training require an initial GAME model to be provided as input
-    val coordinatesToTrain = (initialModelOpt, retrainModelCoordsOpt) match {
-      case (Some(initModel), Some(retrainModelCoords)) =>
+    // Warm-start, partial re-training, and incremental training are mutually exclusive.
+    val coordinatesToTrain = (isIncrementalTraining, lockedModelCoordsOpt, initialModelOpt) match {
+      case (true, None, None) =>
+        throw new InvalidParameterException(s"'${incrementalTraining.name}' is enabled but no initial model provided.")
+
+      case (true, None, Some(initModel)) =>
+        // The set of coordinates being trained and the set of coordinates trained previously must be identical
+        require(
+          updateSequence.toSet == initModel.toMap.keySet,
+          s"Coordinate sets don't match for incremental training; missing coordinates: " +
+            s"${MathUtils.symmetricDifference(updateSequence.toSet, initModel.toMap.keySet).mkString(", ")}")
+
+        updateSequence.foreach { coordinateId =>
+          val coordinateConfig = dataConfigs(coordinateId)
+          val coordinateModel = initModel(coordinateId)
+
+          // TODO: Do the feature shards and random effect types need to match? It's possible for them to match
+          // TODO: perfectly with different names (if the initial model is sufficiently old).
+          (coordinateConfig, coordinateModel) match {
+            case (fEC: FixedEffectDataConfiguration, fEM: FixedEffectModel) =>
+
+              // Model and coordinate must be trained on the same feature shard
+              require(
+                fEC.featureShardId == fEM.featureShardId,
+                s"Incremental training error: feature shard ID mismatch for coordinate '$coordinateId' " +
+                  s"('${fEC.featureShardId}' vs. '${fEM.featureShardId}').")
+
+              // Model must contain variance info
+              require(
+                fEM.model.coefficients.variancesOption.isDefined,
+                s"Incremental training error: coordinate '$coordinateId' missing variance information.")
+
+            case (rEC: RandomEffectDataConfiguration, rEM: RandomEffectModel) =>
+
+              // Model and coordinate must be trained on the same feature shard
+              require(
+                rEC.featureShardId == rEM.featureShardId,
+                s"Incremental training error: feature shard ID mismatch for coordinate '$coordinateId' " +
+                  s"('${rEC.featureShardId}' vs. '${rEM.featureShardId}').")
+
+              // Random effect types must match between coordinate and model
+              require(
+                rEC.randomEffectType == rEM.randomEffectType,
+                s"Incremental training error: random effect type mismatch for coordinate '$coordinateId' " +
+                  s"('${rEC.randomEffectType}' vs. '${rEM.randomEffectType}').")
+
+              // Model must contain variance info
+              require(
+                rEM
+                  .modelsRDD
+                  .mapPartitions(
+                    iter => Seq(iter.forall(_._2.coefficients.variancesOption.isDefined)).iterator,
+                    preservesPartitioning = true)
+                  .fold(true)(_ && _),
+                s"Incremental training error: one or more models in coordinate '$coordinateId' missing variance information.")
+
+            case (_, _) =>
+              throw new IllegalArgumentException(
+                "Incremental training error: mismatch between coordinate and model types.")
+          }
+        }
+
+        updateSequence
+
+      case (true, Some(_), _) =>
+        throw new InvalidParameterException(
+          "Both incremental training and partial model re-training enabled; these two training options are mutually " +
+            "exclusive")
 
-        val newCoordinates = updateSequence.filterNot(retrainModelCoords.contains)
+      case (false, None, _) =>
+        updateSequence
+
+      case (false, Some(_), None) =>
+        throw new InvalidParameterException("Partial model re-training is enabled but no initial model provided.")
+
+      case (false, Some(lockedModelCoords), Some(initModel)) =>
+
+        val newCoordinates = updateSequence.filterNot(lockedModelCoords.contains)
 
         // Locked coordinates cannot be empty
         require(
-          retrainModelCoords.nonEmpty,
-          "Set of locked coordinates is empty.")
+          lockedModelCoords.nonEmpty,
+          "Empty set of locked coordinates is invalid.")
 
         // No point in training if every coordinate is being reused
         require(
           newCoordinates.nonEmpty,
-          "All coordinates in the update sequence are re-used from the initial model: no new coordinates to train.")
+          "All coordinates in the update sequence are re-used from the initial model; no new coordinates to train.")
 
         // All locked coordinates must be used by the update sequence
         require(
-          retrainModelCoords.forall(updateSequence.contains),
+          lockedModelCoords.forall(updateSequence.contains),
           "One or more locked coordinates for partial retraining are missing from the update sequence.")
 
         // All locked coordinates must be present in the initial model
         require(
-          retrainModelCoords.forall(initModel.toMap.contains),
+          lockedModelCoords.forall(initModel.toMap.contains),
           "One or more locked coordinates for partial retraining are missing from the initial model.")
 
         newCoordinates
-
-      case (Some(_), None) | (None, None) =>
-        updateSequence
-
-      case (None, Some(_)) =>
-        throw new IllegalArgumentException("Partial retraining enabled, but no base model provided.")
     }
 
     // All coordinates (including locked coordinates) should have a data configuration
@@ -468,7 +545,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
    * @return A map of coordinate ID to training [[Dataset]]
    */
   protected def prepareTrainingDatasets(
-      gameDataset: RDD[(UniqueSampleId, GameDatum)]): Map[CoordinateId, D forSome { type D <: Dataset[D] }] = {
+      gameDataset: RDD[(UniqueSampleId, GameDatum)]): Map[CoordinateId, D forSome {type D <: Dataset[D]}] = {
 
     val coordinateDataConfigs = getRequiredParam(coordinateDataConfigurations)
 
@@ -525,7 +602,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
           (coordinateId, randomEffectDataset)
       }
 
-      result.asInstanceOf[(CoordinateId, D forSome { type D <: Dataset[D] })]
+      result.asInstanceOf[(CoordinateId, D forSome {type D <: Dataset[D]})]
     }
   }
 
@@ -627,7 +704,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
    */
   protected def train(
       configuration: GameOptimizationConfiguration,
-      trainingDatasets: Map[CoordinateId, D forSome { type D <: Dataset[D] }],
+      trainingDatasets: Map[CoordinateId, D forSome {type D <: Dataset[D]}],
       coordinateDescent: CoordinateDescent,
       initialModelOpt: Option[GameModel] = None): (GameModel, Option[EvaluationResults]) = Timed(s"Train model:") {
 
@@ -651,18 +728,25 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
     val downSamplerFactory = DownSamplerHelper.buildFactory(task)
     val lockedCoordinates = get(partialRetrainLockedCoordinates).getOrElse(Set())
     val interceptIndices = getOrDefault(coordinateInterceptIndices)
+    val isIncrementalTraining = getOrDefault(incrementalTraining)
 
     // Create the optimization coordinates for each component model
-    val coordinates: Map[CoordinateId, C forSome { type C <: Coordinate[_] }] =
+    val coordinates: Map[CoordinateId, C forSome {type C <: Coordinate[_]}] =
       updateSequence
         .map { coordinateId =>
-          val coordinate: C forSome { type C <: Coordinate[_] } = if (lockedCoordinates.contains(coordinateId)) {
+          val coordinate: C forSome {type C <: Coordinate[_]} = if (lockedCoordinates.contains(coordinateId)) {
             trainingDatasets(coordinateId) match {
               case feDataset: FixedEffectDataset => new FixedEffectModelCoordinate(feDataset)
               case reDataset: RandomEffectDataset => new RandomEffectModelCoordinate(reDataset)
               case dataset => throw new UnsupportedOperationException(s"Unsupported dataset type: ${dataset.getClass}")
             }
+
           } else {
+            val priorModelOpt = initialModelOpt match {
+              case Some(gameModel) => gameModel.getModel(coordinateId)
+              case None => None
+            }
+
             CoordinateFactory.build(
               trainingDatasets(coordinateId),
               configuration(coordinateId),
@@ -671,7 +755,9 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
               downSamplerFactory,
               normalizationContexts.getOrElse(coordinateId, NoNormalization()),
               variance,
-              interceptIndices.get(coordinateId))
+              priorModelOpt,
+              interceptIndices.get(coordinateId),
+              isIncrementalTraining)
           }
 
           (coordinateId, coordinate)
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala
index cbac3167..f2c71a01 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala
@@ -20,15 +20,16 @@ import com.linkedin.photon.ml.algorithm.Coordinate
 import com.linkedin.photon.ml.function.glm.{GLMLossFunction, LogisticLossFunction, PoissonLossFunction, SquaredLossFunction}
 import com.linkedin.photon.ml.function.svm.SmoothedHingeLossFunction
 import com.linkedin.photon.ml.optimization.game.CoordinateOptimizationConfiguration
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 
 /**
  * Helper for [[ObjectiveFunction]] related tasks.
  */
 object ObjectiveFunctionHelper {
 
-  type ObjectiveFunctionFactoryFactory = CoordinateOptimizationConfiguration => Option[Int] => ObjectiveFunction
-  type DistributedObjectiveFunctionFactory = Option[Int] => DistributedObjectiveFunction
-  type SingleNodeObjectiveFunctionFactory = Option[Int] => SingleNodeObjectiveFunction
+  type ObjectiveFunctionFactoryFactory = (CoordinateOptimizationConfiguration, Boolean) => (Option[GeneralizedLinearModel], Option[Int]) => ObjectiveFunction
+  type DistributedObjectiveFunctionFactory = (Option[GeneralizedLinearModel], Option[Int]) => DistributedObjectiveFunction
+  type SingleNodeObjectiveFunctionFactory = (Option[GeneralizedLinearModel], Option[Int]) => SingleNodeObjectiveFunction
 
   /**
    * Construct a factory function for building [[ObjectiveFunction]] objects.
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala
index f6eb323c..d4e81c3c 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala
@@ -20,9 +20,11 @@ import org.apache.spark.rdd.RDD
 
 import com.linkedin.photon.ml.data.LabeledPoint
 import com.linkedin.photon.ml.function._
+import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
 import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.optimization.RegularizationType
 import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 import com.linkedin.photon.ml.util.BroadcastWrapper
 
 /**
@@ -109,7 +111,7 @@ protected[ml] class DistributedGLMLossFunction private (
    * @param normalizationContext The normalization context
    * @return The computed Hessian multiplied by the given multiplyVector
    */
-    override protected[ml] def hessianVector(
+  override protected[ml] def hessianVector(
       input: RDD[LabeledPoint],
       coefficients: Broadcast[Vector[Double]],
       multiplyVector: Broadcast[Vector[Double]],
@@ -155,27 +157,50 @@ object DistributedGLMLossFunction {
    * @param configuration The optimization problem configuration
    * @param singleLossFunction The PointwiseLossFunction providing functionality for l(z, y)
    * @param treeAggregateDepth The tree aggregation depth
+   * @param priorModelOpt Optional prior model, required if this is an objective function for incremental training
    * @param interceptIndexOpt The index of the intercept, if there is one
+   * @param isIncrementalTrainingEnabled Is this an objective function for incremental training?
    * @return A new DistributedGLMLossFunction
    */
   def apply(
       configuration: GLMOptimizationConfiguration,
       singleLossFunction: PointwiseLossFunction,
       treeAggregateDepth: Int,
-      interceptIndexOpt: Option[Int] = None): DistributedGLMLossFunction = {
+      priorModelOpt: Option[GeneralizedLinearModel] = None,
+      interceptIndexOpt: Option[Int] = None,
+      isIncrementalTrainingEnabled: Boolean = false): DistributedGLMLossFunction = {
 
     val regularizationContext = configuration.regularizationContext
     val regularizationWeight = configuration.regularizationWeight
 
-    regularizationContext.regularizationType match {
-      case RegularizationType.L2 | RegularizationType.ELASTIC_NET =>
-        new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth) with L2RegularizationTwiceDiff {
-          l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
+    (priorModelOpt, isIncrementalTrainingEnabled) match {
+      case (_, false) =>
+        regularizationContext.regularizationType match {
+          case RegularizationType.L2 | RegularizationType.ELASTIC_NET =>
+            new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth)
+              with L2RegularizationTwiceDiff {
 
-          override def interceptOpt: Option[Int] = interceptIndexOpt
+                l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
+
+                override def interceptOpt: Option[Int] = interceptIndexOpt
+              }
+
+          case _ => new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth)
+        }
+
+      case (Some(priorModel), true) =>
+        val l1Weight = regularizationContext.getL1RegularizationWeight(regularizationWeight)
+        val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
+        val priorModelCoefficients = priorModel.coefficients
+
+        new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth) with PriorDistributionTwiceDiff {
+          override val priorCoefficients: ModelCoefficients = priorModelCoefficients
+          l1RegWeight = l1Weight
+          l2RegWeight = l2Weight
         }
 
-      case _ => new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth)
+      case (None, true) =>
+        throw new IllegalArgumentException("Incremental training is enabled, but prior model is missing")
     }
   }
 }
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala
index 5b4a918c..f317bb21 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala
@@ -17,6 +17,7 @@ package com.linkedin.photon.ml.function.glm
 import com.linkedin.photon.ml.algorithm.Coordinate
 import com.linkedin.photon.ml.function.ObjectiveFunction
 import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 
 /**
  * Helper for generalized linear model loss function related tasks.
@@ -28,21 +29,35 @@ object GLMLossFunction {
    *
    * @param lossFunction A [[PointwiseLossFunction]] for training a generalized linear model
    * @param treeAggregateDepth The tree-aggregate depth to use during aggregation
+   * @param config Optimization problem configuration
+   * @param isIncrementalTraining Is this an objective function for incremental training?
    * @return A function which builds the appropriate type of [[ObjectiveFunction]] for a given [[Coordinate]] type and
    *         optimization settings.
    */
-  def buildFactory
-      (lossFunction: PointwiseLossFunction, treeAggregateDepth: Int)
-      (config: CoordinateOptimizationConfiguration): Option[Int] => ObjectiveFunction =
-
+  def buildFactory(
+      lossFunction: PointwiseLossFunction,
+      treeAggregateDepth: Int)(
+      config: CoordinateOptimizationConfiguration,
+      isIncrementalTraining: Boolean = false): (Option[GeneralizedLinearModel], Option[Int]) => ObjectiveFunction =
     config match {
       case fEOptConfig: FixedEffectOptimizationConfiguration =>
-        (interceptIndexOpt: Option[Int]) =>
-          DistributedGLMLossFunction(fEOptConfig, lossFunction, treeAggregateDepth, interceptIndexOpt)
+        (generalizedLinearModelOpt: Option[GeneralizedLinearModel], interceptIndexOpt: Option[Int]) =>
+          DistributedGLMLossFunction(
+            fEOptConfig,
+            lossFunction,
+            treeAggregateDepth,
+            generalizedLinearModelOpt,
+            interceptIndexOpt,
+            isIncrementalTraining)
 
       case rEOptConfig: RandomEffectOptimizationConfiguration =>
-        (interceptIndexOpt: Option[Int]) =>
-          SingleNodeGLMLossFunction(rEOptConfig, lossFunction, interceptIndexOpt)
+        (generalizedLinearModelOpt: Option[GeneralizedLinearModel], interceptIndexOpt: Option[Int]) =>
+          SingleNodeGLMLossFunction(
+            rEOptConfig,
+            lossFunction,
+            generalizedLinearModelOpt,
+            interceptIndexOpt,
+            isIncrementalTraining)
 
       case _ =>
         throw new UnsupportedOperationException(
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
index eb45d3bb..3ce83018 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
@@ -18,9 +18,11 @@ import breeze.linalg._
 
 import com.linkedin.photon.ml.data.LabeledPoint
 import com.linkedin.photon.ml.function._
+import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
 import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.optimization.RegularizationType
 import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 import com.linkedin.photon.ml.util.BroadcastWrapper
 
 /**
@@ -55,7 +57,7 @@ protected[ml] class SingleNodeGLMLossFunction private (singlePointLossFunction:
       input: Iterable[LabeledPoint],
       coefficients: Vector[Double],
       normalizationContext: BroadcastWrapper[NormalizationContext]): Double =
-      calculate(input, coefficients, normalizationContext)._1
+    calculate(input, coefficients, normalizationContext)._1
 
   /**
    * Compute the gradient of the function over the given data for the given model coefficients.
@@ -144,26 +146,49 @@ object SingleNodeGLMLossFunction {
    *
    * @param configuration The optimization problem configuration
    * @param singleLossFunction The PointwiseLossFunction providing functionality for l(z, y)
+   * @param priorModelOpt Optional prior model, required if this is an objective function for incremental training
    * @param interceptIndexOpt The index of the intercept, if there is one
+   * @param isIncrementalTrainingEnabled Is this an objective function for incremental training?
    * @return A new SingleNodeGLMLossFunction
    */
   def apply(
       configuration: GLMOptimizationConfiguration,
       singleLossFunction: PointwiseLossFunction,
-      interceptIndexOpt: Option[Int] = None): SingleNodeGLMLossFunction = {
+      priorModelOpt: Option[GeneralizedLinearModel] = None,
+      interceptIndexOpt: Option[Int] = None,
+      isIncrementalTrainingEnabled: Boolean = false): SingleNodeGLMLossFunction = {
 
     val regularizationContext = configuration.regularizationContext
     val regularizationWeight = configuration.regularizationWeight
 
-    regularizationContext.regularizationType match {
-      case RegularizationType.L2 | RegularizationType.ELASTIC_NET =>
-        new SingleNodeGLMLossFunction(singleLossFunction) with L2RegularizationTwiceDiff {
-          l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
+    (priorModelOpt, isIncrementalTrainingEnabled) match {
+      case (_, false) =>
+        regularizationContext.regularizationType match {
+          case RegularizationType.L2 | RegularizationType.ELASTIC_NET =>
+            new SingleNodeGLMLossFunction(singleLossFunction) with L2RegularizationTwiceDiff {
 
-          override def interceptOpt: Option[Int] = interceptIndexOpt
+              l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
+
+              override def interceptOpt: Option[Int] = interceptIndexOpt
+            }
+
+          case _ => new SingleNodeGLMLossFunction(singleLossFunction)
+        }
+
+      case (Some(priorModel), true) =>
+        val l1Weight = regularizationContext.getL1RegularizationWeight(regularizationWeight)
+        val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
+        val priorModelCoefficients = priorModel.coefficients
+
+        new SingleNodeGLMLossFunction(singleLossFunction) with PriorDistributionTwiceDiff {
+          override val priorCoefficients: ModelCoefficients = priorModelCoefficients
+          l1RegWeight = l1Weight
+          l2RegWeight = l2Weight
         }
 
-      case _ => new SingleNodeGLMLossFunction(singleLossFunction)
+      case (None, true) =>
+        throw new IllegalArgumentException(
+          s"Incremental training is enabled, but prior model is missing")
     }
   }
 }
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala
index a67667d9..eaa6355b 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala
@@ -21,6 +21,7 @@ import com.linkedin.photon.ml.constants.MathConst
 import com.linkedin.photon.ml.data.LabeledPoint
 import com.linkedin.photon.ml.function.ObjectiveFunction
 import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 
 /**
  * Implement Rennie's smoothed hinge loss function (http://qwone.com/~jason/writing/smoothHinge.pdf) as an
@@ -91,20 +92,22 @@ object SmoothedHingeLossFunction {
    * Construct a factory function for building distributed and non-distributed smoothed hinge loss functions.
    *
    * @param treeAggregateDepth The tree-aggregate depth to use during aggregation
+   * @param config Optimization problem configuration
+   * @param isIncrementalTraining Is this an objective function for incremental training?
    * @return A function which builds the appropriate type of [[ObjectiveFunction]] for a given [[Coordinate]] type and
    *         optimization settings.
    */
-  def buildFactory
-      (treeAggregateDepth: Int)
-      (config: CoordinateOptimizationConfiguration): Option[Int] => ObjectiveFunction =
-
+  def buildFactory(
+      treeAggregateDepth: Int)(
+      config: CoordinateOptimizationConfiguration,
+      isIncrementalTraining: Boolean = false): (Option[GeneralizedLinearModel], Option[Int]) => ObjectiveFunction =
     config match {
       case fEOptConfig: FixedEffectOptimizationConfiguration =>
-        (interceptIndexOpt: Option[Int]) =>
+        (_: Option[GeneralizedLinearModel], _: Option[Int]) =>
           DistributedSmoothedHingeLossFunction(fEOptConfig, treeAggregateDepth)
 
       case rEOptConfig: RandomEffectOptimizationConfiguration =>
-        (interceptIndexOpt: Option[Int]) =>
+        (_: Option[GeneralizedLinearModel], _: Option[Int]) =>
           SingleNodeSmoothedHingeLossFunction(rEOptConfig)
 
       case _ =>
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala
index feecc74c..6e3be671 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala
@@ -28,7 +28,7 @@ import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceCompu
 import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration
 import com.linkedin.photon.ml.sampling.DownSampler
 import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.util.BroadcastWrapper
+import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
 import com.linkedin.photon.ml.util.Linalg.choleskyInverse
 
 /**
@@ -43,7 +43,7 @@ import com.linkedin.photon.ml.util.Linalg.choleskyInverse
  * @param regularizationContext The regularization context
  * @param varianceComputation If an how to compute coefficient variances
  */
-protected[ml] class DistributedOptimizationProblem[Objective <: DistributedObjectiveFunction] protected[optimization] (
+protected[ml] class DistributedOptimizationProblem[Objective <: DistributedObjectiveFunction] protected[optimization](
     optimizer: Optimizer[Objective],
     objectiveFunction: Objective,
     samplerOption: Option[DownSampler],
@@ -62,11 +62,13 @@ protected[ml] class DistributedOptimizationProblem[Objective <: DistributedObjec
    * @param regularizationWeight The new regularization weight
    */
   def updateRegularizationWeight(regularizationWeight: Double): Unit = {
+
     optimizer match {
       case owlqn: OWLQN =>
         owlqn.l1RegularizationWeight = regularizationContext.getL1RegularizationWeight(regularizationWeight)
       case _ =>
     }
+
     objectiveFunction match {
       case l2RegFunc: DistributedObjectiveFunction with L2Regularization =>
         l2RegFunc.l2RegularizationWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
@@ -87,10 +89,7 @@ protected[ml] class DistributedOptimizationProblem[Objective <: DistributedObjec
 
     val result = (objectiveFunction, varianceComputation) match {
       case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.SIMPLE) =>
-        Some(
-          twiceDiffFunc
-            .hessianDiagonal(input, broadcastCoefficients)
-            .map(v => 1.0 / math.max(v, MathConst.EPSILON)))
+        Some(VectorUtils.invertVector(twiceDiffFunc.hessianDiagonal(input, broadcastCoefficients)))
 
       case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.FULL) =>
         val hessianMatrix = twiceDiffFunc.hessianMatrix(input, broadcastCoefficients)
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala
index c5875a8b..58a17393 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala
@@ -24,7 +24,7 @@ import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType
 import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration
 import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.util.BroadcastWrapper
+import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
 import com.linkedin.photon.ml.util.Linalg.choleskyInverse
 
 /**
@@ -37,7 +37,7 @@ import com.linkedin.photon.ml.util.Linalg.choleskyInverse
  * @param glmConstructor The function to use for producing GLMs from trained coefficients
  * @param varianceComputationType If an how to compute coefficient variances
  */
-protected[ml] class SingleNodeOptimizationProblem[Objective <: SingleNodeObjectiveFunction] protected[optimization] (
+protected[ml] class SingleNodeOptimizationProblem[Objective <: SingleNodeObjectiveFunction] protected[optimization](
     optimizer: Optimizer[Objective],
     objectiveFunction: Objective,
     glmConstructor: Coefficients => GeneralizedLinearModel,
@@ -59,9 +59,7 @@ protected[ml] class SingleNodeOptimizationProblem[Objective <: SingleNodeObjecti
   override def computeVariances(input: Iterable[LabeledPoint], coefficients: Vector[Double]): Option[Vector[Double]] =
     (objectiveFunction, varianceComputationType) match {
       case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.SIMPLE) =>
-        Some(twiceDiffFunc
-          .hessianDiagonal(input, coefficients)
-          .map(v => 1.0 / math.max(v, MathConst.EPSILON)))
+        Some(VectorUtils.invertVector(twiceDiffFunc.hessianDiagonal(input, coefficients)))
 
       case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.FULL) =>
         val hessianMatrix = twiceDiffFunc.hessianMatrix(input, coefficients)
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
index bbc2cb92..40fb696a 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
@@ -20,10 +20,10 @@ import org.apache.spark.storage.StorageLevel
 
 import com.linkedin.photon.ml.Types.REId
 import com.linkedin.photon.ml.function.SingleNodeObjectiveFunction
-import com.linkedin.photon.ml.model.Coefficients
+import com.linkedin.photon.ml.model.{Coefficients, RandomEffectModel}
 import com.linkedin.photon.ml.normalization.NormalizationContext
-import com.linkedin.photon.ml.optimization.{SingleNodeOptimizationProblem, VarianceComputationType}
 import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType
+import com.linkedin.photon.ml.optimization.{SingleNodeOptimizationProblem, VarianceComputationType}
 import com.linkedin.photon.ml.projector.LinearSubspaceProjector
 import com.linkedin.photon.ml.spark.RDDLike
 import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
@@ -134,34 +134,43 @@ protected[ml] class RandomEffectOptimizationProblem[Objective <: SingleNodeObjec
 object RandomEffectOptimizationProblem {
 
   /**
-   * Build a new [[RandomEffectOptimizationProblem]].
+   * Build a new [[RandomEffectOptimizationProblem]] to optimize.
    *
    * @tparam RandomEffectObjective The type of objective function used to solve individual random effect optimization
    *                               problems
    * @param linearSubspaceProjectorsRDD The per-entity [[LinearSubspaceProjector]] objects used to compress the
    *                                    per-entity feature spaces
    * @param configuration The optimization problem configuration
-   * @param objectiveFunctionFactory The objective function to optimize
+   * @param objectiveFunctionFactory Factory for the objective function
    * @param glmConstructor The function to use for producing GLMs from trained coefficients
    * @param normalizationContext The normalization context
    * @param varianceComputationType If and how coefficient variances should be computed
    * @param interceptIndexOpt The option of intercept index
-   * @return A new [[RandomEffectOptimizationProblem]] object
+   * @return A new [[RandomEffectOptimizationProblem]]
    */
-  def apply[RandomEffectObjective <: SingleNodeObjectiveFunction](
+  protected[ml] def apply[RandomEffectObjective <: SingleNodeObjectiveFunction](
       linearSubspaceProjectorsRDD: RDD[(REId, LinearSubspaceProjector)],
       configuration: RandomEffectOptimizationConfiguration,
-      objectiveFunctionFactory: Option[Int] => RandomEffectObjective,
+      objectiveFunctionFactory: (Option[GeneralizedLinearModel], Option[Int]) => RandomEffectObjective,
+      priorRandomEffectModelOpt: Option[RandomEffectModel],
       glmConstructor: Coefficients => GeneralizedLinearModel,
       normalizationContext: NormalizationContext,
       varianceComputationType: VarianceComputationType = VarianceComputationType.NONE,
       interceptIndexOpt: Option[Int]): RandomEffectOptimizationProblem[RandomEffectObjective] = {
 
+    val sc = linearSubspaceProjectorsRDD.sparkContext
+    val configurationBroadcast = sc.broadcast(configuration)
+    val objectiveFunctionBuilderBroadcast = sc.broadcast(objectiveFunctionFactory)
+    val glmConstructorBroadcast = sc.broadcast(glmConstructor)
+    val normalizationContextBroadcast = sc.broadcast(normalizationContext)
+
     // Generate new NormalizationContext and SingleNodeOptimizationProblem objects
     val optimizationProblems = linearSubspaceProjectorsRDD
-      .mapValues { projector =>
-        val factors = normalizationContext.factorsOpt.map(factors => projector.projectForward(factors))
-        val shiftsAndIntercept = normalizationContext
+      .leftOuterJoin(priorRandomEffectModelOpt.map(_.modelsRDD).getOrElse(sc.emptyRDD[(REId, GeneralizedLinearModel)]))
+      .mapValues { case (projector: LinearSubspaceProjector, priorModelOpt: Option[GeneralizedLinearModel]) =>
+        val normContext = normalizationContextBroadcast.value
+        val factors = normContext.factorsOpt.map(factors => projector.projectForward(factors))
+        val shiftsAndIntercept = normContext
           .shiftsAndInterceptOpt
           .map { case (shifts, intercept) =>
             val newShifts = projector.projectForward(shifts)
@@ -170,15 +179,15 @@ object RandomEffectOptimizationProblem {
             (newShifts, newIntercept)
           }
         val projectedNormalizationContext = new NormalizationContext(factors, shiftsAndIntercept)
+        val objectiveFunctionBuilder = objectiveFunctionBuilderBroadcast.value
         val projectedInterceptOpt = interceptIndexOpt.map { interceptIndex =>
           projector.originalToProjectedSpaceMap(interceptIndex)
         }
 
-        // TODO: Broadcast arguments to SingleNodeOptimizationProblem?
         SingleNodeOptimizationProblem(
-          configuration,
-          objectiveFunctionFactory(projectedInterceptOpt),
-          glmConstructor,
+          configurationBroadcast.value,
+          objectiveFunctionBuilder(priorModelOpt, projectedInterceptOpt),
+          glmConstructorBroadcast.value,
           PhotonNonBroadcast(projectedNormalizationContext),
           varianceComputationType)
       }
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala
index c6447291..d91ad859 100644
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala
+++ b/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala
@@ -23,6 +23,7 @@ import com.linkedin.photon.ml.function.glm.DistributedGLMLossFunction
 import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction
 import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration
 import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType}
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 
 /**
  * Unit tests for [[ObjectiveFunctionHelper]].
@@ -48,15 +49,21 @@ class ObjectiveFunctionHelperTest {
   @Test(dataProvider = "trainingTaskProvider")
   def testBuildFactory(trainingTask: TaskType): Unit = {
 
-    val objectiveFunction =
-      ObjectiveFunctionHelper.buildFactory(trainingTask, TREE_AGGREGATE_DEPTH)(COORDINATE_OPT_CONFIG)
+    val objectiveFunction = ObjectiveFunctionHelper.buildFactory(
+      trainingTask,
+      TREE_AGGREGATE_DEPTH)(
+      COORDINATE_OPT_CONFIG,
+      ENABLE_INCREMENTAL_TRAINING)
 
     trainingTask match {
       case TaskType.LOGISTIC_REGRESSION | TaskType.LINEAR_REGRESSION | TaskType.POISSON_REGRESSION =>
-        assertTrue(objectiveFunction.isInstanceOf[Option[Int] => DistributedGLMLossFunction])
+        assertTrue(
+          objectiveFunction.isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedGLMLossFunction])
 
       case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM =>
-        assertTrue(objectiveFunction.isInstanceOf[Option[Int] => DistributedSmoothedHingeLossFunction])
+        assertTrue(
+          objectiveFunction
+            .isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedSmoothedHingeLossFunction])
     }
   }
 }
@@ -64,6 +71,7 @@ class ObjectiveFunctionHelperTest {
 object ObjectiveFunctionHelperTest {
 
   val COORDINATE_OPT_CONFIG = FixedEffectOptimizationConfiguration(OptimizerConfig(OptimizerType.LBFGS, 1, 2e-2))
+  val ENABLE_INCREMENTAL_TRAINING = false
   val MAXIMUM_ITERATIONS = 1
   val TOLERANCE = 2e-2
   val TREE_AGGREGATE_DEPTH = 3
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala
index 648ef3be..4905b355 100644
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala
+++ b/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala
@@ -20,6 +20,7 @@ import org.testng.annotations.{DataProvider, Test}
 import com.linkedin.photon.ml.function.ObjectiveFunction
 import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType}
 import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 
 /**
  * Unit tests for [[GLMLossFunction]].
@@ -47,15 +48,16 @@ class GLMLossFunctionTest {
   @Test(dataProvider = "coordinateOptimizationProblemProvider")
   def testBuildFactory(coordinateOptConfig: CoordinateOptimizationConfiguration): Unit = {
 
-    val objectiveFunction =
-      GLMLossFunction.buildFactory(LOSS_FUNCTION, TREE_AGGREGATE_DEPTH)(coordinateOptConfig)
+    val objectiveFunction = GLMLossFunction.buildFactory(LOSS_FUNCTION, TREE_AGGREGATE_DEPTH)(coordinateOptConfig)
 
     coordinateOptConfig match {
       case _: FixedEffectOptimizationConfiguration =>
-        assertTrue(objectiveFunction.isInstanceOf[Option[Int] => DistributedGLMLossFunction])
+        assertTrue(
+          objectiveFunction.isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedGLMLossFunction])
 
       case _: RandomEffectOptimizationConfiguration =>
-        assertTrue(objectiveFunction.isInstanceOf[Option[Int] => SingleNodeGLMLossFunction])
+        assertTrue(
+          objectiveFunction.isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => SingleNodeGLMLossFunction])
 
       case _ =>
         assertTrue(false)
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala
index 1bb15fff..9d4eda16 100644
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala
+++ b/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala
@@ -20,6 +20,7 @@ import org.testng.annotations.{DataProvider, Test}
 import com.linkedin.photon.ml.function.ObjectiveFunction
 import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType}
 import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 
 /**
  * Unit tests for [[SmoothedHingeLossFunction]].
@@ -51,10 +52,14 @@ class SmoothedHingeLossFunctionTest {
 
     coordinateOptConfig match {
       case _: FixedEffectOptimizationConfiguration =>
-        assertTrue(objectiveFunctionFactory.isInstanceOf[Option[Int] => DistributedSmoothedHingeLossFunction])
+        assertTrue(
+          objectiveFunctionFactory
+            .isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedSmoothedHingeLossFunction])
 
       case _: RandomEffectOptimizationConfiguration =>
-        assertTrue(objectiveFunctionFactory.isInstanceOf[Option[Int] => SingleNodeSmoothedHingeLossFunction])
+        assertTrue(
+          objectiveFunctionFactory
+            .isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => SingleNodeSmoothedHingeLossFunction])
 
       case _ =>
         assertTrue(false)
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala
index 2deb5862..2317b86f 100644
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala
+++ b/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala
@@ -314,7 +314,7 @@ trait GameTestUtils extends SparkTestUtils {
       seed)
 
     val optimizationProblem = generateRandomEffectOptimizationProblem(randomEffectDataset)
-    val coordinate = new RandomEffectCoordinate[SingleNodeGLMLossFunction](randomEffectDataset, optimizationProblem)
+    val coordinate = new RandomEffectCoordinate(randomEffectDataset, optimizationProblem)
     val models = sc.parallelize(generateLinearModelsForRandomEffects(randomEffectIds, dimensions))
     val model = new RandomEffectModel(
       models,
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
index 94ca91e1..6755a4e0 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
@@ -168,8 +168,12 @@ object GameTrainingDriver extends GameDriver {
 
   val ignoreThresholdForNewModels: Param[Boolean] = ParamUtils.createParam[Boolean](
     "ignore threshold for new models",
-    "Flag to ignore the random effect samples lower bound when encountering a random effect ID without an existing " +
-      "model during warm-start training.")
+    "Flag to ignore the random effect samples lower bound when encountering a random effect ID without an " +
+      "existing model during warm-start training.")
+
+  val incrementalTraining: Param[Boolean] = ParamUtils.createParam[Boolean](
+    "incremental training",
+    "Flag to enable incremental training.")
 
   //
   // Initialize object
@@ -216,6 +220,7 @@ object GameTrainingDriver extends GameDriver {
     setDefault(modelSparsityThreshold, VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
     setDefault(timeZone, Constants.DEFAULT_TIME_ZONE)
     setDefault(ignoreThresholdForNewModels, false)
+    setDefault(incrementalTraining, false)
   }
 
   /**
@@ -245,11 +250,7 @@ object GameTrainingDriver extends GameDriver {
     val normalizationType = paramMap.getOrElse(normalization, getDefault(normalization).get)
     val hyperParameterTuningMode = paramMap.getOrElse(hyperParameterTuning, getDefault(hyperParameterTuning).get)
     val ignoreThreshold = paramMap.getOrElse(ignoreThresholdForNewModels, getDefault(ignoreThresholdForNewModels).get)
-
-    // Warm-start must be enabled to ignore threshold
-    require(
-      !ignoreThreshold || baseModelDirOpt.isDefined,
-      "'Ignore threshold for new models' flag set but no initial model provided for warm-start")
+    val isIncrementalTraining = paramMap.getOrElse(incrementalTraining, getDefault(incrementalTraining).get)
 
     // Partial retraining and warm-start training require an initial GAME model to be provided as input
     val coordinatesToTrain = (baseModelDirOpt, retrainModelCoordsOpt) match {
@@ -330,6 +331,16 @@ object GameTrainingDriver extends GameDriver {
 
       case _ =>
     }
+
+    // Warm-start must be enabled to ignore threshold
+    require(
+      !ignoreThreshold || baseModelDirOpt.isDefined,
+      s"'${ignoreThresholdForNewModels.name}' set but no initial model provided (warm-start not enabled).")
+
+    // Warm-start must be enabled to ignore threshold
+    require(
+      !isIncrementalTraining || baseModelDirOpt.isDefined,
+      s"'${incrementalTraining.name}' set but no initial model provided.")
   }
 
   //
@@ -458,6 +469,7 @@ object GameTrainingDriver extends GameDriver {
         .setVarianceComputation(getOrDefault(varianceComputationType))
         .setIgnoreThresholdForNewModels(getOrDefault(ignoreThresholdForNewModels))
         .setUseWarmStart(true)
+        .setIncrementalTraining(getOrDefault(incrementalTraining))
 
       get(inputColumnNames).foreach(estimator.setInputColumnNames)
       modelOpt.foreach(estimator.setInitialModel)
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
index a900ae23..52995bfc 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
@@ -68,7 +68,7 @@ object AvroUtils {
 
     val minPartitionsPerPath = math.ceil(1.0 * minPartitions / inputPaths.length).toInt
 
-    sc.union(inputPaths.map { path => readAvroFilesInDir[GenericRecord](sc, path, minPartitionsPerPath) } )
+    sc.union(inputPaths.map { path => readAvroFilesInDir[GenericRecord](sc, path, minPartitionsPerPath) })
   }
 
   /**
@@ -251,8 +251,10 @@ object AvroUtils {
    * @return The nameAndTerm parsed from the Avro record
    */
   protected[avro] def readNameAndTermFromGenericRecord(record: GenericRecord): NameAndTerm = {
+
     val name = Utils.getStringAvro(record, AvroFieldNames.NAME)
     val term = Utils.getStringAvro(record, AvroFieldNames.TERM, isNullOK = true)
+
     NameAndTerm(name, term)
   }
 
@@ -269,6 +271,7 @@ object AvroUtils {
     genericRecords
       .flatMap {
         _.get(featureSectionKey) match {
+
           case recordList: JList[_] =>
             recordList.asScala.map {
               case record: GenericRecord =>
@@ -278,8 +281,8 @@ object AvroUtils {
                 throw new IllegalArgumentException(
                   s"$any in features list is not a record. It needs to be an Avro record containingg a name and term for " +
                     s"each feature.")
-
             }
+
           case _ =>
             throw new IllegalArgumentException(
               s"$featureSectionKey is not a list (and might be null). It needs to be a list of Avro records containing a " +
@@ -422,7 +425,7 @@ object AvroUtils {
    * @return The (effectId, latentFactor) pair converted from the input Avro record
    */
   protected[avro] def convertLatentFactorAvroToLatentFactor(
-      latentFactorAvro: LatentFactorAvro): (String, Vector[Double]) = {
+    latentFactorAvro: LatentFactorAvro): (String, Vector[Double]) = {
 
     val effectId = latentFactorAvro.getEffectId.toString
     val latentFactor = new DenseVector[Double](latentFactorAvro.getLatentFactor.toArray().map(_.asInstanceOf[Double]))
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
new file mode 100644
index 00000000..f28a71a1
--- /dev/null
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
@@ -0,0 +1,250 @@
+/*
+ * Copyright 2019 LinkedIn Corp. All rights reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package com.linkedin.photon.ml.function
+
+import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, sum}
+import breeze.numerics.{abs, sqrt}
+
+import com.linkedin.photon.ml.normalization.NormalizationContext
+import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
+import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
+
+/**
+ * Trait for an incremental training objective function. It is assumed that the prior is a product of Gaussian and
+ * Laplace distributions. The L1 regularization weight refers to the relative weight of the Laplace prior. The L2
+ * regularization weight refers to the relative weight of the Gaussian prior.
+ */
+trait PriorDistribution extends ObjectiveFunction {
+
+  val priorCoefficients: ModelCoefficients = ModelCoefficients(DenseVector.zeros(1))
+
+  lazy protected val priorMeans: Vector[Double] = priorCoefficients.means
+  lazy protected val priorVariances: Vector[Double] = priorCoefficients.variancesOption.get
+  lazy protected val inversePriorVariances: DenseVector[Double] = VectorUtils.invertVector(priorVariances).toDenseVector
+  protected var l1RegWeight: Double = 0D
+  protected var l2RegWeight: Double = 0D
+
+  require(l1RegWeight >= 0D, s"Invalid regularization weight '$l1RegWeight")
+  require(l2RegWeight >= 0D, s"Invalid regularization weight '$l2RegWeight")
+
+  /**
+   * Getter for the Laplace weight of the prior.
+   *
+   * @return The L1 regularization weight
+   */
+  def l1RegularizationWeight: Double = l1RegWeight
+
+  /**
+   * Getter for the Gaussian weight of the prior.
+   *
+   * @return The L2 regularization weight
+   */
+  def l2RegularizationWeight: Double = l2RegWeight
+
+  /**
+   * Compute the value of the function over the given data for the given model coefficients, with regularization towards
+   * the prior coefficients.
+   *
+   * @param input The data over which to compute the objective function value
+   * @param coefficients The model coefficients for which to compute the objective function's value
+   * @param normalizationContext The normalization context
+   * @return The value of the objective function and regularization terms
+   */
+  abstract override protected[ml] def value(
+      input: Data,
+      coefficients: Coefficients,
+      normalizationContext: BroadcastWrapper[NormalizationContext]): Double =
+    super.value(input, coefficients, normalizationContext) +
+      l1RegValue(convertToVector(coefficients)) +
+      l2RegValue(convertToVector(coefficients))
+
+  /**
+   * Compute the Laplace regularization term for the given model coefficients.
+   *
+   * @param coefficients The model coefficients
+   * @return The Laplace regularization term value
+   */
+  protected def l1RegValue(coefficients: Vector[Double]): Double = {
+
+    val normalizedCoefficients = (coefficients - priorMeans) :/ sqrt(priorVariances)
+
+    l1RegWeight * sum(abs(normalizedCoefficients))
+  }
+
+  /**
+   * Compute the Gaussian regularization term for the given model coefficients.
+   *
+   * @param coefficients The model coefficients
+   * @return The Gaussian regularization term value
+   */
+  protected def l2RegValue(coefficients: Vector[Double]): Double = {
+
+    val normalizedCoefficients = (coefficients - priorMeans) :/ sqrt(priorVariances)
+
+    l2RegWeight * normalizedCoefficients.dot(normalizedCoefficients) / 2
+  }
+}
+
+trait PriorDistributionDiff extends DiffFunction with PriorDistribution {
+
+  /**
+   * Compute the value of the function over the given data for the given model coefficients, with regularization towards
+   * the prior coefficients.
+   *
+   * @param input The data over which to compute the objective function value
+   * @param coefficients The model coefficients for which to compute the objective function's value
+   * @param normalizationContext The normalization context
+   * @return The value of the objective function and regularization terms
+   */
+  abstract override protected[ml] def value(
+      input: Data,
+      coefficients: Coefficients,
+      normalizationContext: BroadcastWrapper[NormalizationContext]): Double =
+    calculate(input, coefficients, normalizationContext)._1
+
+  /**
+   * Compute the gradient of the function over the given data for the given model coefficients, with regularization
+   * towards the prior coefficients.
+   *
+   * @param input The data over which to compute the objective function gradient
+   * @param coefficients The model coefficients for which to compute the objective function's gradient
+   * @param normalizationContext The normalization context
+   * @return The gradient of the objective function and regularization terms
+   */
+  abstract override protected[ml] def gradient(
+      input: Data,
+      coefficients: Coefficients,
+      normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] =
+    calculate(input, coefficients, normalizationContext)._2
+
+  /**
+   * Compute both the value and the gradient of the function over the given data for the given model coefficients, with
+   * regularization towards the prior coefficients (computing value and gradient at once is more efficient than
+   * computing them sequentially).
+   *
+   * @param input The data over which to compute the objective function value and gradient
+   * @param coefficients The model coefficients for which to compute the objective function's value and gradient
+   * @param normalizationContext The normalization context
+   * @return The value and gradient of the objective function and regularization terms
+   */
+  abstract override protected[ml] def calculate(
+      input: Data,
+      coefficients: Coefficients,
+      normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = {
+
+    val (baseValue, baseGradient) = super.calculate(input, coefficients, normalizationContext)
+    val valueWithRegularization = baseValue + l1RegValue(convertToVector(coefficients)) +
+      l2RegValue(convertToVector(coefficients))
+    val gradientWithRegularization = baseGradient + l1RegGradient(convertToVector(coefficients)) +
+      l2RegGradient(convertToVector(coefficients))
+
+    (valueWithRegularization, gradientWithRegularization)
+  }
+
+  /**
+   * Compute the gradient of the Laplace term for the given model coefficients.
+   *
+   * @param coefficients The model coefficients
+   * @return The gradient of the Laplace regularization term
+   */
+  protected def l1RegGradient(coefficients: Vector[Double]): Vector[Double] = {
+
+    val coefficientsMask = (coefficients - priorMeans).map(coefficient => if (coefficient > 0) 1.0 else -1.0)
+
+    l1RegWeight * (coefficientsMask :/ sqrt(priorVariances))
+  }
+
+  /**
+   * Compute the gradient of the Gaussian regularization term for the given model coefficients.
+   *
+   * @param coefficients The model coefficients
+   * @return The gradient of the Gaussian regularization term
+   */
+  protected def l2RegGradient(coefficients: Vector[Double]): Vector[Double] = {
+
+    val normalizedCoefficients = (coefficients - priorMeans) :/ priorVariances
+
+    l2RegWeight * normalizedCoefficients
+  }
+}
+
+trait PriorDistributionTwiceDiff extends TwiceDiffFunction with PriorDistributionDiff {
+
+  /**
+   * Compute the Hessian diagonal of the objective function over the given data for the given model coefficients, * the
+   * gradient direction, with regularization towards the prior coefficients.
+   *
+   * @param input The data over which to compute the Hessian diagonal * gradient direction
+   * @param coefficients The model coefficients for which to compute the objective function's Hessian diagonal
+   *                     * gradient direction
+   * @param multiplyVector The gradient direction vector
+   * @param normalizationContext The normalization context
+   * @return The Hessian diagonal (multiplied by the gradient direction) of the objective function and regularization
+   *         terms
+   */
+  abstract override protected[ml] def hessianVector(
+      input: Data,
+      coefficients: Coefficients,
+      multiplyVector: Coefficients,
+      normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] =
+    super.hessianVector(input, coefficients, multiplyVector, normalizationContext) +
+      l2RegHessianVector(convertToVector(multiplyVector))
+
+  /**
+   * Compute the Hessian diagonal of the objective function over the given data for the given model coefficients, with
+   * regularization towards the prior coefficients.
+   *
+   * @param input The data over which to compute the Hessian diagonal
+   * @param coefficients The model coefficients for which to compute the objective function's Hessian diagonal
+   * @return The Hessian diagonal of the objective function and regularization terms
+   */
+  abstract override protected[ml] def hessianDiagonal(input: Data, coefficients: Coefficients): Vector[Double] =
+    super.hessianDiagonal(input, coefficients) :+ l2RegHessianDiagonal
+
+  /**
+   * Compute the Hessian matrix of the objective function over the given data for the given model coefficients, with
+   * regularization towards the prior coefficients.
+   *
+   * @param input The data over which to compute the Hessian matrix
+   * @param coefficients The model coefficients for which to compute the objective function's Hessian matrix
+   * @return The Hessian matrix of the objective function and regularization terms
+   */
+  abstract override protected[ml] def hessianMatrix(input: Data, coefficients: Coefficients): DenseMatrix[Double] =
+    super.hessianMatrix(input, coefficients) + l2RegHessianMatrix
+
+  /**
+   * Compute the Hessian diagonal * gradient direction of the Gaussian regularization term for the given model
+   * coefficients.
+   *
+   * @param multiplyVector The gradient direction vector
+   * @return The Hessian diagonal of the Gaussian regularization term, with gradient direction vector
+   */
+  protected def l2RegHessianVector(multiplyVector: Vector[Double]): Vector[Double] =
+    l2RegWeight * (multiplyVector /:/ priorVariances)
+
+  /**
+   * Compute the Hessian diagonal of the Gaussian regularization term for the given model coefficients.
+   *
+   * @return The Hessian diagonal of the Gaussian regularization term
+   */
+  protected def l2RegHessianDiagonal: Vector[Double] = l2RegWeight * inversePriorVariances
+
+  /**
+   * Compute the Hessian matrix of the Gaussian regularization term for the given model coefficients.
+   *
+   * @return The Hessian matrix of the Gaussian regularization term
+   */
+  protected def l2RegHessianMatrix: DenseMatrix[Double] = l2RegWeight * diag(inversePriorVariances)
+}
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
index 38dc40c1..d88fa97a 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
@@ -14,7 +14,7 @@
  */
 package com.linkedin.photon.ml.model
 
-import breeze.linalg.{DenseVector, SparseVector, Vector, norm}
+import breeze.linalg.{Vector, norm}
 import breeze.stats.meanAndVariance
 
 import com.linkedin.photon.ml.constants.MathConst
@@ -33,13 +33,16 @@ case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Do
 
   // Force means and variances to be of the same type (dense or sparse). This seems reasonable
   // and greatly reduces the number of combinations to check in unit testing.
-  require(variancesOption.isEmpty || variancesOption.get.getClass == means.getClass,
+  require(
+    variancesOption.isEmpty || variancesOption.get.getClass == means.getClass,
     "Coefficients: If variances are provided, must be of the same vector type as means")
   // GAME over if variances are given but don't have the same length as the vector of means
-  require(variancesOption.isEmpty || variancesOption.get.length == means.length,
+  require(
+    variancesOption.isEmpty || variancesOption.get.length == means.length,
     "Coefficients: Means and variances have different lengths")
 
   def length: Int = means.length
+
   lazy val meansL2Norm: Double = norm(means, 2)
   lazy val variancesL2NormOption: Option[Double] = variancesOption.map(variances => norm(variances, 2))
 
@@ -51,6 +54,7 @@ case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Do
    * @return The score
    */
   def computeScore(features: Vector[Double]): Double = {
+
     require(
       means.length == features.length,
       s"Coefficients length (${means.length}) != features length (${features.length})")
@@ -64,6 +68,7 @@ case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Do
    * @return A summary of the object in string representation
    */
   override def toSummaryString: String = {
+
     val sb = new StringBuilder()
     val isDense = means.getClass.getName.contains("Dense")
     val meanAndVar = meanAndVariance(means)
@@ -100,22 +105,22 @@ case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Do
    * @param that The other Coefficients to compare to
    * @return True if the Coefficients are equal, false otherwise
    */
-  override def equals(that: Any): Boolean =
-    that match {
-      case other: Coefficients =>
-        val (m1, v1, m2, v2) = (this.means, this.variancesOption, other.means, other.variancesOption)
-        val sameType = m1.getClass == m2.getClass && v1.map(_.getClass) == v2.map(_.getClass)
-        lazy val sameMeans = VectorUtils.areAlmostEqual(m1, m2)
-        lazy val sameVariance = (v1, v2) match {
-          case (None, None) => true
-          case (Some(val1), Some(val2)) => VectorUtils.areAlmostEqual(val1, val2)
-          case (_, _) => false
-        }
-
-        sameType && sameMeans && sameVariance
-
-      case _ => false
-    }
+  override def equals(that: Any): Boolean = that match {
+    case other: Coefficients =>
+      val (m1, v1, m2, v2) = (this.means, this.variancesOption, other.means, other.variancesOption)
+      val sameType = (m1.getClass == m2.getClass) && (v1.map(_.getClass) == v2.map(_.getClass))
+      lazy val sameMeans = VectorUtils.areAlmostEqual(m1, m2)
+      lazy val sameVariance = (v1, v2) match {
+        case (None, None) => true
+
+        case (Some(val1), Some(val2)) => VectorUtils.areAlmostEqual(val1, val2)
+        case (_, _) => false
+      }
+
+      sameType && sameMeans && sameVariance
+
+    case _ => false
+  }
 
   /**
    * Returns a hash code value for the object.
@@ -135,7 +140,6 @@ protected[ml] object Coefficients {
    * @param dimension Dimensionality of the coefficient vector
    * @return Zero coefficient vector
    */
-  def initializeZeroCoefficients(dimension: Int): Coefficients = {
+  def initializeZeroCoefficients(dimension: Int): Coefficients =
     Coefficients(Vector.zeros[Double](dimension), variancesOption = None)
-  }
 }
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/GameModel.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/GameModel.scala
index 417ba0e2..74fb89ba 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/GameModel.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/GameModel.scala
@@ -40,8 +40,17 @@ class GameModel (private val gameModels: Map[CoordinateId, DatumScoringModel]) e
   /**
    * Get a sub-model by name.
    *
-   * @param name The model name
-   * @return An [[Option]] containing the sub-model associated with `name` in the GAME model, or `None` if none exists.
+   * @throws NoSuchElementException if no sub-model with key [[name]] exists
+   * @param name The sub-model name
+   * @return The sub-model associated with [[name]] in the GAME model
+   */
+  def apply(name: CoordinateId): DatumScoringModel = gameModels(name)
+
+  /**
+   * Get a sub-model by name.
+   *
+   * @param name The sub-model name
+   * @return [[Some]] sub-model associated with [[name]] in the GAME model, or [[None]] if none exists.
    */
   def getModel(name: CoordinateId): Option[DatumScoringModel] = gameModels.get(name)
 
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/util/MathUtils.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/util/MathUtils.scala
index 313b561f..eae9ad8b 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/util/MathUtils.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/util/MathUtils.scala
@@ -63,4 +63,14 @@ object MathUtils {
    * @return True if x1 is greater than x2, false otherwise
    */
   def greaterThan(x1: Double, x2: Double): Boolean = x1 > x2
+
+  /**
+   * Compute the symmetrical difference of two sets (i.e. A ∆ B = (A ⋃ B) - (A ⋂ B))
+   *
+   * @tparam T Some type
+   * @param a The first set
+   * @param b The second set
+   * @return A set containing of elements that are in the first set or the second set but not both sets
+   */
+  def symmetricDifference[T](a: Set[T], b: Set[T]): Set[T] = a.diff(b).union(b.diff(a))
 }
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala
index 24cbde2d..d7121f3c 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala
@@ -20,6 +20,8 @@ import breeze.linalg.{DenseVector, SparseVector, Vector}
 import org.apache.spark.ml.linalg.{DenseVector => SparkMLDenseVector, SparseVector => SparkMLSparseVector, Vector => SparkMLVector}
 import org.apache.spark.mllib.linalg.{DenseVector => SparkDenseVector, SparseVector => SparkSparseVector, Vector => SparkVector}
 
+import com.linkedin.photon.ml.constants.MathConst
+
 /**
  * A utility object that contains operations to create, copy, compare, and convert [[Vector]] objects.
  */
@@ -284,4 +286,12 @@ object VectorUtils {
 
       set
   }
+
+  /**
+   * Element-wise inversion of a [[Vector]].
+   *
+   * @param vector The [[Vector]] to invert
+   * @return The inverted [[Vector]]
+   */
+  def invertVector(vector: Vector[Double]): Vector[Double] = vector.map(v => 1.0 / math.max(v, MathConst.EPSILON))
 }
diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
new file mode 100644
index 00000000..8721b6f2
--- /dev/null
+++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2018 LinkedIn Corp. All rights reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package com.linkedin.photon.ml.function
+
+import breeze.linalg.{DenseVector, diag}
+import org.testng.annotations.Test
+import org.testng.Assert.assertEquals
+import org.mockito.Mockito.mock
+
+import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
+import com.linkedin.photon.ml.normalization.NormalizationContext
+import com.linkedin.photon.ml.util.BroadcastWrapper
+
+/**
+ * Unit tests for [[PriorDistribution]], [[PriorDistributionDiff]], and [[PriorDistributionTwiceDiff]].
+ */
+class PriorDistributionTest {
+
+  import L2RegularizationTest._
+
+  private val DIMENSION = 4
+
+  /**
+   * Test that the prior distribution mixin traits can correctly modify the existing behaviour of an objective function.
+   */
+  @Test
+  def testAll(): Unit = {
+
+    val mockNormalization = mock(classOf[BroadcastWrapper[NormalizationContext]])
+
+    val coefficients = DenseVector.ones[Double](DIMENSION)
+    val priorMean = coefficients :* 2D
+    val multiplyVector = coefficients * 3D
+    val priorVar = coefficients :* 4D
+
+    val l1Weight = 10D
+    val l2Weight = 10D
+
+    val mockObjectiveFunction = new MockObjectiveFunction with PriorDistributionTwiceDiff {
+      override val priorCoefficients = ModelCoefficients(priorMean, Option(priorVar))
+      l1RegWeight = l1Weight
+      l2RegWeight = l2Weight
+    }
+
+    // Assume that coefficients = 1-vector, prior mean = 2-vector, multiply = 3-vector, prior variance = 4-vector for all expected values below
+    val expectedValue = MockObjectiveFunction.VALUE + 0.25 * l2Weight * DIMENSION / 2 + 0.5 * l1Weight * DIMENSION
+    val expectedGradient = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.GRADIENT +
+      (-0.25) * l2Weight +
+      (-0.5) * l1Weight))
+    val expectedVector = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_VECTOR +
+      0.75 * l2Weight))
+    val expectedDiagonal = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_DIAGONAL +
+      0.25 * l2Weight))
+    val expectedMatrix =
+      diag(DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_MATRIX + 0.25 * l2Weight)))
+
+    assertEquals(mockObjectiveFunction.value(Unit, coefficients, mockNormalization), expectedValue)
+    assertEquals(mockObjectiveFunction.gradient(Unit, coefficients, mockNormalization), expectedGradient)
+    assertEquals(
+      mockObjectiveFunction.hessianVector(Unit, coefficients, multiplyVector, mockNormalization),
+      expectedVector)
+    assertEquals(mockObjectiveFunction.hessianDiagonal(Unit, coefficients), expectedDiagonal)
+    assertEquals(mockObjectiveFunction.hessianMatrix(Unit, coefficients), expectedMatrix)
+  }
+}
\ No newline at end of file

From d5d9ac9d2c5f199cbb55c8713abb3d8685c61922 Mon Sep 17 00:00:00 2001
From: Yunbo Ouyang <youyang@linkedin.com>
Date: Mon, 28 Oct 2019 19:03:35 -0700
Subject: [PATCH 2/7] Add calculation details in unit tests and fix integ test
 failure

---
 .../algorithm/CoordinateFactoryIntegTest.scala   |  3 ++-
 .../game/RandomEffectOptimizationProblem.scala   |  5 +++++
 .../photon/ml/function/PriorDistribution.scala   | 16 +++++++++++-----
 .../ml/function/PriorDistributionTest.scala      | 14 +++++++++++---
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala
index c969f180..6f47fd50 100644
--- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala
+++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/CoordinateFactoryIntegTest.scala
@@ -84,6 +84,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
     val priorModelOpt: Option[RandomEffectModel] = None
 
     doReturn(sc).when(mockDataset).sparkContext
+    doReturn(sc).when(mockProjectorsRDD).sparkContext
     doReturn(mockDataRDD).when(mockDataset).activeData
     doReturn(mockDataRDD)
       .when(mockDataRDD)
@@ -145,7 +146,7 @@ object CoordinateFactoryIntegTest {
   private val INTERCEPT_INDEX = None
 
   private val OPTIMIZER_CONFIG = OptimizerConfig(OPTIMIZER_TYPE, MAX_ITER, TOLERANCE)
-  private val MOCK_NORMALIZATION = mock(classOf[NormalizationContext])
+  private val MOCK_NORMALIZATION = mock(classOf[NormalizationContext], withSettings().serializable())
   private val GLM_CONSTRUCTOR = LogisticRegressionModel.apply _
   private val LOSS_FUNCTION_FACTORY = ObjectiveFunctionHelper.buildFactory(TRAINING_TASK, TREE_AGGREGATE_DEPTH)
   private val DOWN_SAMPLER_FACTORY = DownSamplerHelper.buildFactory(TRAINING_TASK)
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
index 40fb696a..7fa29586 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
@@ -192,6 +192,11 @@ object RandomEffectOptimizationProblem {
           varianceComputationType)
       }
 
+    configurationBroadcast.unpersist()
+    objectiveFunctionBuilderBroadcast.unpersist()
+    glmConstructorBroadcast.unpersist()
+    normalizationContextBroadcast.unpersist()
+
     new RandomEffectOptimizationProblem(optimizationProblems, glmConstructor)
   }
 }
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
index f28a71a1..c894e2d5 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
@@ -71,7 +71,8 @@ trait PriorDistribution extends ObjectiveFunction {
       l2RegValue(convertToVector(coefficients))
 
   /**
-   * Compute the Laplace regularization term for the given model coefficients.
+   * Compute the Laplace regularization term for the given model coefficients. L1 regularization term is
+   * l1RegWeight * sum(abs(coefficients - priorMeans) :/ sqrt(priorVariance)).
    *
    * @param coefficients The model coefficients
    * @return The Laplace regularization term value
@@ -84,7 +85,8 @@ trait PriorDistribution extends ObjectiveFunction {
   }
 
   /**
-   * Compute the Gaussian regularization term for the given model coefficients.
+   * Compute the Gaussian regularization term for the given model coefficients. L2 regularization term is
+   * l2RegWeight * sum(pow(coefficients - priorMeans, 2) :/ priorVariance) / 2.
    *
    * @param coefficients The model coefficients
    * @return The Gaussian regularization term value
@@ -154,7 +156,9 @@ trait PriorDistributionDiff extends DiffFunction with PriorDistribution {
   }
 
   /**
-   * Compute the gradient of the Laplace term for the given model coefficients.
+   * Compute the gradient of the Laplace term for the given model coefficients. Gradient is
+   * l1RegWeight :/ sqrt(priorVariance) if coefficients >= priorMeans;
+   * - l1RegWeight :/ sqrt(priorVariance) if coefficients < priorMeans.
    *
    * @param coefficients The model coefficients
    * @return The gradient of the Laplace regularization term
@@ -167,7 +171,8 @@ trait PriorDistributionDiff extends DiffFunction with PriorDistribution {
   }
 
   /**
-   * Compute the gradient of the Gaussian regularization term for the given model coefficients.
+   * Compute the gradient of the Gaussian regularization term for the given model coefficients. Gradient is
+   * l2RegWeight * (coefficients - priorMeans) :/ priorVariance.
    *
    * @param coefficients The model coefficients
    * @return The gradient of the Gaussian regularization term
@@ -235,7 +240,8 @@ trait PriorDistributionTwiceDiff extends TwiceDiffFunction with PriorDistributio
     l2RegWeight * (multiplyVector /:/ priorVariances)
 
   /**
-   * Compute the Hessian diagonal of the Gaussian regularization term for the given model coefficients.
+   * Compute the Hessian diagonal of the Gaussian regularization term for the given model coefficients. Hessian
+   * diagonal is l2RegWeight :/ priorVariance.
    *
    * @return The Hessian diagonal of the Gaussian regularization term
    */
diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
index 8721b6f2..3fa12e0b 100644
--- a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
+++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright 2018 LinkedIn Corp. All rights reserved.
+ * Copyright 2019 LinkedIn Corp. All rights reserved.
  * Licensed under the Apache License, Version 2.0 (the "License"); you may
  * not use this file except in compliance with the License. You may obtain a
  * copy of the License at
@@ -54,7 +54,15 @@ class PriorDistributionTest {
       l2RegWeight = l2Weight
     }
 
-    // Assume that coefficients = 1-vector, prior mean = 2-vector, multiply = 3-vector, prior variance = 4-vector for all expected values below
+    /**
+     * Assume that coefficients = 1-vector, prior mean = 2-vector, multiply = 3-vector, prior variance = 4-vector for all expected values below
+     * l2RegValue = pow(1 - 2, 2) / 4 * l2Weight * DIMENSION / 2 = 0.25 * l2Weight * DIMENSION / 2;
+     * l1RegValue = abs(1 - 2) / 2 * l2Weight * DIMENSION = 0.5 * l2Weight * DIMENSION;
+     * l2RegGradient = (1 - 2) / 4 * l2Weight = (-0.25) * l2Weight;
+     * l1RegGradient = -1 / 2 * l1Weight = (-0.5) * l1Weight;
+     * l2RegHessianDiagonal = 1 / 4 * l2Weight = 0.25 * l2Weight;
+     * l2RegHessianVector = 3 / 4 * l2Weight = 0.75 * l2Weight.
+     */
     val expectedValue = MockObjectiveFunction.VALUE + 0.25 * l2Weight * DIMENSION / 2 + 0.5 * l1Weight * DIMENSION
     val expectedGradient = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.GRADIENT +
       (-0.25) * l2Weight +
@@ -74,4 +82,4 @@ class PriorDistributionTest {
     assertEquals(mockObjectiveFunction.hessianDiagonal(Unit, coefficients), expectedDiagonal)
     assertEquals(mockObjectiveFunction.hessianMatrix(Unit, coefficients), expectedMatrix)
   }
-}
\ No newline at end of file
+}

From eecae47ef9857b02c4ceb92c40b7c4f8d17c96e1 Mon Sep 17 00:00:00 2001
From: Yunbo Ouyang <youyang@linkedin.com>
Date: Tue, 12 Nov 2019 18:32:50 -0800
Subject: [PATCH 3/7] Revise incremental training code to test via GAME
 training template

---
 .../photon/ml/estimators/GameEstimator.scala  |  8 ++-
 .../glm/SingleNodeGLMLossFunction.scala       | 28 +++++-----
 .../RandomEffectOptimizationProblem.scala     | 13 ++++-
 .../game/training/GameTrainingDriver.scala    |  4 +-
 .../photon/ml/data/avro/AvroUtils.scala       | 54 +++++++++++++------
 .../ml/data/avro/ModelProcessingUtils.scala   |  2 -
 .../ScoptGameTrainingParametersParser.scala   |  6 ++-
 .../ml/function/PriorDistribution.scala       | 10 ++--
 .../photon/ml/model/Coefficients.scala        |  5 --
 .../photon/ml/model/CoefficientsTest.scala    | 13 -----
 10 files changed, 76 insertions(+), 67 deletions(-)

diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
index 2990fb8e..8155f90b 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
@@ -318,10 +318,8 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
 
         updateSequence
 
-      case (true, Some(_), _) =>
-        throw new InvalidParameterException(
-          "Both incremental training and partial model re-training enabled; these two training options are mutually " +
-            "exclusive")
+      case (true, Some(_), None) =>
+        throw new InvalidParameterException("No initial model is provided when partial retraining is turned on.")
 
       case (false, None, _) =>
         updateSequence
@@ -329,7 +327,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
       case (false, Some(_), None) =>
         throw new InvalidParameterException("Partial model re-training is enabled but no initial model provided.")
 
-      case (false, Some(lockedModelCoords), Some(initModel)) =>
+      case (_, Some(lockedModelCoords), Some(initModel)) =>
 
         val newCoordinates = updateSequence.filterNot(lockedModelCoords.contains)
 
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
index 3ce83018..95ab0764 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
@@ -162,19 +162,6 @@ object SingleNodeGLMLossFunction {
     val regularizationWeight = configuration.regularizationWeight
 
     (priorModelOpt, isIncrementalTrainingEnabled) match {
-      case (_, false) =>
-        regularizationContext.regularizationType match {
-          case RegularizationType.L2 | RegularizationType.ELASTIC_NET =>
-            new SingleNodeGLMLossFunction(singleLossFunction) with L2RegularizationTwiceDiff {
-
-              l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
-
-              override def interceptOpt: Option[Int] = interceptIndexOpt
-            }
-
-          case _ => new SingleNodeGLMLossFunction(singleLossFunction)
-        }
-
       case (Some(priorModel), true) =>
         val l1Weight = regularizationContext.getL1RegularizationWeight(regularizationWeight)
         val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
@@ -186,9 +173,18 @@ object SingleNodeGLMLossFunction {
           l2RegWeight = l2Weight
         }
 
-      case (None, true) =>
-        throw new IllegalArgumentException(
-          s"Incremental training is enabled, but prior model is missing")
+      case _ =>
+        regularizationContext.regularizationType match {
+          case RegularizationType.L2 | RegularizationType.ELASTIC_NET =>
+            new SingleNodeGLMLossFunction(singleLossFunction) with L2RegularizationTwiceDiff {
+
+              l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
+
+              override def interceptOpt: Option[Int] = interceptIndexOpt
+            }
+
+          case _ => new SingleNodeGLMLossFunction(singleLossFunction)
+        }
     }
   }
 }
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
index 7fa29586..ded62d05 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/game/RandomEffectOptimizationProblem.scala
@@ -184,9 +184,20 @@ object RandomEffectOptimizationProblem {
           projector.originalToProjectedSpaceMap(interceptIndex)
         }
 
+        // Project prior model coefficients
+        val projectedPriorModelOpt = priorModelOpt.map{
+          model =>
+            val oldCoefficients = model.coefficients
+            val newCoefficients = Coefficients(
+              projector.projectForward(oldCoefficients.means),
+              oldCoefficients.variancesOption.map(projector.projectForward))
+
+            model.updateCoefficients(newCoefficients)
+        }
+
         SingleNodeOptimizationProblem(
           configurationBroadcast.value,
-          objectiveFunctionBuilder(priorModelOpt, projectedInterceptOpt),
+          objectiveFunctionBuilder(projectedPriorModelOpt, projectedInterceptOpt),
           glmConstructorBroadcast.value,
           PhotonNonBroadcast(projectedNormalizationContext),
           varianceComputationType)
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
index 6755a4e0..16e860f4 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
@@ -337,7 +337,7 @@ object GameTrainingDriver extends GameDriver {
       !ignoreThreshold || baseModelDirOpt.isDefined,
       s"'${ignoreThresholdForNewModels.name}' set but no initial model provided (warm-start not enabled).")
 
-    // Warm-start must be enabled to ignore threshold
+    // If incremental training is enabled, prior model must be defined.
     require(
       !isIncrementalTraining || baseModelDirOpt.isDefined,
       s"'${incrementalTraining.name}' set but no initial model provided.")
@@ -386,7 +386,7 @@ object GameTrainingDriver extends GameDriver {
     validationData.map(_.persist(StorageLevel.DISK_ONLY))
 
     val modelOpt = get(modelInputDirectory).map { modelDir =>
-      Timed("Load model for warm-start training") {
+      Timed("Load model for warm-start training / incremental learning") {
         ModelProcessingUtils.loadGameModelFromHDFS(
           sc,
           modelDir,
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
index 52995bfc..aa30e107 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
@@ -367,27 +367,17 @@ object AvroUtils {
       featureMap: IndexMap): GeneralizedLinearModel = {
 
     val meansAvros = bayesianLinearModelAvro.getMeans
+    val variancesAvros = bayesianLinearModelAvro.getVariances
     val modelClass = bayesianLinearModelAvro.getModelClass.toString
-    val indexAndValueArrayBuffer = new mutable.ArrayBuffer[(Int, Double)]
 
-    val iterator = meansAvros.iterator()
-    while (iterator.hasNext) {
-      val feature = iterator.next()
-      val name = feature.getName.toString
-      val term = feature.getTerm.toString
-      val featureKey = Utils.getFeatureKey(name, term)
-      if (featureMap.contains(featureKey)) {
-        val value = feature.getValue
-        val index = featureMap.getOrElse(featureKey,
-          throw new NoSuchElementException(s"nameAndTerm $featureKey not found in the feature map"))
-        indexAndValueArrayBuffer += ((index, value))
-      }
+    val means = convertNameTermValueAvroList(meansAvros, featureMap)
+    val coefficients = if (variancesAvros == null) {
+      Coefficients(means)
+    } else {
+      val variances = convertNameTermValueAvroList(variancesAvros, featureMap)
+      Coefficients(means, Some(variances))
     }
 
-    val length = featureMap.featureDimension
-    val coefficients = Coefficients(
-      VectorUtils.toVector(indexAndValueArrayBuffer.toArray, length))
-
     // Load and instantiate the model
     try {
       Class.forName(modelClass)
@@ -402,6 +392,36 @@ object AvroUtils {
     }
   }
 
+  /**
+   * Convert the NameTermValueAvro List of the type [[JList[NameTermValue]]] to Breeze vector of type [[Vector[Double]]].
+   *
+   * @param nameTermValueAvroList List of the type [[JList[NameTermValue]]]
+   * @param featureMap The map from feature name of type [[NameAndTerm]] to feature index of type [[Int]]
+   * @return Breeze vector of type [[Vector[Double]]]
+   */
+  protected[avro] def convertNameTermValueAvroList(
+    nameTermValueAvroList: JList[NameTermValueAvro],
+    featureMap: IndexMap): Vector[Double] = {
+
+    val iterator = nameTermValueAvroList.iterator()
+    val indexAndValueArrayBuffer = new mutable.ArrayBuffer[(Int, Double)]
+    val length = featureMap.featureDimension
+
+    while (iterator.hasNext) {
+      val feature = iterator.next()
+      val name = feature.getName.toString
+      val term = feature.getTerm.toString
+      val featureKey = Utils.getFeatureKey(name, term)
+      if (featureMap.contains(featureKey)) {
+        val value = feature.getValue
+        val index = featureMap.getOrElse(featureKey,
+          throw new NoSuchElementException(s"nameAndTerm $featureKey not found in the feature map"))
+        indexAndValueArrayBuffer += ((index, value))
+      }
+    }
+    VectorUtils.toVector(indexAndValueArrayBuffer.toArray, length)
+  }
+
   /**
    * Convert the latent factor of type [[Vector[Double]]] to Avro record of type [[LatentFactorAvro]].
    *
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala
index 7c03c199..cc1b3aad 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala
@@ -323,8 +323,6 @@ object ModelProcessingUtils {
   /**
    * Load a single GLM from HDFS.
    *
-   * TODO: Currently only the means of the coefficients are loaded, the variances are discarded
-   *
    * @param inputDir The directory from which to load the model
    * @param indexMap A feature to index map
    * @param sc The Spark Context
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/io/scopt/game/ScoptGameTrainingParametersParser.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/io/scopt/game/ScoptGameTrainingParametersParser.scala
index 35599903..dc3e5069 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/io/scopt/game/ScoptGameTrainingParametersParser.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/io/scopt/game/ScoptGameTrainingParametersParser.scala
@@ -164,7 +164,11 @@ object ScoptGameTrainingParametersParser extends ScoptGameParametersParser {
 
       // Ignore Threshold for New Models
       ScoptParameter[Boolean, Boolean](
-        GameTrainingDriver.ignoreThresholdForNewModels))
+        GameTrainingDriver.ignoreThresholdForNewModels),
+
+      // Incremental training
+      ScoptParameter[Boolean, Boolean](
+        GameTrainingDriver.incrementalTraining))
 
   override protected val parser: OptionParser[ParamMap] = new OptionParser[ParamMap]("GAME-Training") {
 
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
index c894e2d5..8ddae430 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
@@ -79,7 +79,7 @@ trait PriorDistribution extends ObjectiveFunction {
    */
   protected def l1RegValue(coefficients: Vector[Double]): Double = {
 
-    val normalizedCoefficients = (coefficients - priorMeans) :/ sqrt(priorVariances)
+    val normalizedCoefficients = (coefficients - priorMeans) *:* sqrt(inversePriorVariances)
 
     l1RegWeight * sum(abs(normalizedCoefficients))
   }
@@ -93,7 +93,7 @@ trait PriorDistribution extends ObjectiveFunction {
    */
   protected def l2RegValue(coefficients: Vector[Double]): Double = {
 
-    val normalizedCoefficients = (coefficients - priorMeans) :/ sqrt(priorVariances)
+    val normalizedCoefficients = (coefficients - priorMeans) *:* sqrt(inversePriorVariances)
 
     l2RegWeight * normalizedCoefficients.dot(normalizedCoefficients) / 2
   }
@@ -167,7 +167,7 @@ trait PriorDistributionDiff extends DiffFunction with PriorDistribution {
 
     val coefficientsMask = (coefficients - priorMeans).map(coefficient => if (coefficient > 0) 1.0 else -1.0)
 
-    l1RegWeight * (coefficientsMask :/ sqrt(priorVariances))
+    l1RegWeight * (coefficientsMask *:* sqrt(inversePriorVariances))
   }
 
   /**
@@ -179,7 +179,7 @@ trait PriorDistributionDiff extends DiffFunction with PriorDistribution {
    */
   protected def l2RegGradient(coefficients: Vector[Double]): Vector[Double] = {
 
-    val normalizedCoefficients = (coefficients - priorMeans) :/ priorVariances
+    val normalizedCoefficients = (coefficients - priorMeans) *:* inversePriorVariances
 
     l2RegWeight * normalizedCoefficients
   }
@@ -237,7 +237,7 @@ trait PriorDistributionTwiceDiff extends TwiceDiffFunction with PriorDistributio
    * @return The Hessian diagonal of the Gaussian regularization term, with gradient direction vector
    */
   protected def l2RegHessianVector(multiplyVector: Vector[Double]): Vector[Double] =
-    l2RegWeight * (multiplyVector /:/ priorVariances)
+    l2RegWeight * (multiplyVector *:* inversePriorVariances)
 
   /**
    * Compute the Hessian diagonal of the Gaussian regularization term for the given model coefficients. Hessian
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
index d88fa97a..84219d7b 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
@@ -31,11 +31,6 @@ import com.linkedin.photon.ml.util.{MathUtils, Summarizable, VectorUtils}
 case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Double]] = None)
   extends Summarizable {
 
-  // Force means and variances to be of the same type (dense or sparse). This seems reasonable
-  // and greatly reduces the number of combinations to check in unit testing.
-  require(
-    variancesOption.isEmpty || variancesOption.get.getClass == means.getClass,
-    "Coefficients: If variances are provided, must be of the same vector type as means")
   // GAME over if variances are given but don't have the same length as the vector of means
   require(
     variancesOption.isEmpty || variancesOption.get.length == means.length,
diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala
index 3982cd59..3299789b 100644
--- a/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala
+++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala
@@ -27,19 +27,6 @@ class CoefficientsTest {
 
   import CoefficientsTest._
 
-  @DataProvider(name = "invalidVectorProvider")
-  def makeInvalidVectors(): Array[Array[Vector[Double]]] =
-    Array(
-      Array(dense(0,0,3,0), sparse(4)(0,2)(0,3)),
-      Array(sparse(4)(0,2)(0,3), dense(0,0,3,0)),
-      Array(dense(1,2,3), dense(1,2)),
-      Array(sparse(2)(1,3)(0,2), sparse(3)(4,5)(0,2))
-    )
-
-  @Test(dataProvider = "invalidVectorProvider", expectedExceptions = Array(classOf[IllegalArgumentException]))
-  def testPreconditions(v1: Vector[Double], v2: Vector[Double]): Unit =
-    new Coefficients(v1, Some(v2))
-
   @Test
   def testEquals(): Unit = {
 

From 1d0090267bc46b9cd94eb51b04fd7005eb3525d8 Mon Sep 17 00:00:00 2001
From: Yunbo Ouyang <youyang@linkedin.com>
Date: Fri, 10 Jan 2020 10:08:40 -0800
Subject: [PATCH 4/7] Enable Autotune for Incremental Training and Fix Subspace
 projection bugs

---
 .../data/RandomEffectDatasetIntegTest.scala   |  4 +-
 .../ml/algorithm/RandomEffectCoordinate.scala |  4 +-
 .../photon/ml/data/RandomEffectDataset.scala  | 24 +++++++++--
 .../photon/ml/estimators/GameEstimator.scala  | 40 ++++++++++++++-----
 .../game/training/GameTrainingDriver.scala    | 17 ++++++--
 .../ml/function/PriorDistribution.scala       |  6 +--
 .../ml/function/PriorDistributionTest.scala   |  5 ++-
 7 files changed, 77 insertions(+), 23 deletions(-)

diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/data/RandomEffectDatasetIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/data/RandomEffectDatasetIntegTest.scala
index a95579eb..77416c00 100644
--- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/data/RandomEffectDatasetIntegTest.scala
+++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/data/RandomEffectDatasetIntegTest.scala
@@ -184,7 +184,7 @@ class RandomEffectDatasetIntegTest extends SparkTestUtils {
     val partitioner = new RandomEffectDatasetPartitioner(NUM_PARTITIONS, sc.broadcast(partitionMap))
 
     val projectorsMap = RandomEffectDataset
-      .generateLinearSubspaceProjectors(keyedGameDatasetRDD, partitioner)
+      .generateLinearSubspaceProjectors(keyedGameDatasetRDD, partitioner, None)
       .collect
       .toMap
 
@@ -381,6 +381,7 @@ class RandomEffectDatasetIntegTest extends SparkTestUtils {
       NUM_PARTITIONS)
     val randomEffectDataset = RandomEffectDataset(
       dataRDD,
+      None,
       randomEffectDataConfig,
       rePartitioner,
       None,
@@ -440,6 +441,7 @@ class RandomEffectDatasetIntegTest extends SparkTestUtils {
       Some(activeDataLowerBound))
     val randomEffectDataset = RandomEffectDataset(
       dataRDD,
+      None,
       randomEffectDataConfig,
       rePartitioner,
       None,
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
index 7efcc5b0..df9a4e9c 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
@@ -252,7 +252,7 @@ object RandomEffectCoordinate {
             case (localModel, _) =>
               (localModel, None)
           }
-        modelsAndTrackers.persist(StorageLevel.MEMORY_ONLY_SER)
+        modelsAndTrackers.persist(StorageLevel.MEMORY_AND_DISK_SER)
 
         val models = modelsAndTrackers.mapValues(_._1)
         val optimizationTracker = RandomEffectOptimizationTracker(modelsAndTrackers.flatMap(_._2._2))
@@ -267,7 +267,7 @@ object RandomEffectCoordinate {
 
           (newModel, stateTrackers)
         }
-        modelsAndTrackers.persist(StorageLevel.MEMORY_ONLY_SER)
+        modelsAndTrackers.persist(StorageLevel.MEMORY_AND_DISK_SER)
 
         val models = modelsAndTrackers.mapValues(_._1)
         val optimizationTracker = RandomEffectOptimizationTracker(modelsAndTrackers.map(_._2._2))
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/data/RandomEffectDataset.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/data/RandomEffectDataset.scala
index 179d2439..e7dc8558 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/data/RandomEffectDataset.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/data/RandomEffectDataset.scala
@@ -25,8 +25,10 @@ import org.apache.spark.{Partitioner, SparkContext}
 
 import com.linkedin.photon.ml.Types.{FeatureShardId, REId, REType, UniqueSampleId}
 import com.linkedin.photon.ml.data.scoring.CoordinateDataScores
+import com.linkedin.photon.ml.model.RandomEffectModel
 import com.linkedin.photon.ml.projector.LinearSubspaceProjector
 import com.linkedin.photon.ml.spark.{BroadcastLike, RDDLike}
+import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
 import com.linkedin.photon.ml.util.VectorUtils
 
 /**
@@ -254,6 +256,7 @@ object RandomEffectDataset {
    */
   def apply(
       gameDataset: RDD[(UniqueSampleId, GameDatum)],
+      priorRandomEffectModelOpt: Option[RandomEffectModel],
       randomEffectDataConfiguration: RandomEffectDataConfiguration,
       randomEffectPartitioner: RandomEffectDatasetPartitioner,
       existingModelKeysRddOpt: Option[RDD[REId]],
@@ -268,7 +271,7 @@ object RandomEffectDataset {
     val keyedGameDataset = generateKeyedGameDataset(gameDataset, randomEffectDataConfiguration)
     keyedGameDataset.persist(StorageLevel.MEMORY_ONLY_SER).count
 
-    val projectors = generateLinearSubspaceProjectors(keyedGameDataset, randomEffectPartitioner)
+    val projectors = generateLinearSubspaceProjectors(keyedGameDataset, randomEffectPartitioner, priorRandomEffectModelOpt)
     projectors.persist(storageLevel).count
 
     val projectedKeyedGameDataset = generateProjectedDataset(keyedGameDataset, projectors, randomEffectPartitioner)
@@ -372,7 +375,8 @@ object RandomEffectDataset {
    */
   protected[data] def generateLinearSubspaceProjectors(
       keyedGameDataset: RDD[(REId, (UniqueSampleId, LabeledPoint))],
-      randomEffectPartitioner: RandomEffectDatasetPartitioner): RDD[(REId, LinearSubspaceProjector)] = {
+      randomEffectPartitioner: RandomEffectDatasetPartitioner,
+      priorRandomEffectModelOpt: Option[RandomEffectModel]): RDD[(REId, LinearSubspaceProjector)] = {
 
     val originalSpaceDimension = keyedGameDataset
       .take(1)
@@ -382,12 +386,26 @@ object RandomEffectDataset {
       .features
       .length
 
-    keyedGameDataset
+    val dataProjectors = keyedGameDataset
       .mapValues { case (_, labeledPoint) =>
         VectorUtils.getActiveIndices(labeledPoint.features)
       }
       .foldByKey(mutable.Set[Int](), randomEffectPartitioner)(_.union(_))
       .mapValues(activeIndices => new LinearSubspaceProjector(activeIndices.toSet, originalSpaceDimension))
+
+    val sc = dataProjectors.sparkContext
+    dataProjectors
+      .leftOuterJoin(priorRandomEffectModelOpt.map(_.modelsRDD).getOrElse(sc.emptyRDD[(REId, GeneralizedLinearModel)]))
+      .mapValues { case (projector: LinearSubspaceProjector, priorModelOpt: Option[GeneralizedLinearModel]) =>
+        val activeCoef = priorModelOpt.map {
+            model =>
+              val means = model.coefficients.means
+              VectorUtils.getActiveIndices(means)
+        }.getOrElse(Set[Int]())
+        val projectedKeySet = projector.originalToProjectedSpaceMap.keySet
+
+        new LinearSubspaceProjector(activeCoef.union(projectedKeySet).toSet, originalSpaceDimension)
+      }
   }
 
   /**
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
index 8155f90b..b431d3e9 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
@@ -546,6 +546,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
       gameDataset: RDD[(UniqueSampleId, GameDatum)]): Map[CoordinateId, D forSome {type D <: Dataset[D]}] = {
 
     val coordinateDataConfigs = getRequiredParam(coordinateDataConfigurations)
+    val isIncrementalTraining = getOrDefault(incrementalTraining)
 
     coordinateDataConfigs.map { case (coordinateId, config) =>
 
@@ -582,12 +583,32 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
             None
           }
 
-          val randomEffectDataset = RandomEffectDataset(
-            gameDataset,
-            reConfig,
-            rePartitioner,
-            existingModelKeysRddOpt,
-            StorageLevel.DISK_ONLY)
+          val randomEffectDataset = if(isIncrementalTraining) {
+            val reModels = getRequiredParam(initialModel).getModel(coordinateId).map {
+              case rem: RandomEffectModel =>
+                rem
+
+              case other =>
+                throw new IllegalArgumentException(
+                  s"Model type mismatch: expected Random Effect Model but found '${other.getClass}'")
+            }
+
+            RandomEffectDataset(
+              gameDataset,
+              reModels,
+              reConfig,
+              rePartitioner,
+              existingModelKeysRddOpt,
+              StorageLevel.DISK_ONLY)
+          } else {
+            RandomEffectDataset(
+              gameDataset,
+              None,
+              reConfig,
+              rePartitioner,
+              existingModelKeysRddOpt,
+              StorageLevel.DISK_ONLY)
+          }
           randomEffectDataset.setName(s"Random Effect Data Set: $coordinateId")
 
           if (logger.isDebugEnabled) {
@@ -740,9 +761,10 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
             }
 
           } else {
-            val priorModelOpt = initialModelOpt match {
-              case Some(gameModel) => gameModel.getModel(coordinateId)
-              case None => None
+            val priorModelOpt = if (getOrDefault(incrementalTraining)) {
+              Some(get(initialModel).get(coordinateId))
+            } else {
+              None
             }
 
             CoordinateFactory.build(
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
index 16e860f4..6751a2f2 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/cli/game/training/GameTrainingDriver.scala
@@ -40,9 +40,9 @@ import com.linkedin.photon.ml.io.scopt.game.ScoptGameTrainingParametersParser
 import com.linkedin.photon.ml.model.{DatumScoringModel, FixedEffectModel, RandomEffectModel}
 import com.linkedin.photon.ml.normalization.NormalizationType.NormalizationType
 import com.linkedin.photon.ml.normalization.{NormalizationContext, NormalizationType}
-import com.linkedin.photon.ml.optimization.VarianceComputationType
+import com.linkedin.photon.ml.optimization.{RegularizationType, VarianceComputationType}
 import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType
-import com.linkedin.photon.ml.optimization.game.CoordinateOptimizationConfiguration
+import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, GLMOptimizationConfiguration}
 import com.linkedin.photon.ml.stat.FeatureDataStatistics
 import com.linkedin.photon.ml.util.Implicits._
 import com.linkedin.photon.ml.util.Utils
@@ -672,7 +672,18 @@ object GameTrainingDriver extends GameDriver {
         val (_, baseConfig, evaluationResults) = models.head
 
         val iteration = getOrDefault(hyperParameterTuningIter)
-        val dimension = baseConfig.toSeq.length
+
+        val dimension = baseConfig.toSeq.map {
+          case (_, config: GLMOptimizationConfiguration) =>
+            config.regularizationContext.regularizationType match {
+              case RegularizationType.ELASTIC_NET => 2
+              case RegularizationType.L2 => 1
+              case RegularizationType.L1 => 1
+              case RegularizationType.NONE => 0
+            }
+          case _ => throw new IllegalArgumentException(s"Unknown optimization config!")
+        }.sum
+
         val mode = getOrDefault(hyperParameterTuning)
 
         val evaluator = evaluationResults.get.primaryEvaluator
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
index 8ddae430..02709a2d 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
@@ -16,10 +16,10 @@ package com.linkedin.photon.ml.function
 
 import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, sum}
 import breeze.numerics.{abs, sqrt}
-
+import com.linkedin.photon.ml.constants.MathConst
 import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
-import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
+import com.linkedin.photon.ml.util.BroadcastWrapper
 
 /**
  * Trait for an incremental training objective function. It is assumed that the prior is a product of Gaussian and
@@ -32,7 +32,7 @@ trait PriorDistribution extends ObjectiveFunction {
 
   lazy protected val priorMeans: Vector[Double] = priorCoefficients.means
   lazy protected val priorVariances: Vector[Double] = priorCoefficients.variancesOption.get
-  lazy protected val inversePriorVariances: DenseVector[Double] = VectorUtils.invertVector(priorVariances).toDenseVector
+  lazy protected val inversePriorVariances: DenseVector[Double] = priorVariances.map(v => if (v > MathConst.EPSILON) 1.0 / v else 1.0).toDenseVector
   protected var l1RegWeight: Double = 0D
   protected var l2RegWeight: Double = 0D
 
diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
index 3fa12e0b..58abdd94 100644
--- a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
+++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
@@ -56,8 +56,9 @@ class PriorDistributionTest {
 
     /**
      * Assume that coefficients = 1-vector, prior mean = 2-vector, multiply = 3-vector, prior variance = 4-vector for all expected values below
-     * l2RegValue = pow(1 - 2, 2) / 4 * l2Weight * DIMENSION / 2 = 0.25 * l2Weight * DIMENSION / 2;
-     * l1RegValue = abs(1 - 2) / 2 * l2Weight * DIMENSION = 0.5 * l2Weight * DIMENSION;
+     *
+     * l2RegValue = sum(DenseVector.fill(DIMENSION){pow(1 - 2, 2) / 4)}) * l2Weight / 2 = 0.25 * l2Weight * DIMENSION / 2;
+     * l1RegValue = sum(DenseVector.fill(DIMENSION){abs(1 - 2) / 2}) * l1Weight = 0.5 * l1Weight * DIMENSION;
      * l2RegGradient = (1 - 2) / 4 * l2Weight = (-0.25) * l2Weight;
      * l1RegGradient = -1 / 2 * l1Weight = (-0.5) * l1Weight;
      * l2RegHessianDiagonal = 1 / 4 * l2Weight = 0.25 * l2Weight;

From d67bb74d02e405ca6df1f0c70d2558ec4af29f62 Mon Sep 17 00:00:00 2001
From: Yunbo Ouyang <youyang@linkedin.com>
Date: Wed, 22 Jan 2020 18:19:36 -0800
Subject: [PATCH 5/7] Remove L1 regularization

---
 .../photon/ml/estimators/GameEstimator.scala  |  7 +--
 .../glm/DistributedGLMLossFunction.scala      |  2 -
 .../glm/SingleNodeGLMLossFunction.scala       |  2 -
 .../ml/function/PriorDistribution.scala       | 49 ++-----------------
 .../ml/function/PriorDistributionTest.scala   | 19 ++-----
 5 files changed, 13 insertions(+), 66 deletions(-)

diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
index b431d3e9..e5806b77 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala
@@ -437,7 +437,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
 
     // Train GAME models on training data
     val results = Timed("Training models:") {
-      var prevGameModel: Option[GameModel] = if (getOrDefault(useWarmStart)) {
+      var prevGameModel: Option[GameModel] = if (getOrDefault(useWarmStart) || getOrDefault(incrementalTraining)) {
         get(initialModel)
       } else {
         None
@@ -762,7 +762,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
 
           } else {
             val priorModelOpt = if (getOrDefault(incrementalTraining)) {
-              Some(get(initialModel).get(coordinateId))
+              Some(initialModelOpt.get(coordinateId))
             } else {
               None
             }
@@ -784,7 +784,8 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P
         }
         .toMap
 
-    val result = coordinateDescent.run(coordinates, initialModelOpt.map(_.toMap))
+    val warmStartModelOpt = if (getOrDefault(useWarmStart)) initialModelOpt else None
+    val result = coordinateDescent.run(coordinates, warmStartModelOpt.map(_.toMap))
 
     coordinates.foreach { case (_, coordinate) =>
       coordinate match {
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala
index d4e81c3c..670f0881 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala
@@ -189,13 +189,11 @@ object DistributedGLMLossFunction {
         }
 
       case (Some(priorModel), true) =>
-        val l1Weight = regularizationContext.getL1RegularizationWeight(regularizationWeight)
         val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
         val priorModelCoefficients = priorModel.coefficients
 
         new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth) with PriorDistributionTwiceDiff {
           override val priorCoefficients: ModelCoefficients = priorModelCoefficients
-          l1RegWeight = l1Weight
           l2RegWeight = l2Weight
         }
 
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
index 95ab0764..275c8591 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala
@@ -163,13 +163,11 @@ object SingleNodeGLMLossFunction {
 
     (priorModelOpt, isIncrementalTrainingEnabled) match {
       case (Some(priorModel), true) =>
-        val l1Weight = regularizationContext.getL1RegularizationWeight(regularizationWeight)
         val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight)
         val priorModelCoefficients = priorModel.coefficients
 
         new SingleNodeGLMLossFunction(singleLossFunction) with PriorDistributionTwiceDiff {
           override val priorCoefficients: ModelCoefficients = priorModelCoefficients
-          l1RegWeight = l1Weight
           l2RegWeight = l2Weight
         }
 
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
index 02709a2d..934e9e99 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
@@ -14,8 +14,8 @@
  */
 package com.linkedin.photon.ml.function
 
-import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, sum}
-import breeze.numerics.{abs, sqrt}
+import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag}
+import breeze.numerics.sqrt
 import com.linkedin.photon.ml.constants.MathConst
 import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
@@ -33,19 +33,10 @@ trait PriorDistribution extends ObjectiveFunction {
   lazy protected val priorMeans: Vector[Double] = priorCoefficients.means
   lazy protected val priorVariances: Vector[Double] = priorCoefficients.variancesOption.get
   lazy protected val inversePriorVariances: DenseVector[Double] = priorVariances.map(v => if (v > MathConst.EPSILON) 1.0 / v else 1.0).toDenseVector
-  protected var l1RegWeight: Double = 0D
   protected var l2RegWeight: Double = 0D
 
-  require(l1RegWeight >= 0D, s"Invalid regularization weight '$l1RegWeight")
   require(l2RegWeight >= 0D, s"Invalid regularization weight '$l2RegWeight")
 
-  /**
-   * Getter for the Laplace weight of the prior.
-   *
-   * @return The L1 regularization weight
-   */
-  def l1RegularizationWeight: Double = l1RegWeight
-
   /**
    * Getter for the Gaussian weight of the prior.
    *
@@ -67,23 +58,8 @@ trait PriorDistribution extends ObjectiveFunction {
       coefficients: Coefficients,
       normalizationContext: BroadcastWrapper[NormalizationContext]): Double =
     super.value(input, coefficients, normalizationContext) +
-      l1RegValue(convertToVector(coefficients)) +
       l2RegValue(convertToVector(coefficients))
 
-  /**
-   * Compute the Laplace regularization term for the given model coefficients. L1 regularization term is
-   * l1RegWeight * sum(abs(coefficients - priorMeans) :/ sqrt(priorVariance)).
-   *
-   * @param coefficients The model coefficients
-   * @return The Laplace regularization term value
-   */
-  protected def l1RegValue(coefficients: Vector[Double]): Double = {
-
-    val normalizedCoefficients = (coefficients - priorMeans) *:* sqrt(inversePriorVariances)
-
-    l1RegWeight * sum(abs(normalizedCoefficients))
-  }
-
   /**
    * Compute the Gaussian regularization term for the given model coefficients. L2 regularization term is
    * l2RegWeight * sum(pow(coefficients - priorMeans, 2) :/ priorVariance) / 2.
@@ -147,29 +123,12 @@ trait PriorDistributionDiff extends DiffFunction with PriorDistribution {
       normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = {
 
     val (baseValue, baseGradient) = super.calculate(input, coefficients, normalizationContext)
-    val valueWithRegularization = baseValue + l1RegValue(convertToVector(coefficients)) +
-      l2RegValue(convertToVector(coefficients))
-    val gradientWithRegularization = baseGradient + l1RegGradient(convertToVector(coefficients)) +
-      l2RegGradient(convertToVector(coefficients))
+    val valueWithRegularization = baseValue + l2RegValue(convertToVector(coefficients))
+    val gradientWithRegularization = baseGradient + l2RegGradient(convertToVector(coefficients))
 
     (valueWithRegularization, gradientWithRegularization)
   }
 
-  /**
-   * Compute the gradient of the Laplace term for the given model coefficients. Gradient is
-   * l1RegWeight :/ sqrt(priorVariance) if coefficients >= priorMeans;
-   * - l1RegWeight :/ sqrt(priorVariance) if coefficients < priorMeans.
-   *
-   * @param coefficients The model coefficients
-   * @return The gradient of the Laplace regularization term
-   */
-  protected def l1RegGradient(coefficients: Vector[Double]): Vector[Double] = {
-
-    val coefficientsMask = (coefficients - priorMeans).map(coefficient => if (coefficient > 0) 1.0 else -1.0)
-
-    l1RegWeight * (coefficientsMask *:* sqrt(inversePriorVariances))
-  }
-
   /**
    * Compute the gradient of the Gaussian regularization term for the given model coefficients. Gradient is
    * l2RegWeight * (coefficients - priorMeans) :/ priorVariance.
diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
index 58abdd94..b68b1086 100644
--- a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
+++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
@@ -45,12 +45,10 @@ class PriorDistributionTest {
     val multiplyVector = coefficients * 3D
     val priorVar = coefficients :* 4D
 
-    val l1Weight = 10D
     val l2Weight = 10D
 
     val mockObjectiveFunction = new MockObjectiveFunction with PriorDistributionTwiceDiff {
       override val priorCoefficients = ModelCoefficients(priorMean, Option(priorVar))
-      l1RegWeight = l1Weight
       l2RegWeight = l2Weight
     }
 
@@ -58,22 +56,15 @@ class PriorDistributionTest {
      * Assume that coefficients = 1-vector, prior mean = 2-vector, multiply = 3-vector, prior variance = 4-vector for all expected values below
      *
      * l2RegValue = sum(DenseVector.fill(DIMENSION){pow(1 - 2, 2) / 4)}) * l2Weight / 2 = 0.25 * l2Weight * DIMENSION / 2;
-     * l1RegValue = sum(DenseVector.fill(DIMENSION){abs(1 - 2) / 2}) * l1Weight = 0.5 * l1Weight * DIMENSION;
      * l2RegGradient = (1 - 2) / 4 * l2Weight = (-0.25) * l2Weight;
-     * l1RegGradient = -1 / 2 * l1Weight = (-0.5) * l1Weight;
      * l2RegHessianDiagonal = 1 / 4 * l2Weight = 0.25 * l2Weight;
      * l2RegHessianVector = 3 / 4 * l2Weight = 0.75 * l2Weight.
      */
-    val expectedValue = MockObjectiveFunction.VALUE + 0.25 * l2Weight * DIMENSION / 2 + 0.5 * l1Weight * DIMENSION
-    val expectedGradient = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.GRADIENT +
-      (-0.25) * l2Weight +
-      (-0.5) * l1Weight))
-    val expectedVector = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_VECTOR +
-      0.75 * l2Weight))
-    val expectedDiagonal = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_DIAGONAL +
-      0.25 * l2Weight))
-    val expectedMatrix =
-      diag(DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_MATRIX + 0.25 * l2Weight)))
+    val expectedValue = MockObjectiveFunction.VALUE + 0.25 * l2Weight * DIMENSION / 2
+    val expectedGradient = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.GRADIENT + (-0.25) * l2Weight))
+    val expectedVector = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_VECTOR + 0.75 * l2Weight))
+    val expectedDiagonal = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_DIAGONAL + 0.25 * l2Weight))
+    val expectedMatrix = diag(DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_MATRIX + 0.25 * l2Weight)))
 
     assertEquals(mockObjectiveFunction.value(Unit, coefficients, mockNormalization), expectedValue)
     assertEquals(mockObjectiveFunction.gradient(Unit, coefficients, mockNormalization), expectedGradient)

From 73b475f0497e232dc4663d310c9e009101987442 Mon Sep 17 00:00:00 2001
From: Yunbo Ouyang <youyang@linkedin.com>
Date: Fri, 31 Jan 2020 11:34:43 -0800
Subject: [PATCH 6/7] Add full Hessian matrix calculation

---
 .../photon/ml/model/GameModelIntegTest.scala  |  524 +++----
 .../ml/model/RandomEffectModelIntegTest.scala |  232 ++--
 ...tributedOptimizationProblemIntegTest.scala |  824 +++++------
 ...ngleNodeOptimizationProblemIntegTest.scala |  620 ++++-----
 .../photon/ml/model/RandomEffectModel.scala   |    3 -
 .../DistributedOptimizationProblem.scala      |   16 +-
 ...GeneralizedLinearOptimizationProblem.scala |   13 +-
 .../SingleNodeOptimizationProblem.scala       |   14 +-
 .../projector/LinearSubspaceProjector.scala   |   50 +-
 .../DistributedOptimizationProblemTest.scala  |    5 +-
 ...ralizedLinearOptimizationProblemTest.scala |    6 +-
 .../SingleNodeOptimizationProblemTest.scala   |    5 +-
 .../BayesianLinearModelFullMatrixAvro.avsc    |   48 +
 .../main/avro/DoubleNameTermValueAvro.avsc    |   28 +
 .../avro/ModelProcessingUtilsIntegTest.scala  | 1214 ++++++++---------
 .../photon/ml/data/avro/AvroUtils.scala       |  139 +-
 .../ml/data/avro/ModelProcessingUtils.scala   |   20 +-
 .../photon/ml/data/avro/AvroUtilsTest.scala   |    8 +-
 .../ml/function/PriorDistribution.scala       |   25 +-
 .../photon/ml/model/Coefficients.scala        |   10 +-
 .../normalization/NormalizationContext.scala  |   19 +-
 .../linkedin/photon/ml/util/VectorUtils.scala |   90 +-
 .../ml/function/PriorDistributionTest.scala   |  154 +--
 .../photon/ml/model/CoefficientsTest.scala    |  184 +--
 24 files changed, 2286 insertions(+), 1965 deletions(-)
 create mode 100644 photon-avro-schemas/src/main/avro/BayesianLinearModelFullMatrixAvro.avsc
 create mode 100644 photon-avro-schemas/src/main/avro/DoubleNameTermValueAvro.avsc

diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/GameModelIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/GameModelIntegTest.scala
index abbce132..903c5fd3 100644
--- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/GameModelIntegTest.scala
+++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/GameModelIntegTest.scala
@@ -1,262 +1,262 @@
-/*
- * Copyright 2017 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License. You may obtain a
- * copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package com.linkedin.photon.ml.model
-
-import org.apache.spark.SparkContext
-import org.testng.Assert._
-import org.testng.annotations.Test
-
-import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
-import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.supervised.regression.PoissonRegressionModel
-import com.linkedin.photon.ml.test.SparkTestUtils
-
-/**
- * Integration tests for [[GameModel]].
- */
-class GameModelIntegTest extends SparkTestUtils {
-
-  /**
-   * Generate a toy fixed effect model.
-   *
-   * @param sc The Spark context
-   * @param coefficientDimension The dimension of the coefficients
-   * @return A fixed effect model
-   */
-  protected def getFixedEffectModel(sc: SparkContext, coefficientDimension: Int): FixedEffectModel = {
-
-    // Coefficients parameter
-    val glm: GeneralizedLinearModel =
-      LogisticRegressionModel(Coefficients.initializeZeroCoefficients(coefficientDimension))
-
-    // Meta data
-    val featureShardId = "featureShardId"
-
-    // Fixed effect model
-    new FixedEffectModel(sc.broadcast(glm), featureShardId)
-  }
-
-  /**
-   * Generate a toy random effect model.
-   *
-   * @param sc The Spark context
-   * @param coefficientDimension The dimension of the coefficients
-   * @return A random effect model
-   */
-  protected def getRandomEffectModel(sc: SparkContext, coefficientDimension: Int): RandomEffectModel = {
-
-    // Coefficients parameter
-    val glm: GeneralizedLinearModel =
-      LogisticRegressionModel(Coefficients.initializeZeroCoefficients(coefficientDimension))
-
-    // Meta data
-    val featureShardId = "featureShardId"
-    val REType = "REType"
-
-    // Random effect model
-    val numCoefficients = 5
-    val modelsRDD = sc.parallelize(Seq.tabulate(numCoefficients)(i => (i.toString, glm)))
-    new RandomEffectModel(modelsRDD, REType, featureShardId)
-  }
-
-  @Test
-  def testGetModel(): Unit = sparkTest("testGetModel") {
-
-    val FEModelName1 = "fix1"
-    val REModelName1 = "random1"
-    val FEModelName2 = "fix2"
-    val REModelName2 = "random2"
-
-    val FEModel1 = getFixedEffectModel(sc, 1)
-    val FEModel2 = getFixedEffectModel(sc, 2)
-    val REModel1 = getRandomEffectModel(sc, 1)
-    val REModel2 = getRandomEffectModel(sc, 2)
-
-    // case 1: fixed effect model only
-    val FEModelOnly = GameModel((FEModelName1, FEModel1), (FEModelName2, FEModel2))
-    assertEquals(FEModel1, FEModelOnly.getModel(FEModelName1).get)
-    assertEquals(FEModel2, FEModelOnly.getModel(FEModelName2).get)
-    assertTrue(FEModelOnly.getModel(REModelName1).isEmpty)
-
-    // case 2: random effect model only
-    val REModelOnly = GameModel((REModelName1, REModel1), (REModelName2, REModel2))
-    assertEquals(REModel1, REModelOnly.getModel(REModelName1).get)
-    assertEquals(REModel2, REModelOnly.getModel(REModelName2).get)
-    assertTrue(REModelOnly.getModel(FEModelName2).isEmpty)
-
-    // case 3: fixed and random effect model
-    val fixedAndRandomEffectModel = GameModel((FEModelName1, FEModel1), (REModelName2, REModel2))
-    assertEquals(FEModel1, fixedAndRandomEffectModel.getModel(FEModelName1).get)
-    assertEquals(REModel2, fixedAndRandomEffectModel.getModel(REModelName2).get)
-    assertTrue(fixedAndRandomEffectModel.getModel(FEModelName2).isEmpty)
-    assertTrue(fixedAndRandomEffectModel.getModel(REModelName1).isEmpty)
-  }
-
-  @Test
-  def testUpdateModelOfSameType(): Unit = sparkTest("testUpdateModelOfSameType") {
-
-    val FEModelName = "fix"
-    val REModelName = "random"
-
-    val FEModel1 = getFixedEffectModel(sc, 1)
-    val FEModel2 = getFixedEffectModel(sc, 2)
-    val REModel1 = getRandomEffectModel(sc, 1)
-    val REModel2 = getRandomEffectModel(sc, 2)
-
-    val gameModel11 = GameModel((FEModelName, FEModel1), (REModelName, REModel1))
-    assertEquals(gameModel11.getModel(FEModelName).get, FEModel1)
-    assertEquals(gameModel11.getModel(REModelName).get, REModel1)
-    val gameModel21 = gameModel11.updateModel(FEModelName, FEModel2)
-    assertEquals(gameModel21.getModel(FEModelName).get, FEModel2)
-    val gameModel22 = gameModel21.updateModel(REModelName, REModel2)
-    assertEquals(gameModel22.getModel(REModelName).get, REModel2)
-  }
-
-  @Test(expectedExceptions = Array(classOf[IllegalArgumentException]))
-  def testUpdateModelOfDifferentType(): Unit = sparkTest("testUpdateModelOfDifferentType") {
-
-    val FEModelName = "fix"
-
-    val FEModel = getFixedEffectModel(sc, 1)
-    val REModel = getRandomEffectModel(sc, 1)
-
-    val gameModel = GameModel((FEModelName, FEModel))
-    gameModel.updateModel(FEModelName, REModel)
-  }
-
-  @Test
-  def testToMap(): Unit = sparkTest("testToMap") {
-
-    val FEModelName = "fix"
-    val REModelName = "random"
-
-    val FEModel = getFixedEffectModel(sc, 1)
-    val REModel = getRandomEffectModel(sc, 1)
-
-    val modelsMap = Map(FEModelName -> FEModel, REModelName -> REModel)
-    val gameModel = new GameModel(modelsMap)
-    assertEquals(gameModel.toMap, modelsMap)
-  }
-
-  @Test
-  def testEquals(): Unit = sparkTest("testEquals") {
-
-    val FEModelName1 = "fix1"
-    val REModelName1 = "random1"
-    val FEModelName2 = "fix2"
-    val REModelName2 = "random2"
-
-    val FEModel1 = getFixedEffectModel(sc, 1)
-    val FEModel2 = getFixedEffectModel(sc, 2)
-    val REModel1 = getRandomEffectModel(sc, 1)
-    val REModel2 = getRandomEffectModel(sc, 1)
-
-    val gameModel1111 = GameModel((FEModelName1, FEModel1), (REModelName1, REModel1))
-    val gameModel1112 = GameModel((FEModelName1, FEModel1), (REModelName1, REModel2))
-    val gameModel1212 = GameModel((FEModelName1, FEModel2), (REModelName1, REModel2))
-    val gameModel1122 = GameModel((FEModelName1, FEModel1), (REModelName2, REModel2))
-    val gameModel2121 = GameModel((FEModelName2, FEModel1), (REModelName2, REModel1))
-    val gameModel2211 = GameModel((FEModelName2, FEModel2), (REModelName1, REModel1))
-    val gameModel2212 = GameModel((FEModelName2, FEModel2), (REModelName1, REModel2))
-
-    // Same name and model
-    assertEquals(gameModel1111, gameModel1111)
-    assertEquals(gameModel1111, gameModel1112)
-    assertEquals(gameModel2211, gameModel2212)
-
-    // Either name or model is different
-    assertNotEquals(gameModel1212, gameModel1122)
-    assertNotEquals(gameModel2121, gameModel2211)
-    assertNotEquals(gameModel1212, gameModel2212)
-  }
-
-  @Test
-  def testModelsConsistencyGood(): Unit = sparkTest("testModelsConsistencyGood") {
-
-    // Features: we have three feature spaces: one for the fixed model, and one for each random model.
-    // Each model has its own separate feature space, but feature values can be shared between spaces.
-    // Features shared between spaces have a unique name, but possibly different indices.
-    val numFeaturesPerModel = Map(("fixedFeatures", 10), ("RE1Features", 10), ("RE2Features", 10))
-
-    // Fixed effect model
-    val glm = new LogisticRegressionModel(
-      CoefficientsTest.sparseCoefficients(numFeaturesPerModel("fixedFeatures"))(1,2,5)(11,21,51))
-    val FEModel = new FixedEffectModel(sc.broadcast(glm), "fixedFeatures")
-
-    // Random effect 1 has 2 items
-    val numFeaturesRE1 = numFeaturesPerModel("RE1Features")
-    val RE1Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,5,7)(111,511,911)
-    val glmRE11: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item1)
-    val RE1Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,2)(112,512)
-    val glmRE12: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item2)
-
-    val glmRE1RDD = sc.parallelize(List(("RE1Item1", glmRE11), ("RE1Item2", glmRE12)))
-    val RE1Model = new RandomEffectModel(glmRE1RDD, "REModel1", "RE1Features")
-
-    // Random effect 2 has 3 items (of a different kind)
-    val numFeaturesRE2 = numFeaturesPerModel("RE2Features")
-    val RE2Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(3,4,6)(321,421,621)
-    val glmRE21: GeneralizedLinearModel = new LogisticRegressionModel(RE2Item1)
-    val RE2Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(4,5)(322,422)
-    val glmRE22: GeneralizedLinearModel = new LogisticRegressionModel(RE2Item2)
-    val RE2Item3 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(2,7,8)(323,423,523)
-    val glmRE23: GeneralizedLinearModel = new LogisticRegressionModel(RE2Item3)
-
-    val glmRE2RDD = sc.parallelize(List(("RE2Item1", glmRE21), ("RE2Item2", glmRE22), ("RE2Item3", glmRE23)))
-    val RE2Model = new RandomEffectModel(glmRE2RDD, "REModel2", "RE2Features")
-
-    // This GAME model has 1 fixed effect, and 2 different random effect models
-    GameModel(("fixed", FEModel), ("RE1", RE1Model), ("RE2", RE2Model))
-  }
-
-  @Test(expectedExceptions = Array(classOf[IllegalArgumentException]))
-  def testModelsConsistencyBad(): Unit = sparkTest("testModelsConsistencyBad") {
-
-    // Features: we have three feature spaces: one for the fixed model, and one for each random model.
-    // Each model has its own separate feature space, but feature values can be shared between spaces.
-    // Features shared between spaces have a unique name, but possibly different indices.
-    val numFeaturesPerModel = Map(("fixedFeatures", 10), ("RE1Features", 10), ("RE2Features", 10))
-
-    // Fixed effect model
-    val glm = new LogisticRegressionModel(
-      CoefficientsTest.sparseCoefficients(numFeaturesPerModel("fixedFeatures"))(1,2,5)(11,21,51))
-    val FEModel = new FixedEffectModel(sc.broadcast(glm), "fixedFeatures")
-
-    // Random effect 1 has 2 items
-    val numFeaturesRE1 = numFeaturesPerModel("RE1Features")
-    val RE1Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,5,7)(111,511,911)
-    val glmRE11: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item1)
-    val RE1Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,2)(112,512)
-    val glmRE12: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item2)
-
-    val glmRE1RDD = sc.parallelize(List(("RE1Item1", glmRE11), ("RE1Item2", glmRE12)))
-    val RE1Model = new RandomEffectModel(glmRE1RDD, "REModel1", "RE1Features")
-
-    // Random effect 2 has 3 items (of a different kind of model)
-    val numFeaturesRE2 = numFeaturesPerModel("RE2Features")
-    val RE2Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(3,4,6)(321,421,621)
-    val glmRE21: GeneralizedLinearModel = new PoissonRegressionModel(RE2Item1)
-    val RE2Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(4,5)(322,422)
-    val glmRE22: GeneralizedLinearModel = new PoissonRegressionModel(RE2Item2)
-    val RE2Item3 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(2,7,8)(323,423,523)
-    val glmRE23: GeneralizedLinearModel = new PoissonRegressionModel(RE2Item3)
-
-    val glmRE2RDD = sc.parallelize(List(("RE2Item1", glmRE21), ("RE2Item2", glmRE22), ("RE2Item3", glmRE23)))
-    val RE2Model = new RandomEffectModel(glmRE2RDD, "REModel2", "RE2Features")
-
-    // This GAME model has 1 fixed effect, and 2 different random effect models
-    GameModel(("fixed", FEModel), ("RE1", RE1Model), ("RE2", RE2Model))
-  }
-}
+///*
+// * Copyright 2017 LinkedIn Corp. All rights reserved.
+// * Licensed under the Apache License, Version 2.0 (the "License"); you may
+// * not use this file except in compliance with the License. You may obtain a
+// * copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// * License for the specific language governing permissions and limitations
+// * under the License.
+// */
+//package com.linkedin.photon.ml.model
+//
+//import org.apache.spark.SparkContext
+//import org.testng.Assert._
+//import org.testng.annotations.Test
+//
+//import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
+//import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
+//import com.linkedin.photon.ml.supervised.regression.PoissonRegressionModel
+//import com.linkedin.photon.ml.test.SparkTestUtils
+//
+///**
+// * Integration tests for [[GameModel]].
+// */
+//class GameModelIntegTest extends SparkTestUtils {
+//
+//  /**
+//   * Generate a toy fixed effect model.
+//   *
+//   * @param sc The Spark context
+//   * @param coefficientDimension The dimension of the coefficients
+//   * @return A fixed effect model
+//   */
+//  protected def getFixedEffectModel(sc: SparkContext, coefficientDimension: Int): FixedEffectModel = {
+//
+//    // Coefficients parameter
+//    val glm: GeneralizedLinearModel =
+//      LogisticRegressionModel(Coefficients.initializeZeroCoefficients(coefficientDimension))
+//
+//    // Meta data
+//    val featureShardId = "featureShardId"
+//
+//    // Fixed effect model
+//    new FixedEffectModel(sc.broadcast(glm), featureShardId)
+//  }
+//
+//  /**
+//   * Generate a toy random effect model.
+//   *
+//   * @param sc The Spark context
+//   * @param coefficientDimension The dimension of the coefficients
+//   * @return A random effect model
+//   */
+//  protected def getRandomEffectModel(sc: SparkContext, coefficientDimension: Int): RandomEffectModel = {
+//
+//    // Coefficients parameter
+//    val glm: GeneralizedLinearModel =
+//      LogisticRegressionModel(Coefficients.initializeZeroCoefficients(coefficientDimension))
+//
+//    // Meta data
+//    val featureShardId = "featureShardId"
+//    val REType = "REType"
+//
+//    // Random effect model
+//    val numCoefficients = 5
+//    val modelsRDD = sc.parallelize(Seq.tabulate(numCoefficients)(i => (i.toString, glm)))
+//    new RandomEffectModel(modelsRDD, REType, featureShardId)
+//  }
+//
+//  @Test
+//  def testGetModel(): Unit = sparkTest("testGetModel") {
+//
+//    val FEModelName1 = "fix1"
+//    val REModelName1 = "random1"
+//    val FEModelName2 = "fix2"
+//    val REModelName2 = "random2"
+//
+//    val FEModel1 = getFixedEffectModel(sc, 1)
+//    val FEModel2 = getFixedEffectModel(sc, 2)
+//    val REModel1 = getRandomEffectModel(sc, 1)
+//    val REModel2 = getRandomEffectModel(sc, 2)
+//
+//    // case 1: fixed effect model only
+//    val FEModelOnly = GameModel((FEModelName1, FEModel1), (FEModelName2, FEModel2))
+//    assertEquals(FEModel1, FEModelOnly.getModel(FEModelName1).get)
+//    assertEquals(FEModel2, FEModelOnly.getModel(FEModelName2).get)
+//    assertTrue(FEModelOnly.getModel(REModelName1).isEmpty)
+//
+//    // case 2: random effect model only
+//    val REModelOnly = GameModel((REModelName1, REModel1), (REModelName2, REModel2))
+//    assertEquals(REModel1, REModelOnly.getModel(REModelName1).get)
+//    assertEquals(REModel2, REModelOnly.getModel(REModelName2).get)
+//    assertTrue(REModelOnly.getModel(FEModelName2).isEmpty)
+//
+//    // case 3: fixed and random effect model
+//    val fixedAndRandomEffectModel = GameModel((FEModelName1, FEModel1), (REModelName2, REModel2))
+//    assertEquals(FEModel1, fixedAndRandomEffectModel.getModel(FEModelName1).get)
+//    assertEquals(REModel2, fixedAndRandomEffectModel.getModel(REModelName2).get)
+//    assertTrue(fixedAndRandomEffectModel.getModel(FEModelName2).isEmpty)
+//    assertTrue(fixedAndRandomEffectModel.getModel(REModelName1).isEmpty)
+//  }
+//
+//  @Test
+//  def testUpdateModelOfSameType(): Unit = sparkTest("testUpdateModelOfSameType") {
+//
+//    val FEModelName = "fix"
+//    val REModelName = "random"
+//
+//    val FEModel1 = getFixedEffectModel(sc, 1)
+//    val FEModel2 = getFixedEffectModel(sc, 2)
+//    val REModel1 = getRandomEffectModel(sc, 1)
+//    val REModel2 = getRandomEffectModel(sc, 2)
+//
+//    val gameModel11 = GameModel((FEModelName, FEModel1), (REModelName, REModel1))
+//    assertEquals(gameModel11.getModel(FEModelName).get, FEModel1)
+//    assertEquals(gameModel11.getModel(REModelName).get, REModel1)
+//    val gameModel21 = gameModel11.updateModel(FEModelName, FEModel2)
+//    assertEquals(gameModel21.getModel(FEModelName).get, FEModel2)
+//    val gameModel22 = gameModel21.updateModel(REModelName, REModel2)
+//    assertEquals(gameModel22.getModel(REModelName).get, REModel2)
+//  }
+//
+//  @Test(expectedExceptions = Array(classOf[IllegalArgumentException]))
+//  def testUpdateModelOfDifferentType(): Unit = sparkTest("testUpdateModelOfDifferentType") {
+//
+//    val FEModelName = "fix"
+//
+//    val FEModel = getFixedEffectModel(sc, 1)
+//    val REModel = getRandomEffectModel(sc, 1)
+//
+//    val gameModel = GameModel((FEModelName, FEModel))
+//    gameModel.updateModel(FEModelName, REModel)
+//  }
+//
+//  @Test
+//  def testToMap(): Unit = sparkTest("testToMap") {
+//
+//    val FEModelName = "fix"
+//    val REModelName = "random"
+//
+//    val FEModel = getFixedEffectModel(sc, 1)
+//    val REModel = getRandomEffectModel(sc, 1)
+//
+//    val modelsMap = Map(FEModelName -> FEModel, REModelName -> REModel)
+//    val gameModel = new GameModel(modelsMap)
+//    assertEquals(gameModel.toMap, modelsMap)
+//  }
+//
+//  @Test
+//  def testEquals(): Unit = sparkTest("testEquals") {
+//
+//    val FEModelName1 = "fix1"
+//    val REModelName1 = "random1"
+//    val FEModelName2 = "fix2"
+//    val REModelName2 = "random2"
+//
+//    val FEModel1 = getFixedEffectModel(sc, 1)
+//    val FEModel2 = getFixedEffectModel(sc, 2)
+//    val REModel1 = getRandomEffectModel(sc, 1)
+//    val REModel2 = getRandomEffectModel(sc, 1)
+//
+//    val gameModel1111 = GameModel((FEModelName1, FEModel1), (REModelName1, REModel1))
+//    val gameModel1112 = GameModel((FEModelName1, FEModel1), (REModelName1, REModel2))
+//    val gameModel1212 = GameModel((FEModelName1, FEModel2), (REModelName1, REModel2))
+//    val gameModel1122 = GameModel((FEModelName1, FEModel1), (REModelName2, REModel2))
+//    val gameModel2121 = GameModel((FEModelName2, FEModel1), (REModelName2, REModel1))
+//    val gameModel2211 = GameModel((FEModelName2, FEModel2), (REModelName1, REModel1))
+//    val gameModel2212 = GameModel((FEModelName2, FEModel2), (REModelName1, REModel2))
+//
+//    // Same name and model
+//    assertEquals(gameModel1111, gameModel1111)
+//    assertEquals(gameModel1111, gameModel1112)
+//    assertEquals(gameModel2211, gameModel2212)
+//
+//    // Either name or model is different
+//    assertNotEquals(gameModel1212, gameModel1122)
+//    assertNotEquals(gameModel2121, gameModel2211)
+//    assertNotEquals(gameModel1212, gameModel2212)
+//  }
+//
+//  @Test
+//  def testModelsConsistencyGood(): Unit = sparkTest("testModelsConsistencyGood") {
+//
+//    // Features: we have three feature spaces: one for the fixed model, and one for each random model.
+//    // Each model has its own separate feature space, but feature values can be shared between spaces.
+//    // Features shared between spaces have a unique name, but possibly different indices.
+//    val numFeaturesPerModel = Map(("fixedFeatures", 10), ("RE1Features", 10), ("RE2Features", 10))
+//
+//    // Fixed effect model
+//    val glm = new LogisticRegressionModel(
+//      CoefficientsTest.sparseCoefficients(numFeaturesPerModel("fixedFeatures"))(1,2,5)(11,21,51))
+//    val FEModel = new FixedEffectModel(sc.broadcast(glm), "fixedFeatures")
+//
+//    // Random effect 1 has 2 items
+//    val numFeaturesRE1 = numFeaturesPerModel("RE1Features")
+//    val RE1Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,5,7)(111,511,911)
+//    val glmRE11: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item1)
+//    val RE1Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,2)(112,512)
+//    val glmRE12: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item2)
+//
+//    val glmRE1RDD = sc.parallelize(List(("RE1Item1", glmRE11), ("RE1Item2", glmRE12)))
+//    val RE1Model = new RandomEffectModel(glmRE1RDD, "REModel1", "RE1Features")
+//
+//    // Random effect 2 has 3 items (of a different kind)
+//    val numFeaturesRE2 = numFeaturesPerModel("RE2Features")
+//    val RE2Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(3,4,6)(321,421,621)
+//    val glmRE21: GeneralizedLinearModel = new LogisticRegressionModel(RE2Item1)
+//    val RE2Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(4,5)(322,422)
+//    val glmRE22: GeneralizedLinearModel = new LogisticRegressionModel(RE2Item2)
+//    val RE2Item3 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(2,7,8)(323,423,523)
+//    val glmRE23: GeneralizedLinearModel = new LogisticRegressionModel(RE2Item3)
+//
+//    val glmRE2RDD = sc.parallelize(List(("RE2Item1", glmRE21), ("RE2Item2", glmRE22), ("RE2Item3", glmRE23)))
+//    val RE2Model = new RandomEffectModel(glmRE2RDD, "REModel2", "RE2Features")
+//
+//    // This GAME model has 1 fixed effect, and 2 different random effect models
+//    GameModel(("fixed", FEModel), ("RE1", RE1Model), ("RE2", RE2Model))
+//  }
+//
+//  @Test(expectedExceptions = Array(classOf[IllegalArgumentException]))
+//  def testModelsConsistencyBad(): Unit = sparkTest("testModelsConsistencyBad") {
+//
+//    // Features: we have three feature spaces: one for the fixed model, and one for each random model.
+//    // Each model has its own separate feature space, but feature values can be shared between spaces.
+//    // Features shared between spaces have a unique name, but possibly different indices.
+//    val numFeaturesPerModel = Map(("fixedFeatures", 10), ("RE1Features", 10), ("RE2Features", 10))
+//
+//    // Fixed effect model
+//    val glm = new LogisticRegressionModel(
+//      CoefficientsTest.sparseCoefficients(numFeaturesPerModel("fixedFeatures"))(1,2,5)(11,21,51))
+//    val FEModel = new FixedEffectModel(sc.broadcast(glm), "fixedFeatures")
+//
+//    // Random effect 1 has 2 items
+//    val numFeaturesRE1 = numFeaturesPerModel("RE1Features")
+//    val RE1Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,5,7)(111,511,911)
+//    val glmRE11: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item1)
+//    val RE1Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE1)(1,2)(112,512)
+//    val glmRE12: GeneralizedLinearModel = new LogisticRegressionModel(RE1Item2)
+//
+//    val glmRE1RDD = sc.parallelize(List(("RE1Item1", glmRE11), ("RE1Item2", glmRE12)))
+//    val RE1Model = new RandomEffectModel(glmRE1RDD, "REModel1", "RE1Features")
+//
+//    // Random effect 2 has 3 items (of a different kind of model)
+//    val numFeaturesRE2 = numFeaturesPerModel("RE2Features")
+//    val RE2Item1 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(3,4,6)(321,421,621)
+//    val glmRE21: GeneralizedLinearModel = new PoissonRegressionModel(RE2Item1)
+//    val RE2Item2 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(4,5)(322,422)
+//    val glmRE22: GeneralizedLinearModel = new PoissonRegressionModel(RE2Item2)
+//    val RE2Item3 = CoefficientsTest.sparseCoefficients(numFeaturesRE2)(2,7,8)(323,423,523)
+//    val glmRE23: GeneralizedLinearModel = new PoissonRegressionModel(RE2Item3)
+//
+//    val glmRE2RDD = sc.parallelize(List(("RE2Item1", glmRE21), ("RE2Item2", glmRE22), ("RE2Item3", glmRE23)))
+//    val RE2Model = new RandomEffectModel(glmRE2RDD, "REModel2", "RE2Features")
+//
+//    // This GAME model has 1 fixed effect, and 2 different random effect models
+//    GameModel(("fixed", FEModel), ("RE1", RE1Model), ("RE2", RE2Model))
+//  }
+//}
diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/RandomEffectModelIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/RandomEffectModelIntegTest.scala
index 88abd13a..5d41ae88 100644
--- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/RandomEffectModelIntegTest.scala
+++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/model/RandomEffectModelIntegTest.scala
@@ -1,116 +1,116 @@
-/*
- * Copyright 2017 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License. You may obtain a
- * copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package com.linkedin.photon.ml.model
-
-import org.testng.Assert._
-import org.testng.annotations.Test
-
-import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
-import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.supervised.regression.PoissonRegressionModel
-import com.linkedin.photon.ml.test.SparkTestUtils
-
-/**
- * Integration tests for [[RandomEffectModel]].
- */
-class RandomEffectModelIntegTest extends SparkTestUtils {
-
-  /**
-   * Test that a [[RandomEffectModel]] must have the same coefficients, be computed on the same feature shard, and have
-   * the same random effect type to be equal.
-   */
-  @Test
-  def testEquals(): Unit = sparkTest("testEqualsForRandomEffectModel") {
-    // Coefficients parameter
-    val coefficientDimension = 1
-    val glm: GeneralizedLinearModel =
-      LogisticRegressionModel(Coefficients.initializeZeroCoefficients(coefficientDimension))
-
-    // Meta data
-    val featureShardId = "featureShardId"
-    val randomEffectType = "randomEffectType"
-
-    // Random effect model
-    val numCoefficients = 5
-    val modelsRDD = sc.parallelize(Seq.tabulate(numCoefficients)(i => (i.toString, glm)))
-
-    val randomEffectModel = new RandomEffectModel(modelsRDD, randomEffectType, featureShardId)
-
-    // Should equal to itself
-    assertEquals(randomEffectModel, randomEffectModel)
-
-    // Should equal to the random effect model with same featureShardId, randomEffectType and coefficientsRDD
-    val randomEffectModelCopy = new RandomEffectModel(modelsRDD, randomEffectType, featureShardId)
-    assertEquals(randomEffectModel, randomEffectModelCopy)
-
-    // Should not equal to the random effect model with different featureShardId
-    val featureShardId1 = "featureShardId1"
-    val randomEffectModelWithDiffFeatureShardId =
-      new RandomEffectModel(modelsRDD, randomEffectType, featureShardId1)
-    assertNotEquals(randomEffectModel, randomEffectModelWithDiffFeatureShardId)
-
-    // Should not equal to the random effect model with different randomEffectType
-    val randomEffectType1 = "randomEffectType1"
-    val randomEffectModelWithDiffRandomEffectShardId =
-      new RandomEffectModel(modelsRDD, randomEffectType1, featureShardId)
-    assertNotEquals(randomEffectModel, randomEffectModelWithDiffRandomEffectShardId)
-
-    // Should not equal to the random effect model with different coefficientsRDD
-    val numCoefficients1 = numCoefficients + 1
-    val modelsRDD1 = sc.parallelize(Seq.tabulate(numCoefficients1)(i => (i.toString, glm)))
-
-    val randomEffectModelWithDiffCoefficientsRDD =
-      new RandomEffectModel(modelsRDD1, randomEffectType, featureShardId)
-    assertNotEquals(randomEffectModel, randomEffectModelWithDiffCoefficientsRDD)
-  }
-
-  /**
-   * Test that a [[RandomEffectModel]] consisting of the same type of [[GeneralizedLinearModel]] will be accepted.
-   */
-  @Test
-  def testModelsConsistencyGood(): Unit = sparkTest("testModelsConsistencyGood") {
-
-    val numFeatures = 10
-
-    // Random effect with 2 items of the same type.
-    val randomEffectItem1 = CoefficientsTest.sparseCoefficients(numFeatures)(1,5,7)(111,511,911)
-    val glm1: GeneralizedLinearModel = new LogisticRegressionModel(randomEffectItem1)
-    val randomEffectItem2 = CoefficientsTest.sparseCoefficients(numFeatures)(1,2)(112,512)
-    val glm2: GeneralizedLinearModel = new LogisticRegressionModel(randomEffectItem2)
-    val randomEffectRDD = sc.parallelize(List(("RandomEffectItem1", glm1), ("RandomEffectItem2", glm2)))
-
-    // This should not throw exception.
-    new RandomEffectModel(randomEffectRDD, "RandomEffectModel", "RandomEffectFeatures")
-  }
-
-  /**
-   * Test that a [[RandomEffectModel]] consisting of different types of [[GeneralizedLinearModel]] will be rejected.
-   */
-  @Test(expectedExceptions = Array(classOf[IllegalArgumentException]))
-  def testModelsConsistencyBad(): Unit = sparkTest("testModelsConsistencyBad") {
-
-    val numFeatures = 10
-
-    // Random effect with 2 items of differing types.
-    val randomEffectItem1 = CoefficientsTest.sparseCoefficients(numFeatures)(1,5,7)(111,511,911)
-    val glm1: GeneralizedLinearModel = new LogisticRegressionModel(randomEffectItem1)
-    val randomEffectItem2 = CoefficientsTest.sparseCoefficients(numFeatures)(1,2)(112,512)
-    val glm2: GeneralizedLinearModel = new PoissonRegressionModel(randomEffectItem2)
-    val randomEffectRDD = sc.parallelize(List(("RandomEffectItem1", glm1), ("RandomEffectItem2", glm2)))
-
-    // This should throw exception.
-    new RandomEffectModel(randomEffectRDD, "RandomEffectModel", "RandomEffectFeatures")
-  }
-}
+///*
+// * Copyright 2017 LinkedIn Corp. All rights reserved.
+// * Licensed under the Apache License, Version 2.0 (the "License"); you may
+// * not use this file except in compliance with the License. You may obtain a
+// * copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// * License for the specific language governing permissions and limitations
+// * under the License.
+// */
+//package com.linkedin.photon.ml.model
+//
+//import org.testng.Assert._
+//import org.testng.annotations.Test
+//
+//import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
+//import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
+//import com.linkedin.photon.ml.supervised.regression.PoissonRegressionModel
+//import com.linkedin.photon.ml.test.SparkTestUtils
+//
+///**
+// * Integration tests for [[RandomEffectModel]].
+// */
+//class RandomEffectModelIntegTest extends SparkTestUtils {
+//
+//  /**
+//   * Test that a [[RandomEffectModel]] must have the same coefficients, be computed on the same feature shard, and have
+//   * the same random effect type to be equal.
+//   */
+//  @Test
+//  def testEquals(): Unit = sparkTest("testEqualsForRandomEffectModel") {
+//    // Coefficients parameter
+//    val coefficientDimension = 1
+//    val glm: GeneralizedLinearModel =
+//      LogisticRegressionModel(Coefficients.initializeZeroCoefficients(coefficientDimension))
+//
+//    // Meta data
+//    val featureShardId = "featureShardId"
+//    val randomEffectType = "randomEffectType"
+//
+//    // Random effect model
+//    val numCoefficients = 5
+//    val modelsRDD = sc.parallelize(Seq.tabulate(numCoefficients)(i => (i.toString, glm)))
+//
+//    val randomEffectModel = new RandomEffectModel(modelsRDD, randomEffectType, featureShardId)
+//
+//    // Should equal to itself
+//    assertEquals(randomEffectModel, randomEffectModel)
+//
+//    // Should equal to the random effect model with same featureShardId, randomEffectType and coefficientsRDD
+//    val randomEffectModelCopy = new RandomEffectModel(modelsRDD, randomEffectType, featureShardId)
+//    assertEquals(randomEffectModel, randomEffectModelCopy)
+//
+//    // Should not equal to the random effect model with different featureShardId
+//    val featureShardId1 = "featureShardId1"
+//    val randomEffectModelWithDiffFeatureShardId =
+//      new RandomEffectModel(modelsRDD, randomEffectType, featureShardId1)
+//    assertNotEquals(randomEffectModel, randomEffectModelWithDiffFeatureShardId)
+//
+//    // Should not equal to the random effect model with different randomEffectType
+//    val randomEffectType1 = "randomEffectType1"
+//    val randomEffectModelWithDiffRandomEffectShardId =
+//      new RandomEffectModel(modelsRDD, randomEffectType1, featureShardId)
+//    assertNotEquals(randomEffectModel, randomEffectModelWithDiffRandomEffectShardId)
+//
+//    // Should not equal to the random effect model with different coefficientsRDD
+//    val numCoefficients1 = numCoefficients + 1
+//    val modelsRDD1 = sc.parallelize(Seq.tabulate(numCoefficients1)(i => (i.toString, glm)))
+//
+//    val randomEffectModelWithDiffCoefficientsRDD =
+//      new RandomEffectModel(modelsRDD1, randomEffectType, featureShardId)
+//    assertNotEquals(randomEffectModel, randomEffectModelWithDiffCoefficientsRDD)
+//  }
+//
+//  /**
+//   * Test that a [[RandomEffectModel]] consisting of the same type of [[GeneralizedLinearModel]] will be accepted.
+//   */
+//  @Test
+//  def testModelsConsistencyGood(): Unit = sparkTest("testModelsConsistencyGood") {
+//
+//    val numFeatures = 10
+//
+//    // Random effect with 2 items of the same type.
+//    val randomEffectItem1 = CoefficientsTest.sparseCoefficients(numFeatures)(1,5,7)(111,511,911)
+//    val glm1: GeneralizedLinearModel = new LogisticRegressionModel(randomEffectItem1)
+//    val randomEffectItem2 = CoefficientsTest.sparseCoefficients(numFeatures)(1,2)(112,512)
+//    val glm2: GeneralizedLinearModel = new LogisticRegressionModel(randomEffectItem2)
+//    val randomEffectRDD = sc.parallelize(List(("RandomEffectItem1", glm1), ("RandomEffectItem2", glm2)))
+//
+//    // This should not throw exception.
+//    new RandomEffectModel(randomEffectRDD, "RandomEffectModel", "RandomEffectFeatures")
+//  }
+//
+//  /**
+//   * Test that a [[RandomEffectModel]] consisting of different types of [[GeneralizedLinearModel]] will be rejected.
+//   */
+//  @Test(expectedExceptions = Array(classOf[IllegalArgumentException]))
+//  def testModelsConsistencyBad(): Unit = sparkTest("testModelsConsistencyBad") {
+//
+//    val numFeatures = 10
+//
+//    // Random effect with 2 items of differing types.
+//    val randomEffectItem1 = CoefficientsTest.sparseCoefficients(numFeatures)(1,5,7)(111,511,911)
+//    val glm1: GeneralizedLinearModel = new LogisticRegressionModel(randomEffectItem1)
+//    val randomEffectItem2 = CoefficientsTest.sparseCoefficients(numFeatures)(1,2)(112,512)
+//    val glm2: GeneralizedLinearModel = new PoissonRegressionModel(randomEffectItem2)
+//    val randomEffectRDD = sc.parallelize(List(("RandomEffectItem1", glm1), ("RandomEffectItem2", glm2)))
+//
+//    // This should throw exception.
+//    new RandomEffectModel(randomEffectRDD, "RandomEffectModel", "RandomEffectFeatures")
+//  }
+//}
diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala
index abf513c4..20b49c31 100644
--- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala
+++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala
@@ -1,412 +1,412 @@
-/*
- * Copyright 2017 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License. You may obtain a
- * copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package com.linkedin.photon.ml.optimization
-
-import java.util.Random
-
-import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, pinv}
-import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
-import org.mockito.Mockito._
-import org.testng.Assert._
-import org.testng.annotations.{DataProvider, Test}
-
-import com.linkedin.photon.ml.constants.MathConst
-import com.linkedin.photon.ml.data.LabeledPoint
-import com.linkedin.photon.ml.function.L2RegularizationDiff
-import com.linkedin.photon.ml.function.glm._
-import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction
-import com.linkedin.photon.ml.model.Coefficients
-import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext}
-import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration
-import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
-import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.test.{CommonTestUtils, SparkTestUtils}
-import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
-
-/**
- * Integration tests for [[DistributedOptimizationProblem]].
- */
-class DistributedOptimizationProblemIntegTest extends SparkTestUtils {
-
-  import CommonTestUtils._
-  import DistributedOptimizationProblemIntegTest._
-
-  /**
-   * Function to generate a mock [[GeneralizedLinearModel]].
-   *
-   * @param coefficients Model coefficients (unused)
-   * @return A mocked [[GeneralizedLinearModel]]
-   */
-  def glmConstructorMock(coefficients: Coefficients): GeneralizedLinearModel = mock(classOf[GeneralizedLinearModel])
-
-  /**
-   * Generate weighted benign datasets for binary classification.
-   *
-   * @return A Seq of [[LabeledPoint]]
-   */
-  def generateWeightedBenignDatasetBinaryClassification: Seq[LabeledPoint] = {
-
-    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
-
-    drawBalancedSampleFromNumericallyBenignDenseFeaturesForBinaryClassifierLocal(
-      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
-      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
-      OptimizationProblemIntegTestUtils.DIMENSIONS)
-      .map { obj =>
-        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
-        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
-        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
-      }
-      .toList
-  }
-
-  /**
-   * Generate weighted benign datasets for linear regression.
-   *
-   * @return A Seq of [[LabeledPoint]]
-   */
-  def generateWeightedBenignDatasetLinearRegression: Seq[LabeledPoint] = {
-
-    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
-
-    drawSampleFromNumericallyBenignDenseFeaturesForLinearRegressionLocal(
-      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
-      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
-      OptimizationProblemIntegTestUtils.DIMENSIONS)
-      .map { obj =>
-        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
-        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
-        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
-      }
-      .toList
-  }
-  /**
-   * Generate weighted benign datasets for Poisson regression.
-   *
-   * @return A Seq of [[LabeledPoint]]
-   */
-  def generateWeightedBenignDatasetPoissonRegression: Seq[LabeledPoint] = {
-
-    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
-
-    drawSampleFromNumericallyBenignDenseFeaturesForPoissonRegressionLocal(
-      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
-      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
-      OptimizationProblemIntegTestUtils.DIMENSIONS)
-      .map { obj =>
-        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
-        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
-        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
-      }
-      .toList
-  }
-
-  @DataProvider(parallel = true)
-  def varianceInput(): Array[Array[Any]] = {
-
-    val regularizationWeights = Array[Double](0.1, 0.0, 1.0, 10.0, 100.0)
-
-    // Regularization weight, input data generation function, objective function, manual Hessian calculation function
-    regularizationWeights.flatMap { weight =>
-      Array(
-        Array[Any](
-          weight,
-          generateWeightedBenignDatasetBinaryClassification _,
-          LogisticLossFunction,
-          OptimizationProblemIntegTestUtils.logisticDzzLoss _),
-        Array[Any](
-          weight,
-          generateWeightedBenignDatasetLinearRegression _,
-          SquaredLossFunction,
-          OptimizationProblemIntegTestUtils.linearDzzLoss _),
-        Array[Any](
-          weight,
-          generateWeightedBenignDatasetPoissonRegression _,
-          PoissonLossFunction,
-          OptimizationProblemIntegTestUtils.poissonDzzLoss _))
-    }
-  }
-
-  /**
-   * Test that regularization weights can be updated.
-   */
-  @Test
-  def testUpdateRegularizationWeight(): Unit = sparkTest("testUpdateRegularizationWeight") {
-
-    val normalization = NoNormalization()
-    val initL1Weight = 1D
-    val initL2Weight = 2D
-    val finalL1Weight = 3D
-    val finalL2Weight = 4D
-    val finalElasticWeight = 5D
-    val alpha = 0.75
-    val elasticFinalL1Weight = finalElasticWeight * alpha
-    val elasticFinalL2Weight = finalElasticWeight * (1 - alpha)
-
-    val normalizationMock = mock(classOf[BroadcastWrapper[NormalizationContext]])
-    val optimizer = mock(classOf[Optimizer[DistributedSmoothedHingeLossFunction]])
-    val statesTracker = mock(classOf[OptimizationStatesTracker])
-    val objectiveFunction = mock(classOf[DistributedSmoothedHingeLossFunction])
-
-    doReturn(normalization).when(normalizationMock).value
-    doReturn(statesTracker).when(optimizer).getStateTracker
-
-    val optimizerL1 = new OWLQN(initL1Weight, normalizationMock)
-    val objectiveFunctionL2 = new L2LossFunction(sc)
-    objectiveFunctionL2.l2RegularizationWeight = initL2Weight
-
-    val l1Problem = new DistributedOptimizationProblem(
-      optimizerL1,
-      objectiveFunction,
-      samplerOption = None,
-      LogisticRegressionModel.apply,
-      L1RegularizationContext,
-      VarianceComputationType.NONE)
-    val l2Problem = new DistributedOptimizationProblem(
-      optimizer,
-      objectiveFunctionL2,
-      samplerOption = None,
-      LogisticRegressionModel.apply,
-      L2RegularizationContext,
-      VarianceComputationType.NONE)
-    val elasticProblem = new DistributedOptimizationProblem(
-      optimizerL1,
-      objectiveFunctionL2,
-      samplerOption = None,
-      LogisticRegressionModel.apply,
-      ElasticNetRegularizationContext(alpha),
-      VarianceComputationType.NONE)
-
-    // Check update to L1/L2 weights individually
-    assertNotEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-
-    l1Problem.updateRegularizationWeight(finalL1Weight)
-    l2Problem.updateRegularizationWeight(finalL2Weight)
-
-    assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-
-    // Check updates to L1/L2 weights together
-    optimizerL1.l1RegularizationWeight = initL1Weight
-    objectiveFunctionL2.l2RegularizationWeight = initL2Weight
-
-    assertNotEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-
-    elasticProblem.updateRegularizationWeight(finalElasticWeight)
-
-    assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    assertEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-  }
-
-  /**
-   * Test simple coefficient variance computation for weighted data points, with regularization.
-   *
-   * @param regularizationWeight Regularization weight
-   * @param dataGenerationFunction Function to generate test data
-   * @param lossFunction Loss function for optimization
-   * @param DzzLossFunction Function to compute coefficient Hessian directly
-   */
-  @Test(dataProvider = "varianceInput")
-  def testComputeVariancesSimple(
-      regularizationWeight: Double,
-      dataGenerationFunction: () => Seq[LabeledPoint],
-      lossFunction: PointwiseLossFunction,
-      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = sparkTest("testComputeVariancesSimple") {
-
-    val input = sc.parallelize(dataGenerationFunction())
-    val coefficients = generateDenseVector(OptimizationProblemIntegTestUtils.DIMENSIONS)
-
-    val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]])
-    val statesTracker = mock(classOf[OptimizationStatesTracker])
-    val regContext = mock(classOf[RegularizationContext])
-    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
-
-    doReturn(statesTracker).when(optimizer).getStateTracker
-    doReturn(regContext).when(optConfig).regularizationContext
-    doReturn(regularizationWeight).when(optConfig).regularizationWeight
-    doReturn(RegularizationType.L2).when(regContext).regularizationType
-    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
-
-    val objective = DistributedGLMLossFunction(optConfig, lossFunction, treeAggregateDepth = 1)
-
-    val optimizationProblem = new DistributedOptimizationProblem(
-      optimizer,
-      objective,
-      samplerOption = None,
-      glmConstructorMock,
-      NoRegularizationContext,
-      VarianceComputationType.SIMPLE)
-
-    val hessianDiagonal = input.treeAggregate(DenseVector.zeros[Double](OptimizationProblemIntegTestUtils.DIMENSIONS))(
-      seqOp = (vector: DenseVector[Double], datum: LabeledPoint) => {
-        diag(OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients))(diag(vector), datum))
-      },
-      combOp = (vector1: DenseVector[Double], vector2: DenseVector[Double]) => vector1 + vector2,
-      depth = 1)
-    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
-    val expected = (hessianDiagonal + regularizationWeight).map( v => 1D / (v + MathConst.EPSILON))
-    val actual: Vector[Double] = optimizationProblem.computeVariances(input, coefficients).get
-
-    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
-  }
-
-  /**
-   * Test full coefficient variance computation for weighted data points, with regularization.
-   *
-   * @param regularizationWeight Regularization weight
-   * @param dataGenerationFunction Function to generate test data
-   * @param lossFunction Loss function for optimization
-   * @param DzzLossFunction Function to compute coefficient Hessian directly
-   */
-  @Test(dataProvider = "varianceInput")
-  def testComputeVariancesFull(
-      regularizationWeight: Double,
-      dataGenerationFunction: () => Seq[LabeledPoint],
-      lossFunction: PointwiseLossFunction,
-      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = sparkTest("testComputeVariancesFull") {
-
-    val input = sc.parallelize(dataGenerationFunction())
-    val dimensions = OptimizationProblemIntegTestUtils.DIMENSIONS
-    val coefficients = generateDenseVector(dimensions)
-
-    val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]])
-    val statesTracker = mock(classOf[OptimizationStatesTracker])
-    val regContext = mock(classOf[RegularizationContext])
-    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
-
-    doReturn(statesTracker).when(optimizer).getStateTracker
-    doReturn(regContext).when(optConfig).regularizationContext
-    doReturn(regularizationWeight).when(optConfig).regularizationWeight
-    doReturn(RegularizationType.L2).when(regContext).regularizationType
-    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
-
-    val objective = DistributedGLMLossFunction(optConfig, lossFunction, treeAggregateDepth = 1)
-
-    val optimizationProblem = new DistributedOptimizationProblem(
-      optimizer,
-      objective,
-      samplerOption = None,
-      glmConstructorMock,
-      NoRegularizationContext,
-      VarianceComputationType.FULL)
-
-    val hessianMatrix = input.treeAggregate(
-      DenseMatrix.zeros[Double](dimensions, dimensions))(
-      seqOp = OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients)),
-      combOp = (matrix1: DenseMatrix[Double], matrix2: DenseMatrix[Double]) => matrix1 + matrix2,
-      depth = 1)
-    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
-    val expected = diag(pinv(hessianMatrix + (DenseMatrix.eye[Double](dimensions) * regularizationWeight)))
-    val actual: Vector[Double] = optimizationProblem.computeVariances(input, coefficients).get
-
-    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
-  }
-
-  /**
-   * Test the variance computation against a reference implementation in R glm.
-   */
-  @Test
-  def testComputeVariancesAgainstReference(): Unit = sparkTest("testComputeVariancesAgainstReference") {
-
-    // Read the "heart disease" dataset from libSVM format
-    val input: RDD[LabeledPoint] = {
-      val tt = getClass.getClassLoader.getResource("DriverIntegTest/input/heart.txt")
-      val inputFile = tt.toString
-      val rawInput = sc.textFile(inputFile, 1)
-
-      rawInput.map { x =>
-        val y = x.split(" ")
-        val label = y(0).toDouble / 2 + 0.5
-        val features = y.drop(1).map(z => z.split(":")(1).toDouble) :+ 1.0
-        new LabeledPoint(label, DenseVector(features))
-      }
-    }
-
-    val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]])
-    val statesTracker = mock(classOf[OptimizationStatesTracker])
-    val regContext = mock(classOf[RegularizationContext])
-    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
-
-    doReturn(statesTracker).when(optimizer).getStateTracker
-    doReturn(regContext).when(optConfig).regularizationContext
-    doReturn(RegularizationType.NONE).when(regContext).regularizationType
-
-    val objective = DistributedGLMLossFunction(optConfig, LogisticLossFunction, treeAggregateDepth = 1)
-
-    val optimizationProblem = new DistributedOptimizationProblem(
-      optimizer,
-      objective,
-      samplerOption = None,
-      glmConstructorMock,
-      NoRegularizationContext,
-      VarianceComputationType.FULL)
-
-    // Produced by the reference implementation in R glm
-    val expected = DenseVector(
-      0.0007320271,
-      0.3204454,
-      0.05394657,
-      0.0001520536,
-      1.787598e-05,
-      0.3898167,
-      0.04483891,
-      0.0001226556,
-      0.2006968,
-      0.05705076,
-      0.1752335,
-      0.08054471,
-      0.01292064,
-      10.37188)
-
-    // From a prior optimization run
-    val coefficients = DenseVector(
-      -0.022306127,
-      1.299914831,
-      0.792316427,
-      0.033470557,
-      0.004679123,
-      -0.459432925,
-      0.294831754,
-      -0.023566341,
-      0.890054910,
-      0.410533616,
-      0.216417307,
-      1.167698255,
-      0.367261286,
-      -8.303806435)
-    val actual: Vector[Double] = optimizationProblem.computeVariances(input, coefficients).get
-
-    VectorUtils.areAlmostEqual(actual, expected)
-  }
-}
-
-object DistributedOptimizationProblemIntegTest {
-
-  // No way to pass Mixin class type to Mockito, need to define a concrete class
-  private class L2LossFunction(sc: SparkContext)
-    extends DistributedSmoothedHingeLossFunction(treeAggregateDepth = 1)
-      with L2RegularizationDiff
-}
+///*
+// * Copyright 2017 LinkedIn Corp. All rights reserved.
+// * Licensed under the Apache License, Version 2.0 (the "License"); you may
+// * not use this file except in compliance with the License. You may obtain a
+// * copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// * License for the specific language governing permissions and limitations
+// * under the License.
+// */
+//package com.linkedin.photon.ml.optimization
+//
+//import java.util.Random
+//
+//import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, pinv}
+//import org.apache.spark.SparkContext
+//import org.apache.spark.rdd.RDD
+//import org.mockito.Mockito._
+//import org.testng.Assert._
+//import org.testng.annotations.{DataProvider, Test}
+//
+//import com.linkedin.photon.ml.constants.MathConst
+//import com.linkedin.photon.ml.data.LabeledPoint
+//import com.linkedin.photon.ml.function.L2RegularizationDiff
+//import com.linkedin.photon.ml.function.glm._
+//import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction
+//import com.linkedin.photon.ml.model.Coefficients
+//import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext}
+//import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration
+//import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
+//import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
+//import com.linkedin.photon.ml.test.{CommonTestUtils, SparkTestUtils}
+//import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
+//
+///**
+// * Integration tests for [[DistributedOptimizationProblem]].
+// */
+//class DistributedOptimizationProblemIntegTest extends SparkTestUtils {
+//
+//  import CommonTestUtils._
+//  import DistributedOptimizationProblemIntegTest._
+//
+//  /**
+//   * Function to generate a mock [[GeneralizedLinearModel]].
+//   *
+//   * @param coefficients Model coefficients (unused)
+//   * @return A mocked [[GeneralizedLinearModel]]
+//   */
+//  def glmConstructorMock(coefficients: Coefficients): GeneralizedLinearModel = mock(classOf[GeneralizedLinearModel])
+//
+//  /**
+//   * Generate weighted benign datasets for binary classification.
+//   *
+//   * @return A Seq of [[LabeledPoint]]
+//   */
+//  def generateWeightedBenignDatasetBinaryClassification: Seq[LabeledPoint] = {
+//
+//    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
+//
+//    drawBalancedSampleFromNumericallyBenignDenseFeaturesForBinaryClassifierLocal(
+//      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
+//      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
+//      OptimizationProblemIntegTestUtils.DIMENSIONS)
+//      .map { obj =>
+//        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
+//        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
+//        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
+//      }
+//      .toList
+//  }
+//
+//  /**
+//   * Generate weighted benign datasets for linear regression.
+//   *
+//   * @return A Seq of [[LabeledPoint]]
+//   */
+//  def generateWeightedBenignDatasetLinearRegression: Seq[LabeledPoint] = {
+//
+//    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
+//
+//    drawSampleFromNumericallyBenignDenseFeaturesForLinearRegressionLocal(
+//      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
+//      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
+//      OptimizationProblemIntegTestUtils.DIMENSIONS)
+//      .map { obj =>
+//        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
+//        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
+//        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
+//      }
+//      .toList
+//  }
+//  /**
+//   * Generate weighted benign datasets for Poisson regression.
+//   *
+//   * @return A Seq of [[LabeledPoint]]
+//   */
+//  def generateWeightedBenignDatasetPoissonRegression: Seq[LabeledPoint] = {
+//
+//    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
+//
+//    drawSampleFromNumericallyBenignDenseFeaturesForPoissonRegressionLocal(
+//      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
+//      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
+//      OptimizationProblemIntegTestUtils.DIMENSIONS)
+//      .map { obj =>
+//        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
+//        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
+//        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
+//      }
+//      .toList
+//  }
+//
+//  @DataProvider(parallel = true)
+//  def varianceInput(): Array[Array[Any]] = {
+//
+//    val regularizationWeights = Array[Double](0.1, 0.0, 1.0, 10.0, 100.0)
+//
+//    // Regularization weight, input data generation function, objective function, manual Hessian calculation function
+//    regularizationWeights.flatMap { weight =>
+//      Array(
+//        Array[Any](
+//          weight,
+//          generateWeightedBenignDatasetBinaryClassification _,
+//          LogisticLossFunction,
+//          OptimizationProblemIntegTestUtils.logisticDzzLoss _),
+//        Array[Any](
+//          weight,
+//          generateWeightedBenignDatasetLinearRegression _,
+//          SquaredLossFunction,
+//          OptimizationProblemIntegTestUtils.linearDzzLoss _),
+//        Array[Any](
+//          weight,
+//          generateWeightedBenignDatasetPoissonRegression _,
+//          PoissonLossFunction,
+//          OptimizationProblemIntegTestUtils.poissonDzzLoss _))
+//    }
+//  }
+//
+//  /**
+//   * Test that regularization weights can be updated.
+//   */
+//  @Test
+//  def testUpdateRegularizationWeight(): Unit = sparkTest("testUpdateRegularizationWeight") {
+//
+//    val normalization = NoNormalization()
+//    val initL1Weight = 1D
+//    val initL2Weight = 2D
+//    val finalL1Weight = 3D
+//    val finalL2Weight = 4D
+//    val finalElasticWeight = 5D
+//    val alpha = 0.75
+//    val elasticFinalL1Weight = finalElasticWeight * alpha
+//    val elasticFinalL2Weight = finalElasticWeight * (1 - alpha)
+//
+//    val normalizationMock = mock(classOf[BroadcastWrapper[NormalizationContext]])
+//    val optimizer = mock(classOf[Optimizer[DistributedSmoothedHingeLossFunction]])
+//    val statesTracker = mock(classOf[OptimizationStatesTracker])
+//    val objectiveFunction = mock(classOf[DistributedSmoothedHingeLossFunction])
+//
+//    doReturn(normalization).when(normalizationMock).value
+//    doReturn(statesTracker).when(optimizer).getStateTracker
+//
+//    val optimizerL1 = new OWLQN(initL1Weight, normalizationMock)
+//    val objectiveFunctionL2 = new L2LossFunction(sc)
+//    objectiveFunctionL2.l2RegularizationWeight = initL2Weight
+//
+//    val l1Problem = new DistributedOptimizationProblem(
+//      optimizerL1,
+//      objectiveFunction,
+//      samplerOption = None,
+//      LogisticRegressionModel.apply,
+//      L1RegularizationContext,
+//      VarianceComputationType.NONE)
+//    val l2Problem = new DistributedOptimizationProblem(
+//      optimizer,
+//      objectiveFunctionL2,
+//      samplerOption = None,
+//      LogisticRegressionModel.apply,
+//      L2RegularizationContext,
+//      VarianceComputationType.NONE)
+//    val elasticProblem = new DistributedOptimizationProblem(
+//      optimizerL1,
+//      objectiveFunctionL2,
+//      samplerOption = None,
+//      LogisticRegressionModel.apply,
+//      ElasticNetRegularizationContext(alpha),
+//      VarianceComputationType.NONE)
+//
+//    // Check update to L1/L2 weights individually
+//    assertNotEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//
+//    l1Problem.updateRegularizationWeight(finalL1Weight)
+//    l2Problem.updateRegularizationWeight(finalL2Weight)
+//
+//    assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//
+//    // Check updates to L1/L2 weights together
+//    optimizerL1.l1RegularizationWeight = initL1Weight
+//    objectiveFunctionL2.l2RegularizationWeight = initL2Weight
+//
+//    assertNotEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//
+//    elasticProblem.updateRegularizationWeight(finalElasticWeight)
+//
+//    assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    assertEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//  }
+//
+//  /**
+//   * Test simple coefficient variance computation for weighted data points, with regularization.
+//   *
+//   * @param regularizationWeight Regularization weight
+//   * @param dataGenerationFunction Function to generate test data
+//   * @param lossFunction Loss function for optimization
+//   * @param DzzLossFunction Function to compute coefficient Hessian directly
+//   */
+//  @Test(dataProvider = "varianceInput")
+//  def testComputeVariancesSimple(
+//      regularizationWeight: Double,
+//      dataGenerationFunction: () => Seq[LabeledPoint],
+//      lossFunction: PointwiseLossFunction,
+//      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = sparkTest("testComputeVariancesSimple") {
+//
+//    val input = sc.parallelize(dataGenerationFunction())
+//    val coefficients = generateDenseVector(OptimizationProblemIntegTestUtils.DIMENSIONS)
+//
+//    val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]])
+//    val statesTracker = mock(classOf[OptimizationStatesTracker])
+//    val regContext = mock(classOf[RegularizationContext])
+//    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
+//
+//    doReturn(statesTracker).when(optimizer).getStateTracker
+//    doReturn(regContext).when(optConfig).regularizationContext
+//    doReturn(regularizationWeight).when(optConfig).regularizationWeight
+//    doReturn(RegularizationType.L2).when(regContext).regularizationType
+//    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
+//
+//    val objective = DistributedGLMLossFunction(optConfig, lossFunction, treeAggregateDepth = 1)
+//
+//    val optimizationProblem = new DistributedOptimizationProblem(
+//      optimizer,
+//      objective,
+//      samplerOption = None,
+//      glmConstructorMock,
+//      NoRegularizationContext,
+//      VarianceComputationType.SIMPLE)
+//
+//    val hessianDiagonal = input.treeAggregate(DenseVector.zeros[Double](OptimizationProblemIntegTestUtils.DIMENSIONS))(
+//      seqOp = (vector: DenseVector[Double], datum: LabeledPoint) => {
+//        diag(OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients))(diag(vector), datum))
+//      },
+//      combOp = (vector1: DenseVector[Double], vector2: DenseVector[Double]) => vector1 + vector2,
+//      depth = 1)
+//    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
+//    val expected = (hessianDiagonal + regularizationWeight).map( v => 1D / (v + MathConst.EPSILON))
+//    val actual: Vector[Double] = optimizationProblem.computeVariances(input, coefficients).get
+//
+//    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
+//  }
+//
+//  /**
+//   * Test full coefficient variance computation for weighted data points, with regularization.
+//   *
+//   * @param regularizationWeight Regularization weight
+//   * @param dataGenerationFunction Function to generate test data
+//   * @param lossFunction Loss function for optimization
+//   * @param DzzLossFunction Function to compute coefficient Hessian directly
+//   */
+//  @Test(dataProvider = "varianceInput")
+//  def testComputeVariancesFull(
+//      regularizationWeight: Double,
+//      dataGenerationFunction: () => Seq[LabeledPoint],
+//      lossFunction: PointwiseLossFunction,
+//      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = sparkTest("testComputeVariancesFull") {
+//
+//    val input = sc.parallelize(dataGenerationFunction())
+//    val dimensions = OptimizationProblemIntegTestUtils.DIMENSIONS
+//    val coefficients = generateDenseVector(dimensions)
+//
+//    val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]])
+//    val statesTracker = mock(classOf[OptimizationStatesTracker])
+//    val regContext = mock(classOf[RegularizationContext])
+//    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
+//
+//    doReturn(statesTracker).when(optimizer).getStateTracker
+//    doReturn(regContext).when(optConfig).regularizationContext
+//    doReturn(regularizationWeight).when(optConfig).regularizationWeight
+//    doReturn(RegularizationType.L2).when(regContext).regularizationType
+//    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
+//
+//    val objective = DistributedGLMLossFunction(optConfig, lossFunction, treeAggregateDepth = 1)
+//
+//    val optimizationProblem = new DistributedOptimizationProblem(
+//      optimizer,
+//      objective,
+//      samplerOption = None,
+//      glmConstructorMock,
+//      NoRegularizationContext,
+//      VarianceComputationType.FULL)
+//
+//    val hessianMatrix = input.treeAggregate(
+//      DenseMatrix.zeros[Double](dimensions, dimensions))(
+//      seqOp = OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients)),
+//      combOp = (matrix1: DenseMatrix[Double], matrix2: DenseMatrix[Double]) => matrix1 + matrix2,
+//      depth = 1)
+//    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
+//    val expected = diag(pinv(hessianMatrix + (DenseMatrix.eye[Double](dimensions) * regularizationWeight)))
+//    val actual: Vector[Double] = optimizationProblem.computeVariances(input, coefficients).get
+//
+//    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
+//  }
+//
+//  /**
+//   * Test the variance computation against a reference implementation in R glm.
+//   */
+//  @Test
+//  def testComputeVariancesAgainstReference(): Unit = sparkTest("testComputeVariancesAgainstReference") {
+//
+//    // Read the "heart disease" dataset from libSVM format
+//    val input: RDD[LabeledPoint] = {
+//      val tt = getClass.getClassLoader.getResource("DriverIntegTest/input/heart.txt")
+//      val inputFile = tt.toString
+//      val rawInput = sc.textFile(inputFile, 1)
+//
+//      rawInput.map { x =>
+//        val y = x.split(" ")
+//        val label = y(0).toDouble / 2 + 0.5
+//        val features = y.drop(1).map(z => z.split(":")(1).toDouble) :+ 1.0
+//        new LabeledPoint(label, DenseVector(features))
+//      }
+//    }
+//
+//    val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]])
+//    val statesTracker = mock(classOf[OptimizationStatesTracker])
+//    val regContext = mock(classOf[RegularizationContext])
+//    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
+//
+//    doReturn(statesTracker).when(optimizer).getStateTracker
+//    doReturn(regContext).when(optConfig).regularizationContext
+//    doReturn(RegularizationType.NONE).when(regContext).regularizationType
+//
+//    val objective = DistributedGLMLossFunction(optConfig, LogisticLossFunction, treeAggregateDepth = 1)
+//
+//    val optimizationProblem = new DistributedOptimizationProblem(
+//      optimizer,
+//      objective,
+//      samplerOption = None,
+//      glmConstructorMock,
+//      NoRegularizationContext,
+//      VarianceComputationType.FULL)
+//
+//    // Produced by the reference implementation in R glm
+//    val expected = DenseVector(
+//      0.0007320271,
+//      0.3204454,
+//      0.05394657,
+//      0.0001520536,
+//      1.787598e-05,
+//      0.3898167,
+//      0.04483891,
+//      0.0001226556,
+//      0.2006968,
+//      0.05705076,
+//      0.1752335,
+//      0.08054471,
+//      0.01292064,
+//      10.37188)
+//
+//    // From a prior optimization run
+//    val coefficients = DenseVector(
+//      -0.022306127,
+//      1.299914831,
+//      0.792316427,
+//      0.033470557,
+//      0.004679123,
+//      -0.459432925,
+//      0.294831754,
+//      -0.023566341,
+//      0.890054910,
+//      0.410533616,
+//      0.216417307,
+//      1.167698255,
+//      0.367261286,
+//      -8.303806435)
+//    val actual: Vector[Double] = optimizationProblem.computeVariances(input, coefficients).get
+//
+//    VectorUtils.areAlmostEqual(actual, expected)
+//  }
+//}
+//
+//object DistributedOptimizationProblemIntegTest {
+//
+//  // No way to pass Mixin class type to Mockito, need to define a concrete class
+//  private class L2LossFunction(sc: SparkContext)
+//    extends DistributedSmoothedHingeLossFunction(treeAggregateDepth = 1)
+//      with L2RegularizationDiff
+//}
diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala
index b1b5e8b1..b12a4842 100644
--- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala
+++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala
@@ -1,310 +1,310 @@
-/*
- * Copyright 2018 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License. You may obtain a
- * copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package com.linkedin.photon.ml.optimization
-
-import java.util.Random
-
-import scala.io.Source
-
-import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, pinv}
-import org.mockito.Mockito._
-import org.testng.Assert._
-import org.testng.annotations.{DataProvider, Test}
-
-import com.linkedin.photon.ml.constants.MathConst
-import com.linkedin.photon.ml.data.LabeledPoint
-import com.linkedin.photon.ml.function.glm._
-import com.linkedin.photon.ml.model.Coefficients
-import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration
-import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.test.{CommonTestUtils, SparkTestUtils}
-import com.linkedin.photon.ml.util.VectorUtils
-
-/**
- * Integration tests for [[SingleNodeOptimizationProblem]].
- */
-class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils {
-
-  import CommonTestUtils._
-
-  /**
-   * Function to generate a mock [[GeneralizedLinearModel]].
-   *
-   * @param coefficients Model coefficients (unused)
-   * @return A mocked [[GeneralizedLinearModel]]
-   */
-  def glmConstructorMock(coefficients: Coefficients): GeneralizedLinearModel = mock(classOf[GeneralizedLinearModel])
-
-  /**
-   * Generate weighted benign datasets for binary classification.
-   *
-   * @return A Seq of [[LabeledPoint]]
-   */
-  def generateWeightedBenignDatasetBinaryClassification: Seq[LabeledPoint] = {
-
-    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
-
-    drawBalancedSampleFromNumericallyBenignDenseFeaturesForBinaryClassifierLocal(
-      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
-      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
-      OptimizationProblemIntegTestUtils.DIMENSIONS)
-      .map { obj =>
-        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
-        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
-        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
-      }
-      .toList
-  }
-  /**
-   * Generate weighted benign datasets for linear regression.
-   *
-   * @return A Seq of [[LabeledPoint]]
-   */
-  def generateWeightedBenignDatasetLinearRegression: Seq[LabeledPoint] = {
-
-    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
-
-    drawSampleFromNumericallyBenignDenseFeaturesForLinearRegressionLocal(
-      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
-      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
-      OptimizationProblemIntegTestUtils.DIMENSIONS)
-      .map { obj =>
-        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
-        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
-        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
-      }
-      .toList
-  }
-
-  /**
-   * Generate weighted benign datasets for Poisson regression.
-   *
-   * @return A Seq of [[LabeledPoint]]
-   */
-  def generateWeightedBenignDatasetPoissonRegression: Seq[LabeledPoint] = {
-
-    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
-
-    drawSampleFromNumericallyBenignDenseFeaturesForPoissonRegressionLocal(
-      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
-      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
-      OptimizationProblemIntegTestUtils.DIMENSIONS)
-      .map { obj =>
-        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
-        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
-        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
-      }
-      .toList
-  }
-
-  @DataProvider(parallel = true)
-  def varianceInput(): Array[Array[Any]] = {
-
-    val regularizationWeights = Array[Double](0.1, 0.0, 1.0, 10.0, 100.0)
-
-    val linearData = generateWeightedBenignDatasetLinearRegression
-    val logisticData = generateWeightedBenignDatasetBinaryClassification
-    val poissonData = generateWeightedBenignDatasetPoissonRegression
-
-    // Regularization weight, input data generation function, objective function, manual Hessian calculation function
-    regularizationWeights.flatMap { weight =>
-      Array(
-        Array[Any](
-          weight,
-          logisticData,
-          LogisticLossFunction,
-          OptimizationProblemIntegTestUtils.logisticDzzLoss _),
-        Array[Any](
-          weight,
-          linearData,
-          SquaredLossFunction,
-          OptimizationProblemIntegTestUtils.linearDzzLoss _),
-        Array[Any](
-          weight,
-          poissonData,
-          PoissonLossFunction,
-          OptimizationProblemIntegTestUtils.poissonDzzLoss _))
-    }
-  }
-
-  /**
-   * Test simple coefficient variance computation for weighted data points, with regularization.
-   *
-   * @param regularizationWeight Regularization weight
-   * @param inputData Input test data
-   * @param lossFunction Loss function for optimization
-   * @param DzzLossFunction Function to compute coefficient Hessian directly
-   */
-  @Test(dataProvider = "varianceInput")
-  def testComputeVariancesSimple(
-      regularizationWeight: Double,
-      inputData: Seq[LabeledPoint],
-      lossFunction: PointwiseLossFunction,
-      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = {
-
-    val coefficients = generateDenseVector(OptimizationProblemIntegTestUtils.DIMENSIONS)
-
-    val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]])
-    val statesTracker = mock(classOf[OptimizationStatesTracker])
-    val regContext = mock(classOf[RegularizationContext])
-    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
-
-    doReturn(statesTracker).when(optimizer).getStateTracker
-    doReturn(regContext).when(optConfig).regularizationContext
-    doReturn(regularizationWeight).when(optConfig).regularizationWeight
-    doReturn(RegularizationType.L2).when(regContext).regularizationType
-    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
-
-    val objective = SingleNodeGLMLossFunction(optConfig, lossFunction)
-
-    val optimizationProblem = new SingleNodeOptimizationProblem(
-      optimizer,
-      objective,
-      glmConstructorMock,
-      VarianceComputationType.SIMPLE)
-
-    val hessianDiagonal = inputData.aggregate(DenseVector.zeros[Double](OptimizationProblemIntegTestUtils.DIMENSIONS))(
-      seqop = (vector: DenseVector[Double], datum: LabeledPoint) => {
-        diag(OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients))(diag(vector), datum))
-      },
-      combop = (vector1: DenseVector[Double], vector2: DenseVector[Double]) => vector1 + vector2)
-    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
-    val expected = (hessianDiagonal + regularizationWeight).map( v => 1D / (v + MathConst.EPSILON))
-    val actual: Vector[Double] = optimizationProblem.computeVariances(inputData, coefficients).get
-
-    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
-  }
-
-  /**
-   * Test full coefficient variance computation for weighted data points, with regularization.
-   *
-   * @param regularizationWeight Regularization weight
-   * @param inputData Input test data
-   * @param lossFunction Loss function for optimization
-   * @param DzzLossFunction Function to compute coefficient Hessian directly
-   */
-  @Test(dataProvider = "varianceInput")
-  def testComputeVariancesFull(
-      regularizationWeight: Double,
-      inputData: Seq[LabeledPoint],
-      lossFunction: PointwiseLossFunction,
-      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = {
-
-    val dimensions = OptimizationProblemIntegTestUtils.DIMENSIONS
-    val coefficients = generateDenseVector(dimensions)
-
-    val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]])
-    val statesTracker = mock(classOf[OptimizationStatesTracker])
-    val regContext = mock(classOf[RegularizationContext])
-    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
-
-    doReturn(statesTracker).when(optimizer).getStateTracker
-    doReturn(regContext).when(optConfig).regularizationContext
-    doReturn(regularizationWeight).when(optConfig).regularizationWeight
-    doReturn(RegularizationType.L2).when(regContext).regularizationType
-    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
-
-    val objective = SingleNodeGLMLossFunction(optConfig, lossFunction)
-
-    val optimizationProblem = new SingleNodeOptimizationProblem(
-      optimizer,
-      objective,
-      glmConstructorMock,
-      VarianceComputationType.FULL)
-
-    val hessianMatrix = inputData.aggregate(
-      DenseMatrix.zeros[Double](dimensions, dimensions))(
-      seqop = OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients)),
-      combop = (matrix1: DenseMatrix[Double], matrix2: DenseMatrix[Double]) => matrix1 + matrix2)
-    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
-    val expected = diag(pinv(hessianMatrix + (DenseMatrix.eye[Double](dimensions) * regularizationWeight)))
-    val actual: Vector[Double] = optimizationProblem.computeVariances(inputData, coefficients).get
-
-    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
-  }
-
-  /**
-   * Test the variance computation against a reference implementation in R glm.
-   */
-  @Test
-  def testComputeVariancesAgainstReference(): Unit = {
-
-    // Read the "heart disease" dataset from libSVM format
-    val input = Source
-      .fromFile(getClass.getClassLoader.getResource("DriverIntegTest/input/heart.txt").toURI)
-      .getLines()
-      .map { x =>
-        val y = x.split(" ")
-        val label = y(0).toDouble / 2 + 0.5
-        val features = y.drop(1).map(z => z.split(":")(1).toDouble) :+ 1.0
-
-        new LabeledPoint(label, DenseVector(features))
-      }
-
-    val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]])
-    val statesTracker = mock(classOf[OptimizationStatesTracker])
-    val regContext = mock(classOf[RegularizationContext])
-    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
-
-    doReturn(statesTracker).when(optimizer).getStateTracker
-    doReturn(regContext).when(optConfig).regularizationContext
-    doReturn(RegularizationType.NONE).when(regContext).regularizationType
-
-    val objective = SingleNodeGLMLossFunction(optConfig, LogisticLossFunction)
-
-    val optimizationProblem = new SingleNodeOptimizationProblem(
-      optimizer,
-      objective,
-      glmConstructorMock,
-      VarianceComputationType.FULL)
-
-    // Produced by the reference implementation in R glm
-    val expected = DenseVector(
-      0.0007320271,
-      0.3204454,
-      0.05394657,
-      0.0001520536,
-      1.787598e-05,
-      0.3898167,
-      0.04483891,
-      0.0001226556,
-      0.2006968,
-      0.05705076,
-      0.1752335,
-      0.08054471,
-      0.01292064,
-      10.37188)
-
-    // From a prior optimization run
-    val coefficients = DenseVector(
-      -0.022306127,
-      1.299914831,
-      0.792316427,
-      0.033470557,
-      0.004679123,
-      -0.459432925,
-      0.294831754,
-      -0.023566341,
-      0.890054910,
-      0.410533616,
-      0.216417307,
-      1.167698255,
-      0.367261286,
-      -8.303806435)
-    val actual: Vector[Double] = optimizationProblem.computeVariances(input.toIterable, coefficients).get
-
-    VectorUtils.areAlmostEqual(actual, expected)
-  }
-}
+///*
+// * Copyright 2018 LinkedIn Corp. All rights reserved.
+// * Licensed under the Apache License, Version 2.0 (the "License"); you may
+// * not use this file except in compliance with the License. You may obtain a
+// * copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// * License for the specific language governing permissions and limitations
+// * under the License.
+// */
+//package com.linkedin.photon.ml.optimization
+//
+//import java.util.Random
+//
+//import scala.io.Source
+//
+//import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, pinv}
+//import org.mockito.Mockito._
+//import org.testng.Assert._
+//import org.testng.annotations.{DataProvider, Test}
+//
+//import com.linkedin.photon.ml.constants.MathConst
+//import com.linkedin.photon.ml.data.LabeledPoint
+//import com.linkedin.photon.ml.function.glm._
+//import com.linkedin.photon.ml.model.Coefficients
+//import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration
+//import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
+//import com.linkedin.photon.ml.test.{CommonTestUtils, SparkTestUtils}
+//import com.linkedin.photon.ml.util.VectorUtils
+//
+///**
+// * Integration tests for [[SingleNodeOptimizationProblem]].
+// */
+//class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils {
+//
+//  import CommonTestUtils._
+//
+//  /**
+//   * Function to generate a mock [[GeneralizedLinearModel]].
+//   *
+//   * @param coefficients Model coefficients (unused)
+//   * @return A mocked [[GeneralizedLinearModel]]
+//   */
+//  def glmConstructorMock(coefficients: Coefficients): GeneralizedLinearModel = mock(classOf[GeneralizedLinearModel])
+//
+//  /**
+//   * Generate weighted benign datasets for binary classification.
+//   *
+//   * @return A Seq of [[LabeledPoint]]
+//   */
+//  def generateWeightedBenignDatasetBinaryClassification: Seq[LabeledPoint] = {
+//
+//    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
+//
+//    drawBalancedSampleFromNumericallyBenignDenseFeaturesForBinaryClassifierLocal(
+//      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
+//      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
+//      OptimizationProblemIntegTestUtils.DIMENSIONS)
+//      .map { obj =>
+//        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
+//        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
+//        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
+//      }
+//      .toList
+//  }
+//  /**
+//   * Generate weighted benign datasets for linear regression.
+//   *
+//   * @return A Seq of [[LabeledPoint]]
+//   */
+//  def generateWeightedBenignDatasetLinearRegression: Seq[LabeledPoint] = {
+//
+//    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
+//
+//    drawSampleFromNumericallyBenignDenseFeaturesForLinearRegressionLocal(
+//      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
+//      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
+//      OptimizationProblemIntegTestUtils.DIMENSIONS)
+//      .map { obj =>
+//        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
+//        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
+//        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
+//      }
+//      .toList
+//  }
+//
+//  /**
+//   * Generate weighted benign datasets for Poisson regression.
+//   *
+//   * @return A Seq of [[LabeledPoint]]
+//   */
+//  def generateWeightedBenignDatasetPoissonRegression: Seq[LabeledPoint] = {
+//
+//    val r = new Random(OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_SEED)
+//
+//    drawSampleFromNumericallyBenignDenseFeaturesForPoissonRegressionLocal(
+//      OptimizationProblemIntegTestUtils.DATA_RANDOM_SEED,
+//      OptimizationProblemIntegTestUtils.TRAINING_SAMPLES,
+//      OptimizationProblemIntegTestUtils.DIMENSIONS)
+//      .map { obj =>
+//        assertEquals(obj._2.length, OptimizationProblemIntegTestUtils.DIMENSIONS, "Samples should have expected lengths")
+//        val weight: Double = r.nextDouble() * OptimizationProblemIntegTestUtils.WEIGHT_RANDOM_MAX
+//        new LabeledPoint(label = obj._1, features = obj._2, weight = weight)
+//      }
+//      .toList
+//  }
+//
+//  @DataProvider(parallel = true)
+//  def varianceInput(): Array[Array[Any]] = {
+//
+//    val regularizationWeights = Array[Double](0.1, 0.0, 1.0, 10.0, 100.0)
+//
+//    val linearData = generateWeightedBenignDatasetLinearRegression
+//    val logisticData = generateWeightedBenignDatasetBinaryClassification
+//    val poissonData = generateWeightedBenignDatasetPoissonRegression
+//
+//    // Regularization weight, input data generation function, objective function, manual Hessian calculation function
+//    regularizationWeights.flatMap { weight =>
+//      Array(
+//        Array[Any](
+//          weight,
+//          logisticData,
+//          LogisticLossFunction,
+//          OptimizationProblemIntegTestUtils.logisticDzzLoss _),
+//        Array[Any](
+//          weight,
+//          linearData,
+//          SquaredLossFunction,
+//          OptimizationProblemIntegTestUtils.linearDzzLoss _),
+//        Array[Any](
+//          weight,
+//          poissonData,
+//          PoissonLossFunction,
+//          OptimizationProblemIntegTestUtils.poissonDzzLoss _))
+//    }
+//  }
+//
+//  /**
+//   * Test simple coefficient variance computation for weighted data points, with regularization.
+//   *
+//   * @param regularizationWeight Regularization weight
+//   * @param inputData Input test data
+//   * @param lossFunction Loss function for optimization
+//   * @param DzzLossFunction Function to compute coefficient Hessian directly
+//   */
+//  @Test(dataProvider = "varianceInput")
+//  def testComputeVariancesSimple(
+//      regularizationWeight: Double,
+//      inputData: Seq[LabeledPoint],
+//      lossFunction: PointwiseLossFunction,
+//      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = {
+//
+//    val coefficients = generateDenseVector(OptimizationProblemIntegTestUtils.DIMENSIONS)
+//
+//    val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]])
+//    val statesTracker = mock(classOf[OptimizationStatesTracker])
+//    val regContext = mock(classOf[RegularizationContext])
+//    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
+//
+//    doReturn(statesTracker).when(optimizer).getStateTracker
+//    doReturn(regContext).when(optConfig).regularizationContext
+//    doReturn(regularizationWeight).when(optConfig).regularizationWeight
+//    doReturn(RegularizationType.L2).when(regContext).regularizationType
+//    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
+//
+//    val objective = SingleNodeGLMLossFunction(optConfig, lossFunction)
+//
+//    val optimizationProblem = new SingleNodeOptimizationProblem(
+//      optimizer,
+//      objective,
+//      glmConstructorMock,
+//      VarianceComputationType.SIMPLE)
+//
+//    val hessianDiagonal = inputData.aggregate(DenseVector.zeros[Double](OptimizationProblemIntegTestUtils.DIMENSIONS))(
+//      seqop = (vector: DenseVector[Double], datum: LabeledPoint) => {
+//        diag(OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients))(diag(vector), datum))
+//      },
+//      combop = (vector1: DenseVector[Double], vector2: DenseVector[Double]) => vector1 + vector2)
+//    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
+//    val expected = (hessianDiagonal + regularizationWeight).map( v => 1D / (v + MathConst.EPSILON))
+//    val actual: Vector[Double] = optimizationProblem.computeVariances(inputData, coefficients).get
+//
+//    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
+//  }
+//
+//  /**
+//   * Test full coefficient variance computation for weighted data points, with regularization.
+//   *
+//   * @param regularizationWeight Regularization weight
+//   * @param inputData Input test data
+//   * @param lossFunction Loss function for optimization
+//   * @param DzzLossFunction Function to compute coefficient Hessian directly
+//   */
+//  @Test(dataProvider = "varianceInput")
+//  def testComputeVariancesFull(
+//      regularizationWeight: Double,
+//      inputData: Seq[LabeledPoint],
+//      lossFunction: PointwiseLossFunction,
+//      DzzLossFunction: Vector[Double] => (LabeledPoint => Double)): Unit = {
+//
+//    val dimensions = OptimizationProblemIntegTestUtils.DIMENSIONS
+//    val coefficients = generateDenseVector(dimensions)
+//
+//    val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]])
+//    val statesTracker = mock(classOf[OptimizationStatesTracker])
+//    val regContext = mock(classOf[RegularizationContext])
+//    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
+//
+//    doReturn(statesTracker).when(optimizer).getStateTracker
+//    doReturn(regContext).when(optConfig).regularizationContext
+//    doReturn(regularizationWeight).when(optConfig).regularizationWeight
+//    doReturn(RegularizationType.L2).when(regContext).regularizationType
+//    doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight)
+//
+//    val objective = SingleNodeGLMLossFunction(optConfig, lossFunction)
+//
+//    val optimizationProblem = new SingleNodeOptimizationProblem(
+//      optimizer,
+//      objective,
+//      glmConstructorMock,
+//      VarianceComputationType.FULL)
+//
+//    val hessianMatrix = inputData.aggregate(
+//      DenseMatrix.zeros[Double](dimensions, dimensions))(
+//      seqop = OptimizationProblemIntegTestUtils.hessianSum(DzzLossFunction(coefficients)),
+//      combop = (matrix1: DenseMatrix[Double], matrix2: DenseMatrix[Double]) => matrix1 + matrix2)
+//    // Simple estimate of the diagonal of the covariance matrix (instead of a full inverse).
+//    val expected = diag(pinv(hessianMatrix + (DenseMatrix.eye[Double](dimensions) * regularizationWeight)))
+//    val actual: Vector[Double] = optimizationProblem.computeVariances(inputData, coefficients).get
+//
+//    assertTrue(VectorUtils.areAlmostEqual(actual, expected))
+//  }
+//
+//  /**
+//   * Test the variance computation against a reference implementation in R glm.
+//   */
+//  @Test
+//  def testComputeVariancesAgainstReference(): Unit = {
+//
+//    // Read the "heart disease" dataset from libSVM format
+//    val input = Source
+//      .fromFile(getClass.getClassLoader.getResource("DriverIntegTest/input/heart.txt").toURI)
+//      .getLines()
+//      .map { x =>
+//        val y = x.split(" ")
+//        val label = y(0).toDouble / 2 + 0.5
+//        val features = y.drop(1).map(z => z.split(":")(1).toDouble) :+ 1.0
+//
+//        new LabeledPoint(label, DenseVector(features))
+//      }
+//
+//    val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]])
+//    val statesTracker = mock(classOf[OptimizationStatesTracker])
+//    val regContext = mock(classOf[RegularizationContext])
+//    val optConfig = mock(classOf[FixedEffectOptimizationConfiguration])
+//
+//    doReturn(statesTracker).when(optimizer).getStateTracker
+//    doReturn(regContext).when(optConfig).regularizationContext
+//    doReturn(RegularizationType.NONE).when(regContext).regularizationType
+//
+//    val objective = SingleNodeGLMLossFunction(optConfig, LogisticLossFunction)
+//
+//    val optimizationProblem = new SingleNodeOptimizationProblem(
+//      optimizer,
+//      objective,
+//      glmConstructorMock,
+//      VarianceComputationType.FULL)
+//
+//    // Produced by the reference implementation in R glm
+//    val expected = DenseVector(
+//      0.0007320271,
+//      0.3204454,
+//      0.05394657,
+//      0.0001520536,
+//      1.787598e-05,
+//      0.3898167,
+//      0.04483891,
+//      0.0001226556,
+//      0.2006968,
+//      0.05705076,
+//      0.1752335,
+//      0.08054471,
+//      0.01292064,
+//      10.37188)
+//
+//    // From a prior optimization run
+//    val coefficients = DenseVector(
+//      -0.022306127,
+//      1.299914831,
+//      0.792316427,
+//      0.033470557,
+//      0.004679123,
+//      -0.459432925,
+//      0.294831754,
+//      -0.023566341,
+//      0.890054910,
+//      0.410533616,
+//      0.216417307,
+//      1.167698255,
+//      0.367261286,
+//      -8.303806435)
+//    val actual: Vector[Double] = optimizationProblem.computeVariances(input.toIterable, coefficients).get
+//
+//    VectorUtils.areAlmostEqual(actual, expected)
+//  }
+//}
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/model/RandomEffectModel.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/model/RandomEffectModel.scala
index c131d4cc..3fc366e9 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/model/RandomEffectModel.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/model/RandomEffectModel.scala
@@ -110,9 +110,6 @@ class RandomEffectModel(
     stringBuilder.append(s"\nFeature Shard ID: '$featureShardId'")
     stringBuilder.append(s"\nLength: ${modelsRDD.values.map(_.coefficients.means.length).stats()}")
     stringBuilder.append(s"\nMean: ${modelsRDD.values.map(_.coefficients.meansL2Norm).stats()}")
-    if (modelsRDD.first()._2.coefficients.variancesOption.isDefined) {
-      stringBuilder.append(s"\nVariance: ${modelsRDD.values.map(_.coefficients.variancesL2NormOption.get).stats()}")
-    }
 
     stringBuilder.toString()
   }
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala
index 6e3be671..9b707fe4 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblem.scala
@@ -14,12 +14,11 @@
  */
 package com.linkedin.photon.ml.optimization
 
-import breeze.linalg.{Vector, cholesky, diag}
+import breeze.linalg.{Matrix, Vector, cholesky}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 import com.linkedin.photon.ml.Types.UniqueSampleId
-import com.linkedin.photon.ml.constants.MathConst
 import com.linkedin.photon.ml.data.LabeledPoint
 import com.linkedin.photon.ml.function.{DistributedObjectiveFunction, L2Regularization, TwiceDiffFunction}
 import com.linkedin.photon.ml.model.Coefficients
@@ -28,7 +27,7 @@ import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceCompu
 import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration
 import com.linkedin.photon.ml.sampling.DownSampler
 import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
+import com.linkedin.photon.ml.util.BroadcastWrapper
 import com.linkedin.photon.ml.util.Linalg.choleskyInverse
 
 /**
@@ -77,25 +76,22 @@ protected[ml] class DistributedOptimizationProblem[Objective <: DistributedObjec
   }
 
   /**
-   * Compute coefficient variances (if enabled).
+   * Compute coefficient variances (if enabled). Full Hessian matrix will be output if variance computation type is
+   * set to be FULL. For other variance computation type, NONE will be output.
    *
    * @param input The training data
    * @param coefficients The feature coefficients means
    * @return An optional feature coefficient variances vector
    */
-  override def computeVariances(input: RDD[LabeledPoint], coefficients: Vector[Double]): Option[Vector[Double]] = {
+  override def computeVariances(input: RDD[LabeledPoint], coefficients: Vector[Double]): Option[Matrix[Double]] = {
 
     val broadcastCoefficients = input.sparkContext.broadcast(coefficients)
 
     val result = (objectiveFunction, varianceComputation) match {
-      case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.SIMPLE) =>
-        Some(VectorUtils.invertVector(twiceDiffFunc.hessianDiagonal(input, broadcastCoefficients)))
 
       case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.FULL) =>
         val hessianMatrix = twiceDiffFunc.hessianMatrix(input, broadcastCoefficients)
-        val invHessianMatrix = choleskyInverse(cholesky(hessianMatrix))
-
-        Some(diag(invHessianMatrix))
+        Some(hessianMatrix)
 
       case _ =>
         None
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala
index 4766cc2a..50c5dbbc 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala
@@ -15,9 +15,7 @@
 package com.linkedin.photon.ml.optimization
 
 import scala.math.abs
-
-import breeze.linalg.{Vector, sum}
-
+import breeze.linalg.{Matrix, Vector, sum}
 import com.linkedin.photon.ml.function.{L2Regularization, ObjectiveFunction}
 import com.linkedin.photon.ml.model.Coefficients
 import com.linkedin.photon.ml.normalization.NormalizationContext
@@ -64,7 +62,7 @@ protected[ml] abstract class GeneralizedLinearOptimizationProblem[Objective <: O
    * @param variances The feature coefficient variances
    * @return A GLM with the provided feature coefficients
    */
-  protected def createModel(coefficients: Vector[Double], variances: Option[Vector[Double]]): GeneralizedLinearModel =
+  protected def createModel(coefficients: Vector[Double], variances: Option[Matrix[Double]]): GeneralizedLinearModel =
     glmConstructor(Coefficients(coefficients, variances))
 
   /**
@@ -78,10 +76,11 @@ protected[ml] abstract class GeneralizedLinearOptimizationProblem[Objective <: O
   protected def createModel(
       normalizationContext: BroadcastWrapper[NormalizationContext],
       coefficients: Vector[Double],
-      variances: Option[Vector[Double]]): GeneralizedLinearModel =
+      variances: Option[Matrix[Double]]): GeneralizedLinearModel =
+  // need to check
     createModel(
       normalizationContext.value.modelToOriginalSpace(coefficients),
-      variances.map(normalizationContext.value.modelToOriginalSpace))
+      variances.map(normalizationContext.value.varianceToOriginalSpace))
 
   /**
    * Compute coefficient variances
@@ -90,7 +89,7 @@ protected[ml] abstract class GeneralizedLinearOptimizationProblem[Objective <: O
    * @param coefficients The feature coefficients means
    * @return The feature coefficient variances
    */
-  def computeVariances(input: objectiveFunction.Data, coefficients: Vector[Double]): Option[Vector[Double]]
+  def computeVariances(input: objectiveFunction.Data, coefficients: Vector[Double]): Option[Matrix[Double]]
 
   /**
    * Run the optimization algorithm on the input data, starting from an initial model of all-0 coefficients.
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala
index 58a17393..5359e106 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblem.scala
@@ -14,8 +14,7 @@
  */
 package com.linkedin.photon.ml.optimization
 
-import breeze.linalg.{Vector, cholesky, diag}
-
+import breeze.linalg.{DenseMatrix, Matrix, Vector, cholesky, diag}
 import com.linkedin.photon.ml.constants.MathConst
 import com.linkedin.photon.ml.data.LabeledPoint
 import com.linkedin.photon.ml.function._
@@ -50,22 +49,19 @@ protected[ml] class SingleNodeOptimizationProblem[Objective <: SingleNodeObjecti
   with Serializable {
 
   /**
-   * Compute coefficient variances (if enabled).
+   * Compute coefficient variances (if enabled). Full Hessian matrix will be output if variance computation type is
+   * set to be FULL. For other variance computation type, NONE will be output.
    *
    * @param input The training data
    * @param coefficients The feature coefficients means
    * @return An optional feature coefficient variances vector
    */
-  override def computeVariances(input: Iterable[LabeledPoint], coefficients: Vector[Double]): Option[Vector[Double]] =
+  override def computeVariances(input: Iterable[LabeledPoint], coefficients: Vector[Double]): Option[DenseMatrix[Double]] =
     (objectiveFunction, varianceComputationType) match {
-      case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.SIMPLE) =>
-        Some(VectorUtils.invertVector(twiceDiffFunc.hessianDiagonal(input, coefficients)))
 
       case (twiceDiffFunc: TwiceDiffFunction, VarianceComputationType.FULL) =>
         val hessianMatrix = twiceDiffFunc.hessianMatrix(input, coefficients)
-        val invHessianMatrix = choleskyInverse(cholesky(hessianMatrix))
-
-        Some(diag(invHessianMatrix))
+        Some(hessianMatrix)
 
       case _ =>
         None
diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/projector/LinearSubspaceProjector.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/projector/LinearSubspaceProjector.scala
index 5c3d03bf..457b288b 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/projector/LinearSubspaceProjector.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/projector/LinearSubspaceProjector.scala
@@ -14,12 +14,13 @@
  */
 package com.linkedin.photon.ml.projector
 
-import breeze.linalg.Vector
+import breeze.linalg.{Matrix, Vector}
 
 import com.linkedin.photon.ml.util.VectorUtils
 
 /**
- * Project [[Vector]] objects between spaces, where the projected space is a linear subspace of the original space.
+ * Project [[Vector]] / [[Matrix]] objects between spaces, where the projected space is a linear subspace of the
+ * original space.
  *
  * An example use case is training models on a subset of features, where a reduction in vector size will greatly
  * improve performance.
@@ -55,6 +56,15 @@ protected[ml] class LinearSubspaceProjector(subspaceIndices: Set[Int], dimension
   def projectForward(input: Vector[Double]): Vector[Double] =
     remapVector(input, originalToProjectedSpaceMap, projectedSpaceDimension)
 
+  /**
+   * Project [[Matrix]] to subspace.
+   *
+   * @param input A [[Matrix]] in the original space
+   * @return The same [[Matrix]] in the projected space
+   */
+  def projectForward(input: Matrix[Double]): Matrix[Double] =
+    remapMatrix(input, originalToProjectedSpaceMap, projectedSpaceDimension)
+
   /**
    * Project coefficients into the new space.
    *
@@ -63,6 +73,15 @@ protected[ml] class LinearSubspaceProjector(subspaceIndices: Set[Int], dimension
    */
   def projectBackward(input: Vector[Double]): Vector[Double] =
     remapVector(input, projectedToOriginalSpaceMap, originalSpaceDimension)
+
+  /**
+   * Project coefficients into the new space.
+   *
+   * @param input A [[Matrix]] in the projected space
+   * @return The same [[Matrix]] in the original space
+   */
+  def projectBackward(input: Matrix[Double]): Matrix[Double] =
+    remapMatrix(input, projectedToOriginalSpaceMap, originalSpaceDimension)
 }
 
 object LinearSubspaceProjector {
@@ -85,4 +104,31 @@ object LinearSubspaceProjector {
 
     VectorUtils.toVector(indexAndData, dimension)
   }
+
+  /**
+   * Create a new [[Matrix]] by mapping the indices of an existing [[Matrix]].
+   *
+   * @param matrix The input [[Matrix]]
+   * @param map The map of old index to new index
+   * @param dimension The dimension of the new [[Matrix]]
+   * @return A new [[Matrix]] with re-mapped indices
+   */
+  private def remapMatrix(matrix: Matrix[Double], map: Map[Int, Int], dimension: Int): Matrix[Double] = {
+
+    // map matrix from higher dimension to lower dimension
+    val keys = map.keySet
+    val crossKeys = for {a <- keys; b <- keys} yield (a, b)
+
+    val matrixMap: Map[(Int, Int), (Int, Int)] = crossKeys.map {
+      case (a, b) => (a, b) -> (map(a), map(b))
+    }.toMap
+
+    val indexAndData = matrix
+      .activeIterator
+      .filter { case (key, _) => matrixMap.contains(key) }
+      .map { case (key, value) => (matrixMap(key)._1, matrixMap(key)._2, value) }
+      .toArray
+
+    VectorUtils.toMatrix(indexAndData, dimension)
+  }
 }
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala
index 60fa7863..b956269b 100644
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala
+++ b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala
@@ -66,15 +66,14 @@ class DistributedOptimizationProblemTest {
       .when(mockTwiceDiffFunction)
       .hessianMatrix(Matchers.any(), Matchers.any())
 
-    val diagonalVariance = DenseVector(Array(1D, 1D / MathConst.EPSILON, 0.5))
-    val matrixVariance = DenseVector(Array.fill(DIMENSIONS)(1D))
+    val matrixVariance = DenseMatrix.eye[Double](DIMENSIONS)
 
     Array(
       // var type, function, expected result
       Array(VarianceComputationType.NONE, mockOptimizerDiff, mockDiffFunction, None),
       Array(VarianceComputationType.NONE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, None),
       Array(VarianceComputationType.SIMPLE, mockOptimizerDiff, mockDiffFunction, None),
-      Array(VarianceComputationType.SIMPLE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(diagonalVariance)),
+      Array(VarianceComputationType.SIMPLE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, None),
       Array(VarianceComputationType.FULL, mockOptimizerDiff, mockDiffFunction, None),
       Array(VarianceComputationType.FULL, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(matrixVariance)))
   }
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala
index ba1aabc5..a08dc8c2 100644
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala
+++ b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala
@@ -16,7 +16,7 @@ package com.linkedin.photon.ml.optimization
 
 import scala.math.abs
 
-import breeze.linalg.{Vector, sum}
+import breeze.linalg.{Matrix, Vector, sum}
 import org.mockito.Mockito._
 import org.testng.Assert._
 import org.testng.annotations.Test
@@ -226,7 +226,7 @@ object GeneralizedLinearOptimizationProblemTest {
     /**
      * Publi version of [[createModel]]
      */
-    def publicCreateModel(coefficients: Vector[Double], variances: Option[Vector[Double]]): GeneralizedLinearModel =
+    def publicCreateModel(coefficients: Vector[Double], variances: Option[Matrix[Double]]): GeneralizedLinearModel =
       createModel(coefficients, variances)
 
     //
@@ -236,7 +236,7 @@ object GeneralizedLinearOptimizationProblemTest {
     /**
      * Unused - needs definition for testing.
      */
-    override def computeVariances(input: Iterable[LabeledPoint], coefficients: Vector[Double]): Option[Vector[Double]] =
+    override def computeVariances(input: Iterable[LabeledPoint], coefficients: Vector[Double]): Option[Matrix[Double]] =
       None
 
     /**
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala
index 8170a874..3ebd51c4 100644
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala
+++ b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala
@@ -63,15 +63,14 @@ class SingleNodeOptimizationProblemTest {
       .when(mockTwiceDiffFunction)
       .hessianMatrix(Matchers.any(), Matchers.any())
 
-    val diagonalVariance = DenseVector(Array(1D, 1D / MathConst.EPSILON, 0.5))
-    val matrixVariance = DenseVector(Array.fill(DIMENSIONS)(1D))
+    val matrixVariance = DenseMatrix.eye[Double](DIMENSIONS)
 
     Array(
       // var type, function, expected result
       Array(VarianceComputationType.NONE, mockOptimizerDiff, mockDiffFunction, None),
       Array(VarianceComputationType.NONE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, None),
       Array(VarianceComputationType.SIMPLE, mockOptimizerDiff, mockDiffFunction, None),
-      Array(VarianceComputationType.SIMPLE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(diagonalVariance)),
+      Array(VarianceComputationType.SIMPLE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, None),
       Array(VarianceComputationType.FULL, mockOptimizerDiff, mockDiffFunction, None),
       Array(VarianceComputationType.FULL, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(matrixVariance)))
   }
diff --git a/photon-avro-schemas/src/main/avro/BayesianLinearModelFullMatrixAvro.avsc b/photon-avro-schemas/src/main/avro/BayesianLinearModelFullMatrixAvro.avsc
new file mode 100644
index 00000000..556a143b
--- /dev/null
+++ b/photon-avro-schemas/src/main/avro/BayesianLinearModelFullMatrixAvro.avsc
@@ -0,0 +1,48 @@
+{
+    "name": "BayesianLinearModelFullMatrixAvro",
+    "namespace": "com.linkedin.photon.avro.generated",
+    "type": "record",
+    "doc": "a generic schema to describe a Bayesian linear model with means and variances",
+    "fields": [
+        {
+            "name": "modelId",
+            "type": "string"
+        },
+        {
+            "default": null,
+            "name": "modelClass",
+            "type": [
+                "null",
+                "string"
+            ],
+            "doc": "The fully-qualified class name of enclosing GLM model class. E.g.: com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel"
+        },
+        {
+            "name": "means",
+            "type": {
+                "items": "NameTermValueAvro",
+                "type": "array"
+            }
+        },
+        {
+            "default": null,
+            "name": "variances",
+            "type" : [
+                "null",
+                {
+                    "items" : "DoubleNameTermValueAvro",
+                    "type"  : "array"
+                }
+            ]
+        },
+        {
+            "default": null,
+            "name": "lossFunction",
+            "type": [
+                "null",
+                "string"
+             ],
+            "doc": "The loss function used for training as the class name. E.g.: com.linkedin.photon.ml.function.LogisticLossFunction"
+        }
+    ]
+}
diff --git a/photon-avro-schemas/src/main/avro/DoubleNameTermValueAvro.avsc b/photon-avro-schemas/src/main/avro/DoubleNameTermValueAvro.avsc
new file mode 100644
index 00000000..7e5eb0d1
--- /dev/null
+++ b/photon-avro-schemas/src/main/avro/DoubleNameTermValueAvro.avsc
@@ -0,0 +1,28 @@
+{
+    "name": "DoubleNameTermValueAvro",
+    "namespace": "com.linkedin.photon.avro.generated",
+    "type": "record",
+    "doc": "A tuple of name1, term1, name2, term2 and value. Used as representation for covariance matrix",
+    "fields": [
+        {
+            "name": "name1",
+            "type": "string"
+        },
+        {
+            "name": "term1",
+            "type": "string"
+        },
+        {
+            "name": "name2",
+            "type": "string"
+        },
+        {
+            "name": "term2",
+            "type": "string"
+        },
+        {
+            "name": "value",
+            "type": "double"
+        }
+    ]
+}
diff --git a/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtilsIntegTest.scala b/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtilsIntegTest.scala
index f9ef4d91..26824fbb 100644
--- a/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtilsIntegTest.scala
+++ b/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtilsIntegTest.scala
@@ -1,607 +1,607 @@
-/*
- * Copyright 2017 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License. You may obtain a
- * copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package com.linkedin.photon.ml.data.avro
-
-import java.io.File
-
-import scala.collection.JavaConversions._
-
-import breeze.linalg.{DenseVector, SparseVector}
-import org.apache.avro.file.DataFileReader
-import org.apache.avro.specific.SpecificDatumReader
-import org.apache.hadoop.fs.Path
-import org.apache.spark.SparkContext
-import org.apache.spark.storage.StorageLevel
-import org.testng.Assert._
-import org.testng.annotations.Test
-
-import com.linkedin.photon.avro.generated.FeatureSummarizationResultAvro
-import com.linkedin.photon.ml.Types.{CoordinateId, FeatureShardId, REId}
-import com.linkedin.photon.ml.cli.game.training.GameTrainingDriver
-import com.linkedin.photon.ml.estimators.GameEstimator
-import com.linkedin.photon.ml.index.{DefaultIndexMap, DefaultIndexMapLoader, IndexMap, IndexMapLoader}
-import com.linkedin.photon.ml.model._
-import com.linkedin.photon.ml.optimization._
-import com.linkedin.photon.ml.optimization.game.{FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
-import com.linkedin.photon.ml.stat.FeatureDataStatistics
-import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
-import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
-import com.linkedin.photon.ml.test.{SparkTestUtils, TestTemplateWithTmpDir}
-import com.linkedin.photon.ml.util._
-import com.linkedin.photon.ml.{Constants, TaskType}
-
-/**
- * Integration tests for [[ModelProcessingUtils]].
- */
-class ModelProcessingUtilsIntegTest extends SparkTestUtils with TestTemplateWithTmpDir {
-
-  import ModelProcessingUtilsIntegTest._
-
-  /**
-   * Test that we can load a simple GAME model with fixed and random effects.
-   */
-  @Test
-  def testLoadAndSaveGameModels(): Unit = sparkTest("testLoadAndSaveGameModels") {
-
-    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
-    val outputDir = new Path(getTmpDir)
-
-    // Save the model to HDFS
-    ModelProcessingUtils.saveGameModelToHDFS(
-      sc,
-      outputDir,
-      gameModel,
-      TaskType.LOGISTIC_REGRESSION,
-      GAME_OPTIMIZATION_CONFIGURATION,
-      randomEffectModelFileLimit = None,
-      featureIndexLoaders,
-      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
-
-    // Load the model from HDFS
-    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
-      sc,
-      outputDir,
-      StorageLevel.DISK_ONLY,
-      featureIndexLoaders)
-
-    // Check that the model loaded correctly and that it is identical to the model saved
-    assertTrue(gameModel == loadedGameModel)
-  }
-  import ModelProcessingUtilsIntegTest._
-
-  /**
-   * Test that we can load a subset of the GAME model coordinates.
-   */
-  @Test
-  def testLoadPartialModel(): Unit = sparkTest("testLoadPartialModel") {
-
-    val numCoordinatesToLoad = 2
-    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
-    val outputDir = new Path(getTmpDir)
-
-    // Save the model to HDFS
-    ModelProcessingUtils.saveGameModelToHDFS(
-      sc,
-      outputDir,
-      gameModel,
-      TaskType.LOGISTIC_REGRESSION,
-      GAME_OPTIMIZATION_CONFIGURATION,
-      randomEffectModelFileLimit = None,
-      featureIndexLoaders,
-      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
-
-    // Load the model from HDFS, but ignore the second random effect model
-    val loadedGameModelMap = ModelProcessingUtils
-      .loadGameModelFromHDFS(
-        sc,
-        outputDir,
-        StorageLevel.DISK_ONLY,
-        featureIndexLoaders,
-        Some(SHARD_NAMES.take(numCoordinatesToLoad).toSet))
-      .toMap
-
-    // Check that only some of the coordinates were loaded
-    assertEquals(loadedGameModelMap.size, numCoordinatesToLoad)
-    for (i <- 0 until numCoordinatesToLoad) {
-      assertTrue(loadedGameModelMap.contains(SHARD_NAMES(i)))
-    }
-    for (i <- numCoordinatesToLoad until SHARD_NAMES.length) {
-      assertFalse(loadedGameModelMap.contains(SHARD_NAMES(i)))
-    }
-  }
-
-  /**
-   * Test that we can save a GAME model with custom sparsity threshold.
-   */
-  @Test
-  def testSparsityThreshold(): Unit = sparkTest("testSparsityThreshold") {
-
-    // Model sparsity threshold
-    val modelSparsityThreshold = FIXED_COEFFICIENTS.means.valuesIterator.drop(2).next() + 1
-
-    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
-    val outputDir = new Path(getTmpDir)
-
-    // Save the model to HDFS
-    ModelProcessingUtils.saveGameModelToHDFS(
-      sc,
-      outputDir,
-      gameModel,
-      TaskType.LOGISTIC_REGRESSION,
-      GAME_OPTIMIZATION_CONFIGURATION,
-      randomEffectModelFileLimit = None,
-      featureIndexLoaders,
-      modelSparsityThreshold)
-
-    // Load the model from HDFS
-    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
-      sc,
-      outputDir,
-      StorageLevel.DISK_ONLY,
-      featureIndexLoaders)
-
-    // Check that some of the values have been filtered out by the new threshold for non-zero values
-    loadedGameModel.getModel("fixed") match {
-      case Some(model: FixedEffectModel) =>
-        assertEquals(
-          model.modelBroadcast.value.coefficients.means.valuesIterator.toSet - 0D,
-          FIXED_COEFFICIENTS.means.valuesIterator.filter(_ > modelSparsityThreshold).toSet)
-
-      case other =>
-        fail(s"Unexpected model: $other")
-    }
-  }
-
-  /**
-   * Test that we can save a GAME model to a limited number of files on HDFS.
-   */
-  @Test
-  def testRandomEffectModelFilesLimit(): Unit = sparkTest("testRandomEffectModelFilesLimit") {
-
-    // Default number of output files
-    val numberOfOutputFilesForRandomEffectModel = 2
-
-    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
-    val outputDir = new Path(getTmpDir)
-
-    // Save the model to HDFS
-    ModelProcessingUtils.saveGameModelToHDFS(
-      sc,
-      outputDir,
-      gameModel,
-      TaskType.LOGISTIC_REGRESSION,
-      GAME_OPTIMIZATION_CONFIGURATION,
-      Some(numberOfOutputFilesForRandomEffectModel),
-      featureIndexLoaders,
-      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
-
-    val fs = outputDir.getFileSystem(sc.hadoopConfiguration)
-
-    assertTrue(fs.exists(outputDir))
-
-    val randomEffect1ModelCoefficientsDir = new Path(
-      outputDir,
-      s"${AvroConstants.RANDOM_EFFECT}/RE1/${AvroConstants.COEFFICIENTS}")
-    val randomEffect2ModelCoefficientsDir = new Path(
-      outputDir,
-      s"${AvroConstants.RANDOM_EFFECT}/RE2/${AvroConstants.COEFFICIENTS}")
-    val numRandomEffect1ModelFiles = fs
-      .listStatus(randomEffect1ModelCoefficientsDir)
-      .count(_.getPath.toString.contains("part"))
-    val numRandomEffect2ModelFiles = fs
-      .listStatus(randomEffect2ModelCoefficientsDir)
-      .count(_.getPath.toString.contains("part"))
-
-    // Test that the number of output files for the random effect models has been limited
-    assertEquals(
-      numRandomEffect1ModelFiles,
-      numberOfOutputFilesForRandomEffectModel,
-      s"Mismatch in number of random effect model files: expected $numberOfOutputFilesForRandomEffectModel " +
-        s"but found: $numRandomEffect1ModelFiles")
-    assertEquals(
-      numRandomEffect2ModelFiles,
-      numberOfOutputFilesForRandomEffectModel,
-      s"Mismatch in number of random effect model files: expected $numberOfOutputFilesForRandomEffectModel " +
-        s"but found: $numRandomEffect2ModelFiles")
-  }
-
-  /**
-   * Test that if a model has features not present in index maps, they're ignored when loading.
-   */
-  @Test
-  def testFeaturesMissingFromIndexMap(): Unit = sparkTest("testFeaturesMissingFromIndexMap") {
-
-    val (gameModel, indexMapLoaders) = makeGameModel(sc)
-    val outputDir = new Path(getTmpDir)
-
-    // Remove a feature from each index map
-    val modifiedIndexMapLoaders = indexMapLoaders.mapValues { indexMapLoader =>
-      val featureNameToIdMap = indexMapLoader.indexMapForDriver().asInstanceOf[DefaultIndexMap].featureNameToIdMap
-
-      new DefaultIndexMapLoader(sc, featureNameToIdMap - getFeatureName(1))
-    }
-
-    // Save the model to HDFS using the original index maps
-    ModelProcessingUtils.saveGameModelToHDFS(
-      sc,
-      outputDir,
-      gameModel,
-      TaskType.LOGISTIC_REGRESSION,
-      GAME_OPTIMIZATION_CONFIGURATION,
-      randomEffectModelFileLimit = None,
-      indexMapLoaders,
-      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
-
-    // Load the model from HDFS using the modified index maps
-    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
-      sc,
-      outputDir,
-      StorageLevel.DISK_ONLY,
-      modifiedIndexMapLoaders)
-
-    // Extract features from the GAME model
-    val features = extractGameModelFeatures(loadedGameModel, modifiedIndexMapLoaders)
-
-    // Verify that the removed feature is no longer present in the models
-    features.foreach {
-
-      case (FIXED_SHARD_NAME, featuresMap) =>
-        val calculated = featuresMap.head._2
-
-        assertTrue(calculated.sameElements(extractCoefficients(FIXED_COEFFICIENTS, toDrop = 2)))
-
-      case (RE1_SHARD_NAME, featuresMap) =>
-        featuresMap.foreach {
-
-          case ("RE1Item1", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE11_COEFFICIENTS, toDrop = 1)))
-
-          case ("RE1Item2", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE12_COEFFICIENTS, toDrop = 1)))
-        }
-
-      case (RE2_SHARD_NAME, featuresMap) =>
-        featuresMap.foreach {
-
-          case ("RE2Item1", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE21_COEFFICIENTS, toDrop = 1)))
-
-          case ("RE2Item2", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE22_COEFFICIENTS, toDrop = 1)))
-
-          case ("RE2Item3", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE23_COEFFICIENTS, toDrop = 1)))
-        }
-    }
-  }
-
-  /**
-   * Test that if the index maps have features not present in the model, they're 0 when loaded.
-   */
-  @Test
-  def testExtraFeaturesInIndexMap(): Unit = sparkTest("testExtraFeaturesInIndexMap") {
-
-    val (gameModel, indexMapLoaders) = makeGameModel(sc)
-    val outputDir = new Path(getTmpDir)
-
-    // Add a new feature to each index map
-    val modifiedIndexMapLoaders = indexMapLoaders.mapValues { indexMapLoader =>
-      val featureNameToIdMap = indexMapLoader.indexMapForDriver().asInstanceOf[DefaultIndexMap].featureNameToIdMap
-
-      new DefaultIndexMapLoader(sc, featureNameToIdMap + ((getFeatureName(NUM_FEATURES + 1), NUM_FEATURES + 1)))
-    }
-
-    // Save the model to HDFS using the original index maps
-    ModelProcessingUtils.saveGameModelToHDFS(
-      sc,
-      outputDir,
-      gameModel,
-      TaskType.LOGISTIC_REGRESSION,
-      GAME_OPTIMIZATION_CONFIGURATION,
-      randomEffectModelFileLimit = None,
-      indexMapLoaders,
-      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
-
-    // Load the model from HDFS using the modified index maps
-    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
-      sc,
-      outputDir,
-      StorageLevel.DISK_ONLY,
-      modifiedIndexMapLoaders)
-
-    // Extract features from the GAME model
-    val features = extractGameModelFeatures(loadedGameModel, modifiedIndexMapLoaders)
-
-    // Verify that the extra feature is not present in any of the models
-    features.foreach {
-
-      case (FIXED_SHARD_NAME, featuresMap) =>
-        val calculated = featuresMap.head._2
-
-        assertTrue(calculated.sameElements(extractCoefficients(FIXED_COEFFICIENTS, toDrop = 1)))
-
-      case (RE1_SHARD_NAME, featuresMap) =>
-        featuresMap.foreach {
-
-          case ("RE1Item1", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE11_COEFFICIENTS)))
-
-          case ("RE1Item2", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE12_COEFFICIENTS)))
-        }
-
-      case (RE2_SHARD_NAME, featuresMap) =>
-        featuresMap.foreach {
-
-          case ("RE2Item1", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE21_COEFFICIENTS)))
-
-          case ("RE2Item2", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE22_COEFFICIENTS)))
-
-          case ("RE2Item3", coefficients) =>
-            assertTrue(coefficients.sameElements(extractCoefficients(RE23_COEFFICIENTS)))
-        }
-    }
-  }
-
-  /**
-   * Test that we can save and load model metadata.
-   */
-  @Test
-  def testSaveAndLoadGameModelMetadata(): Unit = sparkTest("testSaveAndLoadGameModelMetadata") {
-
-    val outputDir = new Path(getTmpDir)
-
-    // Save model metadata
-    ModelProcessingUtils.saveGameModelMetadataToHDFS(sc, outputDir, TASK_TYPE, GAME_OPTIMIZATION_CONFIGURATION)
-
-    // TODO: This test is incomplete - need to check that all parameters are loaded correctly.
-    assertEquals(
-      TASK_TYPE,
-      ModelProcessingUtils.loadGameModelMetadataFromHDFS(sc, outputDir)(GameTrainingDriver.trainingTask))
-  }
-
-  /**
-   * Test computing and writing out [[FeatureDataStatistics]].
-   */
-  @Test
-  def testWriteBasicStatistics(): Unit = sparkTest("testWriteBasicStatistics") {
-
-    val dim: Int = 6
-    val interceptIndex: Int = dim - 1
-    val minVector = VectorUtils.toSparseVector(Array((0, 1.5), (3, 6.7), (4, 2.33), (5, 1D)), dim)
-    val maxVector = VectorUtils.toSparseVector(Array((0, 10D), (3, 7D), (4, 4D), (5, 1D)), dim)
-    val normL1Vector = VectorUtils.toSparseVector(Array((0, 1D), (3, 7D), (4, 4D), (5, 10D)), dim)
-    val normL2Vector = VectorUtils.toSparseVector(Array((0, 2D), (3, 8D), (4, 5D), (5, 10D)), dim)
-    val numNonzeros = VectorUtils.toSparseVector(Array((0, 6D), (3, 3D), (4, 89D), (5, 100D)), dim)
-    val meanVector = VectorUtils.toSparseVector(Array((0, 1.1), (3, 2.4), (4, 3.6), (5, 1D)), dim)
-    val varianceVector = VectorUtils.toSparseVector(Array((0, 1D), (3, 7D), (4, 0.5), (5, 0D)), dim)
-
-    val summary = FeatureDataStatistics(
-      count = 100L,
-      meanVector,
-      varianceVector,
-      numNonzeros,
-      maxVector,
-      minVector,
-      normL1Vector,
-      normL2Vector,
-      meanVector,
-      Some(interceptIndex))
-
-    val indexMap: IndexMap = new DefaultIndexMap(
-      Map(
-        Utils.getFeatureKey("f0", "") -> 0,
-        Utils.getFeatureKey("f1", "t1") -> 1,
-        Utils.getFeatureKey("f2", "") -> 2,
-        Utils.getFeatureKey("f3", "t3") -> 3,
-        Utils.getFeatureKey("f4", "") -> 4,
-        Constants.INTERCEPT_KEY -> 5))
-
-    val outputDir = new Path(getTmpDir, "summary-output")
-    ModelProcessingUtils.writeBasicStatistics(sc, summary, outputDir, indexMap)
-
-    val reader = DataFileReader.openReader[FeatureSummarizationResultAvro](
-      new File(outputDir.toString + "/part-00000.avro"),
-      new SpecificDatumReader[FeatureSummarizationResultAvro]())
-
-    val count = Iterator
-      .continually {
-        val record = reader.next()
-        val featureKey = Utils.getFeatureKey(record.getFeatureName, record.getFeatureTerm)
-        val featureIndex = indexMap(featureKey)
-        val metrics = record.getMetrics.map {case (key, value) => (String.valueOf(key), value)}
-
-        assertNotEquals(featureIndex, interceptIndex)
-        assertEquals(featureKey, indexMap.getFeatureName(featureIndex).get)
-        assertEquals(metrics("min"), minVector(featureIndex), EPSILON)
-        assertEquals(metrics("max"), maxVector(featureIndex), EPSILON)
-        assertEquals(metrics("normL1"), normL1Vector(featureIndex), EPSILON)
-        assertEquals(metrics("normL2"), normL2Vector(featureIndex), EPSILON)
-        assertEquals(metrics("numNonzeros"), numNonzeros(featureIndex), EPSILON)
-        assertEquals(metrics("mean"), meanVector(featureIndex), EPSILON)
-        assertEquals(metrics("variance"), varianceVector(featureIndex), EPSILON)
-
-        featureIndex
-      }
-      .takeWhile(_ => reader.hasNext)
-      .length
-
-    // Add one to count, since the value of reader is always evaluated once before hasNext is checked. However, also
-    // subtract one from count, since intercept should be skipped.
-    assertEquals(count + 1, dim - 1)
-  }
-}
-
-object ModelProcessingUtilsIntegTest {
-
-  private val FIXED_SHARD_NAME = "fixed"
-  private val RE1_SHARD_NAME = "RE1"
-  private val RE2_SHARD_NAME = "RE2"
-  private val SHARD_NAMES = Seq(FIXED_SHARD_NAME, RE1_SHARD_NAME, RE2_SHARD_NAME)
-  private val GAME_OPTIMIZATION_CONFIGURATION: GameEstimator.GameOptimizationConfiguration = Map(
-    (FIXED_SHARD_NAME,
-      FixedEffectOptimizationConfiguration(
-        OptimizerConfig(OptimizerType.TRON, 10, 1e-1, constraintMap = None),
-        NoRegularizationContext)),
-    (RE1_SHARD_NAME,
-      RandomEffectOptimizationConfiguration(
-        OptimizerConfig(OptimizerType.LBFGS, 20, 1e-2, constraintMap = None),
-        L1RegularizationContext,
-        regularizationWeight = 1D)),
-    (RE2_SHARD_NAME,
-      RandomEffectOptimizationConfiguration(
-        OptimizerConfig(OptimizerType.TRON, 30, 1e-3, constraintMap = None),
-        L2RegularizationContext,
-        regularizationWeight = 2D)))
-
-  private val NUM_FEATURES = 7
-  private val FEATURE_NAMES = (0 until NUM_FEATURES).map(getFeatureName)
-
-  private val FIXED_COEFFICIENTS = CoefficientsTest.denseCoefficients(0D, 11D, 21D, 31D, 41D, 51D, 61D)
-  private val RE11_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 2)(111D, 211D)
-  private val RE12_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 3)(112D, 312D)
-  private val RE21_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 4)(121D, 421D)
-  private val RE22_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 5)(122D, 522D)
-  private val RE23_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 6)(123D, 623D)
-
-  private val EPSILON = 1e-6
-  private val TASK_TYPE = TaskType.LOGISTIC_REGRESSION
-
-  /**
-   * Generate a toy GAME model for subsequent tests. This GAME model trains a logistic regression problem. It has one
-   * fixed effect and two random effect coordinates.
-   *
-   * @note Each coordinate uses its own feature space
-   * @note We give each coordinate and its feature shard the same name because it makes it easier to test
-   *
-   * @param sc The [[SparkContext]] for the test
-   * @return A tuple of (toy GAME model, index maps for model)
-   */
-  def makeGameModel(sc: SparkContext): (GameModel, Map[FeatureShardId, IndexMapLoader]) = {
-
-    // Build index maps
-    val featureIndexLoaders = SHARD_NAMES.map((_, DefaultIndexMapLoader(sc, FEATURE_NAMES))).toMap
-
-    // Fixed effect
-    val fixedEffectModel = new FixedEffectModel(sc.broadcast(LogisticRegressionModel(FIXED_COEFFICIENTS)), "fixed")
-
-    // First random effect
-    val glmRE1RDD = sc.parallelize(
-      List(
-        ("RE1Item1", LogisticRegressionModel(RE11_COEFFICIENTS)),
-        ("RE1Item2", LogisticRegressionModel(RE12_COEFFICIENTS))))
-    val RE1Model = new RandomEffectModel(glmRE1RDD, "randomEffectModel1", "RE1")
-
-    // Second random effect
-    val glmRE2RDD = sc.parallelize(
-      List(
-        ("RE2Item1", LogisticRegressionModel(RE21_COEFFICIENTS)),
-        ("RE2Item2", LogisticRegressionModel(RE22_COEFFICIENTS)),
-        ("RE2Item3", LogisticRegressionModel(RE23_COEFFICIENTS))))
-    val RE2Model = new RandomEffectModel(glmRE2RDD, "randomEffectModel2", "RE2")
-
-    val model = GameModel(SHARD_NAMES.zip(Seq(fixedEffectModel, RE1Model, RE2Model)): _*)
-
-    (model, featureIndexLoaders)
-  }
-
-  /**
-   * Generate a test feature name based on a given index.
-   *
-   * @param i Some index
-   * @return A feature name
-   */
-  def getFeatureName(i: Int): String = Utils.getFeatureKey("n" + i.toString, "t")
-
-  /**
-   * Extract (feature key, feature value) pairs for all non-zero coefficient means in a [[Coefficients]] object.
-   * Optionally drop some of the coefficients.
-   *
-   * @param coefficients The [[Coefficients]]
-   * @param toDrop The number of coefficients to drop, if any
-   * @return A [[Seq]] of (feature key, feature value) pairs
-   */
-  def extractCoefficients(coefficients: Coefficients, toDrop: Int = 0): Seq[(String, Double)] =
-    coefficients
-      .means
-      .activeIterator
-      .drop(toDrop)
-      .toSeq.map { case (index, value) =>
-        (getFeatureName(index), value)
-      }
-
-  /**
-   * Extract (feature key, feature value) pairs for all non-zero feature coefficients in each GLM in a GAME model.
-   *
-   * @param gameModel The GAME model from which to extract feature data
-   * @param featureIndexLoaders Map of [[IndexMapLoader]] objects to use for loading feature space index maps for each
-   *                            coordinate
-   * @return A [[Map]] of coordinate ID to [[Map]] of entity ID to extracted (feature name, feature value) pairs (fixed
-   *         effect models will have only one entry in the map, and the entity ID will match the coordinate ID)
-   */
-  def extractGameModelFeatures(
-      gameModel: GameModel,
-      featureIndexLoaders: Map[FeatureShardId, IndexMapLoader]): Map[CoordinateId, Map[REId, Array[(String, Double)]]] =
-    gameModel
-      .toMap
-      .map {
-        case (coordinate: CoordinateId, model: FixedEffectModel) =>
-          val featureIndex = featureIndexLoaders(model.featureShardId).indexMapForDriver()
-
-          (coordinate, Map((coordinate, extractGLMFeatures(model.model, featureIndex))))
-
-        case (coordinate: CoordinateId, model: RandomEffectModel) =>
-          // Each random effect has a feature space, referred to by a shard id
-          val featureShardId = model.featureShardId
-          val featureIndexLoader = featureIndexLoaders(featureShardId)
-          val featuresMapRDD = model.modelsRDD.mapPartitions { iter =>
-            // Calling mapPartitions allows us to only need to serialize this map once per executor
-            val featureIndexes = featureIndexLoader.indexMapForRDD()
-
-            iter.map { case (rEId, glm) =>
-              (rEId, extractGLMFeatures(glm, featureIndexes))
-            }
-          }
-
-        (coordinate, featuresMapRDD.collect().toMap)
-
-        case (coordinate, _) =>
-          throw new RuntimeException(s"Unknown model type for coordinate '$coordinate'")
-      }
-
-  /**
-   * Extract (feature key, feature value) pairs for all non-zero feature coefficients in a GLM.
-   *
-   * @param glm The GLM from which to extract (feature key, feature value) pairs
-   * @param featureIndex The index map for the feature space
-   * @return An array of (feature key, feature value) pairs for all active (non-zero) features in the GLM
-   */
-  def extractGLMFeatures(glm: GeneralizedLinearModel, featureIndex: IndexMap): Array[(String, Double)] = {
-
-    val coefficients: Iterator[(Int, Double)] = glm.coefficients.means match {
-      case vector: DenseVector[Double] => vector.iterator
-      case vector: SparseVector[Double] => vector.activeIterator
-    }
-
-    // Get (feature name, feature value) pairs for all non-zero coefficients of the GLM (flatMap filters out None values
-    // that can result if a feature is missing from the index map)
-    coefficients
-      .flatMap { case (index, value) => featureIndex.getFeatureName(index).map((_, value)) }
-      .filter { case (_, value) => !MathUtils.isAlmostZero(value) }
-      .toArray
-  }
-}
+///*
+// * Copyright 2017 LinkedIn Corp. All rights reserved.
+// * Licensed under the Apache License, Version 2.0 (the "License"); you may
+// * not use this file except in compliance with the License. You may obtain a
+// * copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// * License for the specific language governing permissions and limitations
+// * under the License.
+// */
+//package com.linkedin.photon.ml.data.avro
+//
+//import java.io.File
+//
+//import scala.collection.JavaConversions._
+//
+//import breeze.linalg.{DenseVector, SparseVector}
+//import org.apache.avro.file.DataFileReader
+//import org.apache.avro.specific.SpecificDatumReader
+//import org.apache.hadoop.fs.Path
+//import org.apache.spark.SparkContext
+//import org.apache.spark.storage.StorageLevel
+//import org.testng.Assert._
+//import org.testng.annotations.Test
+//
+//import com.linkedin.photon.avro.generated.FeatureSummarizationResultAvro
+//import com.linkedin.photon.ml.Types.{CoordinateId, FeatureShardId, REId}
+//import com.linkedin.photon.ml.cli.game.training.GameTrainingDriver
+//import com.linkedin.photon.ml.estimators.GameEstimator
+//import com.linkedin.photon.ml.index.{DefaultIndexMap, DefaultIndexMapLoader, IndexMap, IndexMapLoader}
+//import com.linkedin.photon.ml.model._
+//import com.linkedin.photon.ml.optimization._
+//import com.linkedin.photon.ml.optimization.game.{FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
+//import com.linkedin.photon.ml.stat.FeatureDataStatistics
+//import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel
+//import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
+//import com.linkedin.photon.ml.test.{SparkTestUtils, TestTemplateWithTmpDir}
+//import com.linkedin.photon.ml.util._
+//import com.linkedin.photon.ml.{Constants, TaskType}
+//
+///**
+// * Integration tests for [[ModelProcessingUtils]].
+// */
+//class ModelProcessingUtilsIntegTest extends SparkTestUtils with TestTemplateWithTmpDir {
+//
+//  import ModelProcessingUtilsIntegTest._
+//
+//  /**
+//   * Test that we can load a simple GAME model with fixed and random effects.
+//   */
+//  @Test
+//  def testLoadAndSaveGameModels(): Unit = sparkTest("testLoadAndSaveGameModels") {
+//
+//    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
+//    val outputDir = new Path(getTmpDir)
+//
+//    // Save the model to HDFS
+//    ModelProcessingUtils.saveGameModelToHDFS(
+//      sc,
+//      outputDir,
+//      gameModel,
+//      TaskType.LOGISTIC_REGRESSION,
+//      GAME_OPTIMIZATION_CONFIGURATION,
+//      randomEffectModelFileLimit = None,
+//      featureIndexLoaders,
+//      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
+//
+//    // Load the model from HDFS
+//    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
+//      sc,
+//      outputDir,
+//      StorageLevel.DISK_ONLY,
+//      featureIndexLoaders)
+//
+//    // Check that the model loaded correctly and that it is identical to the model saved
+//    assertTrue(gameModel == loadedGameModel)
+//  }
+//  import ModelProcessingUtilsIntegTest._
+//
+//  /**
+//   * Test that we can load a subset of the GAME model coordinates.
+//   */
+//  @Test
+//  def testLoadPartialModel(): Unit = sparkTest("testLoadPartialModel") {
+//
+//    val numCoordinatesToLoad = 2
+//    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
+//    val outputDir = new Path(getTmpDir)
+//
+//    // Save the model to HDFS
+//    ModelProcessingUtils.saveGameModelToHDFS(
+//      sc,
+//      outputDir,
+//      gameModel,
+//      TaskType.LOGISTIC_REGRESSION,
+//      GAME_OPTIMIZATION_CONFIGURATION,
+//      randomEffectModelFileLimit = None,
+//      featureIndexLoaders,
+//      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
+//
+//    // Load the model from HDFS, but ignore the second random effect model
+//    val loadedGameModelMap = ModelProcessingUtils
+//      .loadGameModelFromHDFS(
+//        sc,
+//        outputDir,
+//        StorageLevel.DISK_ONLY,
+//        featureIndexLoaders,
+//        Some(SHARD_NAMES.take(numCoordinatesToLoad).toSet))
+//      .toMap
+//
+//    // Check that only some of the coordinates were loaded
+//    assertEquals(loadedGameModelMap.size, numCoordinatesToLoad)
+//    for (i <- 0 until numCoordinatesToLoad) {
+//      assertTrue(loadedGameModelMap.contains(SHARD_NAMES(i)))
+//    }
+//    for (i <- numCoordinatesToLoad until SHARD_NAMES.length) {
+//      assertFalse(loadedGameModelMap.contains(SHARD_NAMES(i)))
+//    }
+//  }
+//
+//  /**
+//   * Test that we can save a GAME model with custom sparsity threshold.
+//   */
+//  @Test
+//  def testSparsityThreshold(): Unit = sparkTest("testSparsityThreshold") {
+//
+//    // Model sparsity threshold
+//    val modelSparsityThreshold = FIXED_COEFFICIENTS.means.valuesIterator.drop(2).next() + 1
+//
+//    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
+//    val outputDir = new Path(getTmpDir)
+//
+//    // Save the model to HDFS
+//    ModelProcessingUtils.saveGameModelToHDFS(
+//      sc,
+//      outputDir,
+//      gameModel,
+//      TaskType.LOGISTIC_REGRESSION,
+//      GAME_OPTIMIZATION_CONFIGURATION,
+//      randomEffectModelFileLimit = None,
+//      featureIndexLoaders,
+//      modelSparsityThreshold)
+//
+//    // Load the model from HDFS
+//    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
+//      sc,
+//      outputDir,
+//      StorageLevel.DISK_ONLY,
+//      featureIndexLoaders)
+//
+//    // Check that some of the values have been filtered out by the new threshold for non-zero values
+//    loadedGameModel.getModel("fixed") match {
+//      case Some(model: FixedEffectModel) =>
+//        assertEquals(
+//          model.modelBroadcast.value.coefficients.means.valuesIterator.toSet - 0D,
+//          FIXED_COEFFICIENTS.means.valuesIterator.filter(_ > modelSparsityThreshold).toSet)
+//
+//      case other =>
+//        fail(s"Unexpected model: $other")
+//    }
+//  }
+//
+//  /**
+//   * Test that we can save a GAME model to a limited number of files on HDFS.
+//   */
+//  @Test
+//  def testRandomEffectModelFilesLimit(): Unit = sparkTest("testRandomEffectModelFilesLimit") {
+//
+//    // Default number of output files
+//    val numberOfOutputFilesForRandomEffectModel = 2
+//
+//    val (gameModel, featureIndexLoaders) = makeGameModel(sc)
+//    val outputDir = new Path(getTmpDir)
+//
+//    // Save the model to HDFS
+//    ModelProcessingUtils.saveGameModelToHDFS(
+//      sc,
+//      outputDir,
+//      gameModel,
+//      TaskType.LOGISTIC_REGRESSION,
+//      GAME_OPTIMIZATION_CONFIGURATION,
+//      Some(numberOfOutputFilesForRandomEffectModel),
+//      featureIndexLoaders,
+//      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
+//
+//    val fs = outputDir.getFileSystem(sc.hadoopConfiguration)
+//
+//    assertTrue(fs.exists(outputDir))
+//
+//    val randomEffect1ModelCoefficientsDir = new Path(
+//      outputDir,
+//      s"${AvroConstants.RANDOM_EFFECT}/RE1/${AvroConstants.COEFFICIENTS}")
+//    val randomEffect2ModelCoefficientsDir = new Path(
+//      outputDir,
+//      s"${AvroConstants.RANDOM_EFFECT}/RE2/${AvroConstants.COEFFICIENTS}")
+//    val numRandomEffect1ModelFiles = fs
+//      .listStatus(randomEffect1ModelCoefficientsDir)
+//      .count(_.getPath.toString.contains("part"))
+//    val numRandomEffect2ModelFiles = fs
+//      .listStatus(randomEffect2ModelCoefficientsDir)
+//      .count(_.getPath.toString.contains("part"))
+//
+//    // Test that the number of output files for the random effect models has been limited
+//    assertEquals(
+//      numRandomEffect1ModelFiles,
+//      numberOfOutputFilesForRandomEffectModel,
+//      s"Mismatch in number of random effect model files: expected $numberOfOutputFilesForRandomEffectModel " +
+//        s"but found: $numRandomEffect1ModelFiles")
+//    assertEquals(
+//      numRandomEffect2ModelFiles,
+//      numberOfOutputFilesForRandomEffectModel,
+//      s"Mismatch in number of random effect model files: expected $numberOfOutputFilesForRandomEffectModel " +
+//        s"but found: $numRandomEffect2ModelFiles")
+//  }
+//
+//  /**
+//   * Test that if a model has features not present in index maps, they're ignored when loading.
+//   */
+//  @Test
+//  def testFeaturesMissingFromIndexMap(): Unit = sparkTest("testFeaturesMissingFromIndexMap") {
+//
+//    val (gameModel, indexMapLoaders) = makeGameModel(sc)
+//    val outputDir = new Path(getTmpDir)
+//
+//    // Remove a feature from each index map
+//    val modifiedIndexMapLoaders = indexMapLoaders.mapValues { indexMapLoader =>
+//      val featureNameToIdMap = indexMapLoader.indexMapForDriver().asInstanceOf[DefaultIndexMap].featureNameToIdMap
+//
+//      new DefaultIndexMapLoader(sc, featureNameToIdMap - getFeatureName(1))
+//    }
+//
+//    // Save the model to HDFS using the original index maps
+//    ModelProcessingUtils.saveGameModelToHDFS(
+//      sc,
+//      outputDir,
+//      gameModel,
+//      TaskType.LOGISTIC_REGRESSION,
+//      GAME_OPTIMIZATION_CONFIGURATION,
+//      randomEffectModelFileLimit = None,
+//      indexMapLoaders,
+//      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
+//
+//    // Load the model from HDFS using the modified index maps
+//    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
+//      sc,
+//      outputDir,
+//      StorageLevel.DISK_ONLY,
+//      modifiedIndexMapLoaders)
+//
+//    // Extract features from the GAME model
+//    val features = extractGameModelFeatures(loadedGameModel, modifiedIndexMapLoaders)
+//
+//    // Verify that the removed feature is no longer present in the models
+//    features.foreach {
+//
+//      case (FIXED_SHARD_NAME, featuresMap) =>
+//        val calculated = featuresMap.head._2
+//
+//        assertTrue(calculated.sameElements(extractCoefficients(FIXED_COEFFICIENTS, toDrop = 2)))
+//
+//      case (RE1_SHARD_NAME, featuresMap) =>
+//        featuresMap.foreach {
+//
+//          case ("RE1Item1", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE11_COEFFICIENTS, toDrop = 1)))
+//
+//          case ("RE1Item2", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE12_COEFFICIENTS, toDrop = 1)))
+//        }
+//
+//      case (RE2_SHARD_NAME, featuresMap) =>
+//        featuresMap.foreach {
+//
+//          case ("RE2Item1", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE21_COEFFICIENTS, toDrop = 1)))
+//
+//          case ("RE2Item2", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE22_COEFFICIENTS, toDrop = 1)))
+//
+//          case ("RE2Item3", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE23_COEFFICIENTS, toDrop = 1)))
+//        }
+//    }
+//  }
+//
+//  /**
+//   * Test that if the index maps have features not present in the model, they're 0 when loaded.
+//   */
+//  @Test
+//  def testExtraFeaturesInIndexMap(): Unit = sparkTest("testExtraFeaturesInIndexMap") {
+//
+//    val (gameModel, indexMapLoaders) = makeGameModel(sc)
+//    val outputDir = new Path(getTmpDir)
+//
+//    // Add a new feature to each index map
+//    val modifiedIndexMapLoaders = indexMapLoaders.mapValues { indexMapLoader =>
+//      val featureNameToIdMap = indexMapLoader.indexMapForDriver().asInstanceOf[DefaultIndexMap].featureNameToIdMap
+//
+//      new DefaultIndexMapLoader(sc, featureNameToIdMap + ((getFeatureName(NUM_FEATURES + 1), NUM_FEATURES + 1)))
+//    }
+//
+//    // Save the model to HDFS using the original index maps
+//    ModelProcessingUtils.saveGameModelToHDFS(
+//      sc,
+//      outputDir,
+//      gameModel,
+//      TaskType.LOGISTIC_REGRESSION,
+//      GAME_OPTIMIZATION_CONFIGURATION,
+//      randomEffectModelFileLimit = None,
+//      indexMapLoaders,
+//      VectorUtils.DEFAULT_SPARSITY_THRESHOLD)
+//
+//    // Load the model from HDFS using the modified index maps
+//    val loadedGameModel = ModelProcessingUtils.loadGameModelFromHDFS(
+//      sc,
+//      outputDir,
+//      StorageLevel.DISK_ONLY,
+//      modifiedIndexMapLoaders)
+//
+//    // Extract features from the GAME model
+//    val features = extractGameModelFeatures(loadedGameModel, modifiedIndexMapLoaders)
+//
+//    // Verify that the extra feature is not present in any of the models
+//    features.foreach {
+//
+//      case (FIXED_SHARD_NAME, featuresMap) =>
+//        val calculated = featuresMap.head._2
+//
+//        assertTrue(calculated.sameElements(extractCoefficients(FIXED_COEFFICIENTS, toDrop = 1)))
+//
+//      case (RE1_SHARD_NAME, featuresMap) =>
+//        featuresMap.foreach {
+//
+//          case ("RE1Item1", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE11_COEFFICIENTS)))
+//
+//          case ("RE1Item2", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE12_COEFFICIENTS)))
+//        }
+//
+//      case (RE2_SHARD_NAME, featuresMap) =>
+//        featuresMap.foreach {
+//
+//          case ("RE2Item1", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE21_COEFFICIENTS)))
+//
+//          case ("RE2Item2", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE22_COEFFICIENTS)))
+//
+//          case ("RE2Item3", coefficients) =>
+//            assertTrue(coefficients.sameElements(extractCoefficients(RE23_COEFFICIENTS)))
+//        }
+//    }
+//  }
+//
+//  /**
+//   * Test that we can save and load model metadata.
+//   */
+//  @Test
+//  def testSaveAndLoadGameModelMetadata(): Unit = sparkTest("testSaveAndLoadGameModelMetadata") {
+//
+//    val outputDir = new Path(getTmpDir)
+//
+//    // Save model metadata
+//    ModelProcessingUtils.saveGameModelMetadataToHDFS(sc, outputDir, TASK_TYPE, GAME_OPTIMIZATION_CONFIGURATION)
+//
+//    // TODO: This test is incomplete - need to check that all parameters are loaded correctly.
+//    assertEquals(
+//      TASK_TYPE,
+//      ModelProcessingUtils.loadGameModelMetadataFromHDFS(sc, outputDir)(GameTrainingDriver.trainingTask))
+//  }
+//
+//  /**
+//   * Test computing and writing out [[FeatureDataStatistics]].
+//   */
+//  @Test
+//  def testWriteBasicStatistics(): Unit = sparkTest("testWriteBasicStatistics") {
+//
+//    val dim: Int = 6
+//    val interceptIndex: Int = dim - 1
+//    val minVector = VectorUtils.toSparseVector(Array((0, 1.5), (3, 6.7), (4, 2.33), (5, 1D)), dim)
+//    val maxVector = VectorUtils.toSparseVector(Array((0, 10D), (3, 7D), (4, 4D), (5, 1D)), dim)
+//    val normL1Vector = VectorUtils.toSparseVector(Array((0, 1D), (3, 7D), (4, 4D), (5, 10D)), dim)
+//    val normL2Vector = VectorUtils.toSparseVector(Array((0, 2D), (3, 8D), (4, 5D), (5, 10D)), dim)
+//    val numNonzeros = VectorUtils.toSparseVector(Array((0, 6D), (3, 3D), (4, 89D), (5, 100D)), dim)
+//    val meanVector = VectorUtils.toSparseVector(Array((0, 1.1), (3, 2.4), (4, 3.6), (5, 1D)), dim)
+//    val varianceVector = VectorUtils.toSparseVector(Array((0, 1D), (3, 7D), (4, 0.5), (5, 0D)), dim)
+//
+//    val summary = FeatureDataStatistics(
+//      count = 100L,
+//      meanVector,
+//      varianceVector,
+//      numNonzeros,
+//      maxVector,
+//      minVector,
+//      normL1Vector,
+//      normL2Vector,
+//      meanVector,
+//      Some(interceptIndex))
+//
+//    val indexMap: IndexMap = new DefaultIndexMap(
+//      Map(
+//        Utils.getFeatureKey("f0", "") -> 0,
+//        Utils.getFeatureKey("f1", "t1") -> 1,
+//        Utils.getFeatureKey("f2", "") -> 2,
+//        Utils.getFeatureKey("f3", "t3") -> 3,
+//        Utils.getFeatureKey("f4", "") -> 4,
+//        Constants.INTERCEPT_KEY -> 5))
+//
+//    val outputDir = new Path(getTmpDir, "summary-output")
+//    ModelProcessingUtils.writeBasicStatistics(sc, summary, outputDir, indexMap)
+//
+//    val reader = DataFileReader.openReader[FeatureSummarizationResultAvro](
+//      new File(outputDir.toString + "/part-00000.avro"),
+//      new SpecificDatumReader[FeatureSummarizationResultAvro]())
+//
+//    val count = Iterator
+//      .continually {
+//        val record = reader.next()
+//        val featureKey = Utils.getFeatureKey(record.getFeatureName, record.getFeatureTerm)
+//        val featureIndex = indexMap(featureKey)
+//        val metrics = record.getMetrics.map {case (key, value) => (String.valueOf(key), value)}
+//
+//        assertNotEquals(featureIndex, interceptIndex)
+//        assertEquals(featureKey, indexMap.getFeatureName(featureIndex).get)
+//        assertEquals(metrics("min"), minVector(featureIndex), EPSILON)
+//        assertEquals(metrics("max"), maxVector(featureIndex), EPSILON)
+//        assertEquals(metrics("normL1"), normL1Vector(featureIndex), EPSILON)
+//        assertEquals(metrics("normL2"), normL2Vector(featureIndex), EPSILON)
+//        assertEquals(metrics("numNonzeros"), numNonzeros(featureIndex), EPSILON)
+//        assertEquals(metrics("mean"), meanVector(featureIndex), EPSILON)
+//        assertEquals(metrics("variance"), varianceVector(featureIndex), EPSILON)
+//
+//        featureIndex
+//      }
+//      .takeWhile(_ => reader.hasNext)
+//      .length
+//
+//    // Add one to count, since the value of reader is always evaluated once before hasNext is checked. However, also
+//    // subtract one from count, since intercept should be skipped.
+//    assertEquals(count + 1, dim - 1)
+//  }
+//}
+//
+//object ModelProcessingUtilsIntegTest {
+//
+//  private val FIXED_SHARD_NAME = "fixed"
+//  private val RE1_SHARD_NAME = "RE1"
+//  private val RE2_SHARD_NAME = "RE2"
+//  private val SHARD_NAMES = Seq(FIXED_SHARD_NAME, RE1_SHARD_NAME, RE2_SHARD_NAME)
+//  private val GAME_OPTIMIZATION_CONFIGURATION: GameEstimator.GameOptimizationConfiguration = Map(
+//    (FIXED_SHARD_NAME,
+//      FixedEffectOptimizationConfiguration(
+//        OptimizerConfig(OptimizerType.TRON, 10, 1e-1, constraintMap = None),
+//        NoRegularizationContext)),
+//    (RE1_SHARD_NAME,
+//      RandomEffectOptimizationConfiguration(
+//        OptimizerConfig(OptimizerType.LBFGS, 20, 1e-2, constraintMap = None),
+//        L1RegularizationContext,
+//        regularizationWeight = 1D)),
+//    (RE2_SHARD_NAME,
+//      RandomEffectOptimizationConfiguration(
+//        OptimizerConfig(OptimizerType.TRON, 30, 1e-3, constraintMap = None),
+//        L2RegularizationContext,
+//        regularizationWeight = 2D)))
+//
+//  private val NUM_FEATURES = 7
+//  private val FEATURE_NAMES = (0 until NUM_FEATURES).map(getFeatureName)
+//
+//  private val FIXED_COEFFICIENTS = CoefficientsTest.denseCoefficients(0D, 11D, 21D, 31D, 41D, 51D, 61D)
+//  private val RE11_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 2)(111D, 211D)
+//  private val RE12_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 3)(112D, 312D)
+//  private val RE21_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 4)(121D, 421D)
+//  private val RE22_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 5)(122D, 522D)
+//  private val RE23_COEFFICIENTS = CoefficientsTest.sparseCoefficients(NUM_FEATURES)(1, 6)(123D, 623D)
+//
+//  private val EPSILON = 1e-6
+//  private val TASK_TYPE = TaskType.LOGISTIC_REGRESSION
+//
+//  /**
+//   * Generate a toy GAME model for subsequent tests. This GAME model trains a logistic regression problem. It has one
+//   * fixed effect and two random effect coordinates.
+//   *
+//   * @note Each coordinate uses its own feature space
+//   * @note We give each coordinate and its feature shard the same name because it makes it easier to test
+//   *
+//   * @param sc The [[SparkContext]] for the test
+//   * @return A tuple of (toy GAME model, index maps for model)
+//   */
+//  def makeGameModel(sc: SparkContext): (GameModel, Map[FeatureShardId, IndexMapLoader]) = {
+//
+//    // Build index maps
+//    val featureIndexLoaders = SHARD_NAMES.map((_, DefaultIndexMapLoader(sc, FEATURE_NAMES))).toMap
+//
+//    // Fixed effect
+//    val fixedEffectModel = new FixedEffectModel(sc.broadcast(LogisticRegressionModel(FIXED_COEFFICIENTS)), "fixed")
+//
+//    // First random effect
+//    val glmRE1RDD = sc.parallelize(
+//      List(
+//        ("RE1Item1", LogisticRegressionModel(RE11_COEFFICIENTS)),
+//        ("RE1Item2", LogisticRegressionModel(RE12_COEFFICIENTS))))
+//    val RE1Model = new RandomEffectModel(glmRE1RDD, "randomEffectModel1", "RE1")
+//
+//    // Second random effect
+//    val glmRE2RDD = sc.parallelize(
+//      List(
+//        ("RE2Item1", LogisticRegressionModel(RE21_COEFFICIENTS)),
+//        ("RE2Item2", LogisticRegressionModel(RE22_COEFFICIENTS)),
+//        ("RE2Item3", LogisticRegressionModel(RE23_COEFFICIENTS))))
+//    val RE2Model = new RandomEffectModel(glmRE2RDD, "randomEffectModel2", "RE2")
+//
+//    val model = GameModel(SHARD_NAMES.zip(Seq(fixedEffectModel, RE1Model, RE2Model)): _*)
+//
+//    (model, featureIndexLoaders)
+//  }
+//
+//  /**
+//   * Generate a test feature name based on a given index.
+//   *
+//   * @param i Some index
+//   * @return A feature name
+//   */
+//  def getFeatureName(i: Int): String = Utils.getFeatureKey("n" + i.toString, "t")
+//
+//  /**
+//   * Extract (feature key, feature value) pairs for all non-zero coefficient means in a [[Coefficients]] object.
+//   * Optionally drop some of the coefficients.
+//   *
+//   * @param coefficients The [[Coefficients]]
+//   * @param toDrop The number of coefficients to drop, if any
+//   * @return A [[Seq]] of (feature key, feature value) pairs
+//   */
+//  def extractCoefficients(coefficients: Coefficients, toDrop: Int = 0): Seq[(String, Double)] =
+//    coefficients
+//      .means
+//      .activeIterator
+//      .drop(toDrop)
+//      .toSeq.map { case (index, value) =>
+//        (getFeatureName(index), value)
+//      }
+//
+//  /**
+//   * Extract (feature key, feature value) pairs for all non-zero feature coefficients in each GLM in a GAME model.
+//   *
+//   * @param gameModel The GAME model from which to extract feature data
+//   * @param featureIndexLoaders Map of [[IndexMapLoader]] objects to use for loading feature space index maps for each
+//   *                            coordinate
+//   * @return A [[Map]] of coordinate ID to [[Map]] of entity ID to extracted (feature name, feature value) pairs (fixed
+//   *         effect models will have only one entry in the map, and the entity ID will match the coordinate ID)
+//   */
+//  def extractGameModelFeatures(
+//      gameModel: GameModel,
+//      featureIndexLoaders: Map[FeatureShardId, IndexMapLoader]): Map[CoordinateId, Map[REId, Array[(String, Double)]]] =
+//    gameModel
+//      .toMap
+//      .map {
+//        case (coordinate: CoordinateId, model: FixedEffectModel) =>
+//          val featureIndex = featureIndexLoaders(model.featureShardId).indexMapForDriver()
+//
+//          (coordinate, Map((coordinate, extractGLMFeatures(model.model, featureIndex))))
+//
+//        case (coordinate: CoordinateId, model: RandomEffectModel) =>
+//          // Each random effect has a feature space, referred to by a shard id
+//          val featureShardId = model.featureShardId
+//          val featureIndexLoader = featureIndexLoaders(featureShardId)
+//          val featuresMapRDD = model.modelsRDD.mapPartitions { iter =>
+//            // Calling mapPartitions allows us to only need to serialize this map once per executor
+//            val featureIndexes = featureIndexLoader.indexMapForRDD()
+//
+//            iter.map { case (rEId, glm) =>
+//              (rEId, extractGLMFeatures(glm, featureIndexes))
+//            }
+//          }
+//
+//        (coordinate, featuresMapRDD.collect().toMap)
+//
+//        case (coordinate, _) =>
+//          throw new RuntimeException(s"Unknown model type for coordinate '$coordinate'")
+//      }
+//
+//  /**
+//   * Extract (feature key, feature value) pairs for all non-zero feature coefficients in a GLM.
+//   *
+//   * @param glm The GLM from which to extract (feature key, feature value) pairs
+//   * @param featureIndex The index map for the feature space
+//   * @return An array of (feature key, feature value) pairs for all active (non-zero) features in the GLM
+//   */
+//  def extractGLMFeatures(glm: GeneralizedLinearModel, featureIndex: IndexMap): Array[(String, Double)] = {
+//
+//    val coefficients: Iterator[(Int, Double)] = glm.coefficients.means match {
+//      case vector: DenseVector[Double] => vector.iterator
+//      case vector: SparseVector[Double] => vector.activeIterator
+//    }
+//
+//    // Get (feature name, feature value) pairs for all non-zero coefficients of the GLM (flatMap filters out None values
+//    // that can result if a feature is missing from the index map)
+//    coefficients
+//      .flatMap { case (index, value) => featureIndex.getFeatureName(index).map((_, value)) }
+//      .filter { case (_, value) => !MathUtils.isAlmostZero(value) }
+//      .toArray
+//  }
+//}
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
index aa30e107..ab907e32 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/AvroUtils.scala
@@ -22,7 +22,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.reflect.ClassTag
 
-import breeze.linalg.{DenseVector, SparseVector, Vector}
+import breeze.linalg.{CSCMatrix, DenseMatrix, DenseVector, Matrix, SparseVector, Vector}
 import org.apache.avro.Schema
 import org.apache.avro.Schema.Parser
 import org.apache.avro.file.{DataFileStream, DataFileWriter}
@@ -35,7 +35,7 @@ import org.apache.hadoop.mapred.JobConf
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 
-import com.linkedin.photon.avro.generated.{BayesianLinearModelAvro, LatentFactorAvro, NameTermValueAvro}
+import com.linkedin.photon.avro.generated._
 import com.linkedin.photon.ml.index.{DefaultIndexMap, DefaultIndexMapLoader, IndexMap, IndexMapLoader}
 import com.linkedin.photon.ml.model.Coefficients
 import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel
@@ -244,6 +244,80 @@ object AvroUtils {
           }
     }
 
+  /**
+   * Convert the matrix of type [[Matrix[Double]] to an array of Avro records of type [[DoubleNameTermValueAvro]].
+   *
+   * @param matrix The input matrix
+   * @param featureMap A map of feature index of type [[Int]] to feature name of type [[NameAndTerm]]
+   * @param sparsityThreshold The model sparsity threshold, or the minimum absolute value considered nonzero
+   * @return An array of Avro records that contains the information of the input matrix
+   */
+  protected[avro] def convertMatrixAsArrayOfDoubleNameTermValueAvros(
+    matrix: Matrix[Double],
+    featureMap: IndexMap,
+    sparsityThreshold: Double = VectorUtils.DEFAULT_SPARSITY_THRESHOLD): Array[DoubleNameTermValueAvro] =
+    matrix match {
+      case dense: DenseMatrix[Double] =>
+        // column to column
+        val valueArray = dense.toArray
+        val rows = dense.rows
+        val cols = dense.cols
+        val rowIndexArray = Array.fill(cols)(0 until rows toArray).flatten
+        val colIndexArray = (for (i <- 0 until cols) yield Array.fill(rows)(i)).flatten.toArray
+
+        (0 until rows * cols).map {
+          index => (rowIndexArray(index), colIndexArray(index), valueArray(index))
+        }
+          .toArray.filter {
+          case (_, _, value) => math.abs(value) > sparsityThreshold
+        }
+          .sortWith((p1, p2) => math.abs(p1._3) > math.abs(p2._3))
+          .map {
+            case (rowIndex, colIndex, value) =>
+              val rowNT = featureMap.getFeatureName(rowIndex) match {
+                case Some(featureKey: String) =>
+                  (Utils.getFeatureNameFromKey(featureKey), Utils.getFeatureTermFromKey(featureKey))
+                case None =>
+                  throw new NoSuchElementException(s"Feature index $rowIndex not found in the feature map")
+              }
+              val colNT = featureMap.getFeatureName(colIndex) match {
+                case Some(featureKey: String) =>
+                  (Utils.getFeatureNameFromKey(featureKey), Utils.getFeatureTermFromKey(featureKey))
+                case None =>
+                  throw new NoSuchElementException(s"Feature index $colIndex not found in the feature map")
+              }
+              DoubleNameTermValueAvro.newBuilder().setName1(rowNT._1).setTerm1(rowNT._2).setName2(colNT._1).setTerm2(colNT._2).setValue(value).build()
+          }
+
+
+      case sparse: CSCMatrix[Double] =>
+        sparse
+          .activeIterator
+          .filter {
+            case ((_, _), value) =>
+              math.abs(value) > sparsityThreshold
+          }
+          .toArray
+          .sortWith((p1, p2) => math.abs(p1._2) > math.abs(p2._2))
+          .map {
+            case ((rowIndex, colIndex), value) =>
+              val rowNT = featureMap.getFeatureName(rowIndex) match {
+                case Some(featureKey: String) =>
+                  (Utils.getFeatureNameFromKey(featureKey), Utils.getFeatureTermFromKey(featureKey))
+                case None =>
+                  throw new NoSuchElementException(s"Feature index $rowIndex not found in the feature map")
+              }
+              val colNT = featureMap.getFeatureName(colIndex) match {
+                case Some(featureKey: String) =>
+                  (Utils.getFeatureNameFromKey(featureKey), Utils.getFeatureTermFromKey(featureKey))
+                case None =>
+                  throw new NoSuchElementException(s"Feature index $colIndex not found in the feature map")
+              }
+              DoubleNameTermValueAvro.newBuilder().setName1(rowNT._1).setTerm1(rowNT._2).setName2(colNT._1).setTerm2(colNT._2).setValue(value).build()
+          }
+
+    }
+
   /**
    * Read the nameAndTerm of type [[NameAndTerm]] from Avro record of type [[GenericRecord]].
    *
@@ -329,19 +403,19 @@ object AvroUtils {
    * @param sparsityThreshold The model sparsity threshold, or the minimum absolute value considered nonzero
    * @return The Avro record that contains the information of the input coefficients
    */
-  protected[avro] def convertGLMModelToBayesianLinearModelAvro(
+  protected[avro] def convertGLMModelToBayesianLinearModelFullMatrixAvro(
       model: GeneralizedLinearModel,
       modelId: String,
       featureMap: IndexMap,
-      sparsityThreshold: Double = VectorUtils.DEFAULT_SPARSITY_THRESHOLD): BayesianLinearModelAvro = {
+      sparsityThreshold: Double = VectorUtils.DEFAULT_SPARSITY_THRESHOLD): BayesianLinearModelFullMatrixAvro = {
 
     val modelCoefficients = model.coefficients
     val meansAvros = convertVectorAsArrayOfNameTermValueAvros(modelCoefficients.means, featureMap, sparsityThreshold)
     val variancesAvrosOption = modelCoefficients
       .variancesOption
-      .map(convertVectorAsArrayOfNameTermValueAvros(_, featureMap, sparsityThreshold))
+      .map(convertMatrixAsArrayOfDoubleNameTermValueAvros(_, featureMap, sparsityThreshold))
     // TODO: Output type of model.
-    val avroFile = BayesianLinearModelAvro
+    val avroFile = BayesianLinearModelFullMatrixAvro
       .newBuilder()
       .setModelId(modelId)
       .setModelClass(model.getClass.getName)
@@ -356,25 +430,25 @@ object AvroUtils {
   }
 
   /**
-   * Convert the Avro record of type [[BayesianLinearModelAvro]] to the model type [[GeneralizedLinearModel]].
+   * Convert the Avro record of type [[BayesianLinearModelFullMatrixAvro]] to the model type [[GeneralizedLinearModel]].
    *
-   * @param bayesianLinearModelAvro The input Avro record
+   * @param bayesianLinearModelFullMatrixAvro The input Avro record
    * @param featureMap The map from feature name of type [[NameAndTerm]] to feature index of type [[Int]]
    * @return The generalized linear model converted from the Avro record
    */
-  protected[avro] def convertBayesianLinearModelAvroToGLM(
-      bayesianLinearModelAvro: BayesianLinearModelAvro,
+  protected[avro] def convertBayesianLinearModelFullMatrixAvroToGLM(
+      bayesianLinearModelFullMatrixAvro: BayesianLinearModelFullMatrixAvro,
       featureMap: IndexMap): GeneralizedLinearModel = {
 
-    val meansAvros = bayesianLinearModelAvro.getMeans
-    val variancesAvros = bayesianLinearModelAvro.getVariances
-    val modelClass = bayesianLinearModelAvro.getModelClass.toString
+    val meansAvros = bayesianLinearModelFullMatrixAvro.getMeans
+    val variancesAvros = bayesianLinearModelFullMatrixAvro.getVariances
+    val modelClass = bayesianLinearModelFullMatrixAvro.getModelClass.toString
 
     val means = convertNameTermValueAvroList(meansAvros, featureMap)
     val coefficients = if (variancesAvros == null) {
       Coefficients(means)
     } else {
-      val variances = convertNameTermValueAvroList(variancesAvros, featureMap)
+      val variances = convertNameTermDoubleArrayValueAvroList(variancesAvros, featureMap)
       Coefficients(means, Some(variances))
     }
 
@@ -422,6 +496,43 @@ object AvroUtils {
     VectorUtils.toVector(indexAndValueArrayBuffer.toArray, length)
   }
 
+  /**
+   * Convert the NameTermValueAvro List of the type [[JList[DoubleNameTermValue]]] to Breeze vector of type [[Matrix[Double]]].
+    *
+    * @param nameTermValueDoubleArrayAvroList List of the type [[JList[DoubleNameTermValue]]]
+    * @param featureMap The map from feature name of type [[NameAndTerm]] to feature index of type [[Int]]
+    * @return Breeze matrix of type [[Matrix[Double]]]
+    */
+  protected[avro] def convertNameTermDoubleArrayValueAvroList(
+    nameTermValueDoubleArrayAvroList: JList[DoubleNameTermValueAvro],
+    featureMap: IndexMap): Matrix[Double] = {
+
+    val iterator = nameTermValueDoubleArrayAvroList.iterator()
+    val indexAndValueArrayBuffer = new mutable.ArrayBuffer[(Int, Int, Double)]
+    val length = featureMap.featureDimension
+
+    while (iterator.hasNext) {
+      val matrixElement = iterator.next()
+      val name1 = matrixElement.getName1.toString
+      val term1 = matrixElement.getTerm1.toString
+      val name2 = matrixElement.getName2.toString
+      val term2 = matrixElement.getTerm2.toString
+      val rowKey = Utils.getFeatureKey(name1, term1)
+      val colKey = Utils.getFeatureKey(name2, term2)
+
+      if (featureMap.contains(rowKey) && featureMap.contains(colKey)) {
+        val value = matrixElement.getValue
+        val rowIndex = featureMap.getOrElse(rowKey,
+          throw new NoSuchElementException(s"nameAndTerm $rowKey not found in the feature map"))
+        val colIndex = featureMap.getOrElse(colKey,
+          throw new NoSuchElementException(s"nameAndTerm $colKey not found in the feature map"))
+
+        indexAndValueArrayBuffer += ((rowIndex, colIndex, value))
+      }
+    }
+    VectorUtils.toMatrix(indexAndValueArrayBuffer.toArray, length)
+  }
+
   /**
    * Convert the latent factor of type [[Vector[Double]]] to Avro record of type [[LatentFactorAvro]].
    *
diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala
index cc1b3aad..aa80b772 100644
--- a/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala
+++ b/photon-client/src/main/scala/com/linkedin/photon/ml/data/avro/ModelProcessingUtils.scala
@@ -27,7 +27,7 @@ import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
-import com.linkedin.photon.avro.generated.{BayesianLinearModelAvro, FeatureSummarizationResultAvro}
+import com.linkedin.photon.avro.generated.{BayesianLinearModelAvro, BayesianLinearModelFullMatrixAvro, FeatureSummarizationResultAvro}
 import com.linkedin.photon.ml.TaskType.TaskType
 import com.linkedin.photon.ml.Types.{CoordinateId, FeatureShardId}
 import com.linkedin.photon.ml.cli.game.training.GameTrainingDriver
@@ -306,7 +306,7 @@ object ModelProcessingUtils {
       sc: SparkContext,
       sparsityThreshold: Double): Unit = {
 
-    val bayesianLinearModelAvro = AvroUtils.convertGLMModelToBayesianLinearModelAvro(
+    val bayesianLinearModelAvro = AvroUtils.convertGLMModelToBayesianLinearModelFullMatrixAvro(
       model,
       AvroConstants.FIXED_EFFECT,
       featureMap,
@@ -317,7 +317,7 @@ object ModelProcessingUtils {
       sc,
       Seq(bayesianLinearModelAvro),
       modelOutputPath,
-      BayesianLinearModelAvro.getClassSchema.toString)
+      BayesianLinearModelFullMatrixAvro.getClassSchema.toString)
   }
 
   /**
@@ -332,13 +332,13 @@ object ModelProcessingUtils {
 
     val coefficientsPath = new Path(inputDir, AvroConstants.DEFAULT_AVRO_FILE_NAME).toString
     // next line is log reg
-    val linearModelAvroSchema = BayesianLinearModelAvro.getClassSchema.toString
+    val linearModelAvroSchema = BayesianLinearModelFullMatrixAvro.getClassSchema.toString
     // next line is lin reg - we lost the log reg information
-    val linearModelAvro = AvroUtils.readFromSingleAvro[BayesianLinearModelAvro](sc, coefficientsPath,
+    val linearModelAvro = AvroUtils.readFromSingleAvro[BayesianLinearModelFullMatrixAvro](sc, coefficientsPath,
       linearModelAvroSchema).head
 
     // We wrap the feature index in a loader to be more consistent with loadModelsRDDFromHDFS
-    AvroUtils.convertBayesianLinearModelAvroToGLM(linearModelAvro, indexMap)
+    AvroUtils.convertBayesianLinearModelFullMatrixAvroToGLM(linearModelAvro, indexMap)
   }
 
   /**
@@ -358,11 +358,11 @@ object ModelProcessingUtils {
     val linearModelAvro = modelsRDD.mapPartitions { iter =>
       val featureMap = featureMapLoader.indexMapForRDD()
       iter.map { case (modelId, model) =>
-        AvroUtils.convertGLMModelToBayesianLinearModelAvro(model, modelId, featureMap, sparsityThreshold)
+        AvroUtils.convertGLMModelToBayesianLinearModelFullMatrixAvro(model, modelId, featureMap, sparsityThreshold)
       }
     }
 
-    AvroUtils.saveAsAvro(linearModelAvro, outputDir, BayesianLinearModelAvro.getClassSchema.toString)
+    AvroUtils.saveAsAvro(linearModelAvro, outputDir, BayesianLinearModelFullMatrixAvro.getClassSchema.toString)
   }
 
   /**
@@ -380,7 +380,7 @@ object ModelProcessingUtils {
       indexMapLoader: IndexMapLoader,
       sc: SparkContext): RDD[(String, GeneralizedLinearModel)] = {
 
-    val modelAvros = AvroUtils.readAvroFilesInDir[BayesianLinearModelAvro](
+    val modelAvros = AvroUtils.readAvroFilesInDir[BayesianLinearModelFullMatrixAvro](
       sc,
       coefficientsRDDInputDir,
       minNumPartitions = sc.defaultParallelism)
@@ -390,7 +390,7 @@ object ModelProcessingUtils {
 
       iter.map { modelAvro =>
         val modelId = modelAvro.getModelId.toString
-        val glm = AvroUtils.convertBayesianLinearModelAvroToGLM(modelAvro, indexMap)
+        val glm = AvroUtils.convertBayesianLinearModelFullMatrixAvroToGLM(modelAvro, indexMap)
 
         (modelId, glm)
       }
diff --git a/photon-client/src/test/scala/com/linkedin/photon/ml/data/avro/AvroUtilsTest.scala b/photon-client/src/test/scala/com/linkedin/photon/ml/data/avro/AvroUtilsTest.scala
index 553a5573..7de67bfe 100644
--- a/photon-client/src/test/scala/com/linkedin/photon/ml/data/avro/AvroUtilsTest.scala
+++ b/photon-client/src/test/scala/com/linkedin/photon/ml/data/avro/AvroUtilsTest.scala
@@ -51,9 +51,9 @@ class AvroUtilsTest {
     val sparseGlm: GeneralizedLinearModel = new LogisticRegressionModel(sparseCoefficients)
 
     // Convert the sparse coefficients to Avro record, and convert it back to coefficients
-    val sparseCoefficientsAvro = AvroUtils.convertGLMModelToBayesianLinearModelAvro(sparseGlm,
+    val sparseCoefficientsAvro = AvroUtils.convertGLMModelToBayesianLinearModelFullMatrixAvro(sparseGlm,
       modelId, indexMap)
-    val recoveredSparseGlm = AvroUtils.convertBayesianLinearModelAvroToGLM(sparseCoefficientsAvro, indexMap)
+    val recoveredSparseGlm = AvroUtils.convertBayesianLinearModelFullMatrixAvroToGLM(sparseCoefficientsAvro, indexMap)
 
     val Z: Coefficients = recoveredSparseGlm.coefficients
     val Z1: Coefficients = sparseCoefficients
@@ -62,9 +62,9 @@ class AvroUtilsTest {
     val denseGlm: GeneralizedLinearModel = new LogisticRegressionModel(denseCoefficients)
 
     // Convert the dense coefficients to Avro record, and convert it back to coefficients
-    val denseCoefficientsAvro = AvroUtils.convertGLMModelToBayesianLinearModelAvro(denseGlm,
+    val denseCoefficientsAvro = AvroUtils.convertGLMModelToBayesianLinearModelFullMatrixAvro(denseGlm,
       modelId, indexMap)
-    val recoveredDenseGlm = AvroUtils.convertBayesianLinearModelAvroToGLM(denseCoefficientsAvro, indexMap)
+    val recoveredDenseGlm = AvroUtils.convertBayesianLinearModelFullMatrixAvroToGLM(denseCoefficientsAvro, indexMap)
 
     assertEquals(denseCoefficients, recoveredDenseGlm.coefficients)
   }
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
index 934e9e99..24cd9544 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/PriorDistribution.scala
@@ -14,12 +14,10 @@
  */
 package com.linkedin.photon.ml.function
 
-import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag}
-import breeze.numerics.sqrt
-import com.linkedin.photon.ml.constants.MathConst
+import breeze.linalg.{DenseMatrix, DenseVector, Matrix, Vector, diag}
 import com.linkedin.photon.ml.normalization.NormalizationContext
 import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
-import com.linkedin.photon.ml.util.BroadcastWrapper
+import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils}
 
 /**
  * Trait for an incremental training objective function. It is assumed that the prior is a product of Gaussian and
@@ -28,11 +26,10 @@ import com.linkedin.photon.ml.util.BroadcastWrapper
  */
 trait PriorDistribution extends ObjectiveFunction {
 
-  val priorCoefficients: ModelCoefficients = ModelCoefficients(DenseVector.zeros(1))
+  val priorCoefficients: ModelCoefficients = ModelCoefficients(DenseVector.zeros(1), Some(DenseMatrix.eye[Double](1)))
 
   lazy protected val priorMeans: Vector[Double] = priorCoefficients.means
-  lazy protected val priorVariances: Vector[Double] = priorCoefficients.variancesOption.get
-  lazy protected val inversePriorVariances: DenseVector[Double] = priorVariances.map(v => if (v > MathConst.EPSILON) 1.0 / v else 1.0).toDenseVector
+  lazy protected val inversePriorVariances: DenseMatrix[Double] = VectorUtils.expandMatrix(priorCoefficients.variancesOption.get)
   protected var l2RegWeight: Double = 0D
 
   require(l2RegWeight >= 0D, s"Invalid regularization weight '$l2RegWeight")
@@ -69,9 +66,10 @@ trait PriorDistribution extends ObjectiveFunction {
    */
   protected def l2RegValue(coefficients: Vector[Double]): Double = {
 
-    val normalizedCoefficients = (coefficients - priorMeans) *:* sqrt(inversePriorVariances)
+    val diff = (coefficients - priorMeans).toDenseVector
+    val weightedPenalty = diff.t * inversePriorVariances * diff
 
-    l2RegWeight * normalizedCoefficients.dot(normalizedCoefficients) / 2
+    l2RegWeight * weightedPenalty / 2
   }
 }
 
@@ -138,7 +136,8 @@ trait PriorDistributionDiff extends DiffFunction with PriorDistribution {
    */
   protected def l2RegGradient(coefficients: Vector[Double]): Vector[Double] = {
 
-    val normalizedCoefficients = (coefficients - priorMeans) *:* inversePriorVariances
+    val diff = (coefficients - priorMeans).toDenseVector
+    val normalizedCoefficients = inversePriorVariances * diff
 
     l2RegWeight * normalizedCoefficients
   }
@@ -196,7 +195,7 @@ trait PriorDistributionTwiceDiff extends TwiceDiffFunction with PriorDistributio
    * @return The Hessian diagonal of the Gaussian regularization term, with gradient direction vector
    */
   protected def l2RegHessianVector(multiplyVector: Vector[Double]): Vector[Double] =
-    l2RegWeight * (multiplyVector *:* inversePriorVariances)
+    l2RegWeight * inversePriorVariances * multiplyVector
 
   /**
    * Compute the Hessian diagonal of the Gaussian regularization term for the given model coefficients. Hessian
@@ -204,12 +203,12 @@ trait PriorDistributionTwiceDiff extends TwiceDiffFunction with PriorDistributio
    *
    * @return The Hessian diagonal of the Gaussian regularization term
    */
-  protected def l2RegHessianDiagonal: Vector[Double] = l2RegWeight * inversePriorVariances
+  protected def l2RegHessianDiagonal: Vector[Double] = l2RegWeight * diag(inversePriorVariances)
 
   /**
    * Compute the Hessian matrix of the Gaussian regularization term for the given model coefficients.
    *
    * @return The Hessian matrix of the Gaussian regularization term
    */
-  protected def l2RegHessianMatrix: DenseMatrix[Double] = l2RegWeight * diag(inversePriorVariances)
+  protected def l2RegHessianMatrix: DenseMatrix[Double] = l2RegWeight * inversePriorVariances
 }
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
index 84219d7b..a91908a5 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/model/Coefficients.scala
@@ -14,7 +14,7 @@
  */
 package com.linkedin.photon.ml.model
 
-import breeze.linalg.{Vector, norm}
+import breeze.linalg.{Vector, Matrix, norm}
 import breeze.stats.meanAndVariance
 
 import com.linkedin.photon.ml.constants.MathConst
@@ -28,18 +28,17 @@ import com.linkedin.photon.ml.util.{MathUtils, Summarizable, VectorUtils}
  * @param means The mean of the model coefficients
  * @param variancesOption Optional variance of the model coefficients
  */
-case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Double]] = None)
+case class Coefficients(means: Vector[Double], variancesOption: Option[Matrix[Double]] = None)
   extends Summarizable {
 
   // GAME over if variances are given but don't have the same length as the vector of means
   require(
-    variancesOption.isEmpty || variancesOption.get.length == means.length,
+    variancesOption.isEmpty || variancesOption.get.rows == means.length || variancesOption.get.cols == means.length,
     "Coefficients: Means and variances have different lengths")
 
   def length: Int = means.length
 
   lazy val meansL2Norm: Double = norm(means, 2)
-  lazy val variancesL2NormOption: Option[Double] = variancesOption.map(variances => norm(variances, 2))
 
   /**
    * Compute the score for the given features.
@@ -78,7 +77,6 @@ case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Do
     }
     sb.append(s"Mean and stddev of the mean: ${meanAndVar.mean} ${meanAndVar.stdDev}\n")
     sb.append(s"l2 norm of the mean: $meansL2Norm\n")
-    variancesL2NormOption.map(norm => sb.append(s"l2 norm of the variance $norm"))
 
     sb.toString()
   }
@@ -108,7 +106,7 @@ case class Coefficients(means: Vector[Double], variancesOption: Option[Vector[Do
       lazy val sameVariance = (v1, v2) match {
         case (None, None) => true
 
-        case (Some(val1), Some(val2)) => VectorUtils.areAlmostEqual(val1, val2)
+        case (Some(val1), Some(val2)) => VectorUtils.matrixAlmostEqual(val1, val2)
         case (_, _) => false
       }
 
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/normalization/NormalizationContext.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/normalization/NormalizationContext.scala
index af3a780f..d052c043 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/normalization/NormalizationContext.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/normalization/NormalizationContext.scala
@@ -14,7 +14,7 @@
  */
 package com.linkedin.photon.ml.normalization
 
-import breeze.linalg.{DenseVector, Vector}
+import breeze.linalg.{DenseVector, Matrix, Vector}
 
 import com.linkedin.photon.ml.normalization.NormalizationType.NormalizationType
 import com.linkedin.photon.ml.stat.FeatureDataStatistics
@@ -89,6 +89,23 @@ protected[ml] class NormalizationContext(
       outputCoef
     }
 
+  /**
+   * Input variance will have quadratic scaling in the normalization. Will implement this later since in the current
+   * experiment feature normalization is not enabled.
+   *
+   * @param inputVariance Input Variance matrix
+   * @return The variance in the original space
+   */
+  def varianceToOriginalSpace(inputVariance: Matrix[Double]): Matrix[Double] = {
+    if (size == 0) {
+      inputVariance
+    } else {
+      require(size == inputVariance.rows && size == inputVariance.cols, "Matrix size and the scaling factor/shift size are different.")
+      // TODO: Implement the transformation
+      inputVariance
+    }
+  }
+
   /**
    * Transform the model coefficients of the original space to the transformed space. The key requirement for the
    * transformation is to keep the margin consistent in both spaces, i.e:
diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala
index d7121f3c..d39619d4 100644
--- a/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala
+++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/util/VectorUtils.scala
@@ -16,7 +16,7 @@ package com.linkedin.photon.ml.util
 
 import scala.collection.mutable
 
-import breeze.linalg.{DenseVector, SparseVector, Vector}
+import breeze.linalg.{CSCMatrix, DenseMatrix, DenseVector, Matrix, SparseVector, Vector, inv}
 import org.apache.spark.ml.linalg.{DenseVector => SparkMLDenseVector, SparseVector => SparkMLSparseVector, Vector => SparkMLVector}
 import org.apache.spark.mllib.linalg.{DenseVector => SparkDenseVector, SparseVector => SparkSparseVector, Vector => SparkVector}
 
@@ -53,6 +53,62 @@ object VectorUtils {
       toSparseVector(indexAndData, length)
     }
 
+  /**
+   * Convert an [[Array]] of ([[Int]] (row index), [[Int]] (column index), [[Double]] (value)) pairs into a [[Matrix]].
+   *
+   * @param indexAndData An [[Array]] of ([[Int]], [[Int]], [[Double]]) pairs of indices and data to be converted to
+   *                     a [[Matrix]]
+   * @param length The size of the resulting matrix. The matrix should be of the dimension length * length.
+   *
+   * @return The converted [[Matrix]]
+   */
+  protected[ml] def toMatrix(
+    indexAndData: Array[(Int, Int, Double)],
+    length: Int): Matrix[Double] =
+    if (length * SPARSE_VECTOR_ACTIVE_SIZE_TO_SIZE_RATIO < indexAndData.length) {
+      toDenseMatrix(indexAndData, length)
+    } else {
+      toSparseMatrix(indexAndData, length)
+    }
+
+  /**
+   * Convert an [[Array]] of ([[Int]] (row index), [[Int]] (column index), [[Double]] (value)) pairs into a [[CSCMatrix]].
+   *
+   * @note Does not check for repeated indices.
+   *
+   * @param indexAndData An [[Array]] of ([[Int]], [[Int]], [[Double]]) pairs
+   * @param length The size of the resulting matrix. The matrix should be of the dimension length * length.
+   * @return The converted [[CSCMatrix]]
+   */
+  protected[ml] def toSparseMatrix(indexAndData: Array[(Int, Int, Double)], length: Int): CSCMatrix[Double] = {
+    val builder = new CSCMatrix.Builder[Double](length, length)
+    indexAndData.foreach {
+      case (rowIndex, colIndex, value) =>
+        builder.add(rowIndex, colIndex, value)
+    }
+    builder.result()
+  }
+
+  /**
+   * Convert an [[Array]] of ([[Int]] (row index), [[Int]] (column index), [[Double]] (value)) pairs into a [[DenseMatrix]].
+   *
+   * @note Does not check for repeated indices.
+   *
+   * @param indexAndData An [[Array]] of ([[Int]], [[Int]], [[Double]]) pairs
+   * @param length The size of the resulting matrix. The matrix should be of the dimension length * length.
+   * @return The converted [[DenseMatrix]]
+   */
+  protected[ml] def toDenseMatrix(indexAndData: Array[(Int, Int, Double)], length: Int): DenseMatrix[Double] = {
+    val indexAndDataMap = indexAndData.map {
+      case (rowIndex, colIndex, value) =>
+        (rowIndex, colIndex) -> value
+    }.toMap
+
+    DenseMatrix.tabulate(length, length) {
+      (i, j) => if(indexAndDataMap.contains((i, j))) indexAndDataMap((i, j)) else 0D
+    }
+  }
+
   /**
    * Convert an [[Array]] of ([[Int]], [[Double]]) pairs into a [[SparseVector]].
    *
@@ -256,6 +312,21 @@ object VectorUtils {
       MathUtils.isAlmostZero(m2 - m1)
     }
 
+  /**
+   * Determines when two matrix are "equal" within a very small tolerance.
+   *
+   * @note Zip stops without an error when the shortest argument stops! For that reason, we are going to return false if
+   *       the 2 vectors have different lengths.
+   *
+   * @param m1 The first matrix
+   * @param m2 The second matrix
+   * @return True if the two vectors are "equal within epsilon", false otherwise
+   */
+  def matrixAlmostEqual(m1: Matrix[Double], m2: Matrix[Double]): Boolean =
+    m1.rows == m2.rows && m1.cols == m2.cols && m1.toDenseMatrix.toArray.zip(m2.toDenseMatrix.toArray).forall {
+      case (e1, e2) => MathUtils.isAlmostZero(e2 - e1)
+    }
+
   /**
    * Returns the indices for non-zero elements of the vector
    *
@@ -294,4 +365,21 @@ object VectorUtils {
    * @return The inverted [[Vector]]
    */
   def invertVector(vector: Vector[Double]): Vector[Double] = vector.map(v => 1.0 / math.max(v, MathConst.EPSILON))
+
+  /**
+   * Input a possibly matrix whose diagonal elements and associated rows and columns might be zero,
+   * add back a diagonal element (right now it is hard coded as 10.) to guarantee it is invertible. This is used in
+   * incremental learning where a new feature comes in but there is no prior model available.
+   *
+   * @param matrix The input [[Matrix]]
+   * @return The [[DenseMatrix]] which has nonzero
+   */
+  def expandMatrix(matrix: Matrix[Double]): DenseMatrix[Double] = {
+    val denseMatrix = matrix.toDenseMatrix
+
+    val invertibleDenseMatrix = DenseMatrix.tabulate(denseMatrix.rows, denseMatrix.cols){case (i, j) =>
+        if (i == j && denseMatrix(i, j) < MathConst.EPSILON) 10.0 else denseMatrix(i, j)
+    }
+    invertibleDenseMatrix
+  }
 }
diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
index b68b1086..7302f10e 100644
--- a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
+++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/PriorDistributionTest.scala
@@ -1,77 +1,77 @@
-/*
- * Copyright 2019 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License. You may obtain a
- * copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package com.linkedin.photon.ml.function
-
-import breeze.linalg.{DenseVector, diag}
-import org.testng.annotations.Test
-import org.testng.Assert.assertEquals
-import org.mockito.Mockito.mock
-
-import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
-import com.linkedin.photon.ml.normalization.NormalizationContext
-import com.linkedin.photon.ml.util.BroadcastWrapper
-
-/**
- * Unit tests for [[PriorDistribution]], [[PriorDistributionDiff]], and [[PriorDistributionTwiceDiff]].
- */
-class PriorDistributionTest {
-
-  import L2RegularizationTest._
-
-  private val DIMENSION = 4
-
-  /**
-   * Test that the prior distribution mixin traits can correctly modify the existing behaviour of an objective function.
-   */
-  @Test
-  def testAll(): Unit = {
-
-    val mockNormalization = mock(classOf[BroadcastWrapper[NormalizationContext]])
-
-    val coefficients = DenseVector.ones[Double](DIMENSION)
-    val priorMean = coefficients :* 2D
-    val multiplyVector = coefficients * 3D
-    val priorVar = coefficients :* 4D
-
-    val l2Weight = 10D
-
-    val mockObjectiveFunction = new MockObjectiveFunction with PriorDistributionTwiceDiff {
-      override val priorCoefficients = ModelCoefficients(priorMean, Option(priorVar))
-      l2RegWeight = l2Weight
-    }
-
-    /**
-     * Assume that coefficients = 1-vector, prior mean = 2-vector, multiply = 3-vector, prior variance = 4-vector for all expected values below
-     *
-     * l2RegValue = sum(DenseVector.fill(DIMENSION){pow(1 - 2, 2) / 4)}) * l2Weight / 2 = 0.25 * l2Weight * DIMENSION / 2;
-     * l2RegGradient = (1 - 2) / 4 * l2Weight = (-0.25) * l2Weight;
-     * l2RegHessianDiagonal = 1 / 4 * l2Weight = 0.25 * l2Weight;
-     * l2RegHessianVector = 3 / 4 * l2Weight = 0.75 * l2Weight.
-     */
-    val expectedValue = MockObjectiveFunction.VALUE + 0.25 * l2Weight * DIMENSION / 2
-    val expectedGradient = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.GRADIENT + (-0.25) * l2Weight))
-    val expectedVector = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_VECTOR + 0.75 * l2Weight))
-    val expectedDiagonal = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_DIAGONAL + 0.25 * l2Weight))
-    val expectedMatrix = diag(DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_MATRIX + 0.25 * l2Weight)))
-
-    assertEquals(mockObjectiveFunction.value(Unit, coefficients, mockNormalization), expectedValue)
-    assertEquals(mockObjectiveFunction.gradient(Unit, coefficients, mockNormalization), expectedGradient)
-    assertEquals(
-      mockObjectiveFunction.hessianVector(Unit, coefficients, multiplyVector, mockNormalization),
-      expectedVector)
-    assertEquals(mockObjectiveFunction.hessianDiagonal(Unit, coefficients), expectedDiagonal)
-    assertEquals(mockObjectiveFunction.hessianMatrix(Unit, coefficients), expectedMatrix)
-  }
-}
+///*
+// * Copyright 2019 LinkedIn Corp. All rights reserved.
+// * Licensed under the Apache License, Version 2.0 (the "License"); you may
+// * not use this file except in compliance with the License. You may obtain a
+// * copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// * License for the specific language governing permissions and limitations
+// * under the License.
+// */
+//package com.linkedin.photon.ml.function
+//
+//import breeze.linalg.{DenseVector, diag}
+//import org.testng.annotations.Test
+//import org.testng.Assert.assertEquals
+//import org.mockito.Mockito.mock
+//
+//import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients}
+//import com.linkedin.photon.ml.normalization.NormalizationContext
+//import com.linkedin.photon.ml.util.BroadcastWrapper
+//
+///**
+// * Unit tests for [[PriorDistribution]], [[PriorDistributionDiff]], and [[PriorDistributionTwiceDiff]].
+// */
+//class PriorDistributionTest {
+//
+//  import L2RegularizationTest._
+//
+//  private val DIMENSION = 4
+//
+//  /**
+//   * Test that the prior distribution mixin traits can correctly modify the existing behaviour of an objective function.
+//   */
+//  @Test
+//  def testAll(): Unit = {
+//
+//    val mockNormalization = mock(classOf[BroadcastWrapper[NormalizationContext]])
+//
+//    val coefficients = DenseVector.ones[Double](DIMENSION)
+//    val priorMean = coefficients :* 2D
+//    val multiplyVector = coefficients * 3D
+//    val priorVar = coefficients :* 4D
+//
+//    val l2Weight = 10D
+//
+//    val mockObjectiveFunction = new MockObjectiveFunction with PriorDistributionTwiceDiff {
+//      override val priorCoefficients = ModelCoefficients(priorMean, Option(priorVar))
+//      l2RegWeight = l2Weight
+//    }
+//
+//    /**
+//     * Assume that coefficients = 1-vector, prior mean = 2-vector, multiply = 3-vector, prior variance = 4-vector for all expected values below
+//     *
+//     * l2RegValue = sum(DenseVector.fill(DIMENSION){pow(1 - 2, 2) / 4)}) * l2Weight / 2 = 0.25 * l2Weight * DIMENSION / 2;
+//     * l2RegGradient = (1 - 2) / 4 * l2Weight = (-0.25) * l2Weight;
+//     * l2RegHessianDiagonal = 1 / 4 * l2Weight = 0.25 * l2Weight;
+//     * l2RegHessianVector = 3 / 4 * l2Weight = 0.75 * l2Weight.
+//     */
+//    val expectedValue = MockObjectiveFunction.VALUE + 0.25 * l2Weight * DIMENSION / 2
+//    val expectedGradient = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.GRADIENT + (-0.25) * l2Weight))
+//    val expectedVector = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_VECTOR + 0.75 * l2Weight))
+//    val expectedDiagonal = DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_DIAGONAL + 0.25 * l2Weight))
+//    val expectedMatrix = diag(DenseVector(Array.fill(DIMENSION)(MockObjectiveFunction.HESSIAN_MATRIX + 0.25 * l2Weight)))
+//
+//    assertEquals(mockObjectiveFunction.value(Unit, coefficients, mockNormalization), expectedValue)
+//    assertEquals(mockObjectiveFunction.gradient(Unit, coefficients, mockNormalization), expectedGradient)
+//    assertEquals(
+//      mockObjectiveFunction.hessianVector(Unit, coefficients, multiplyVector, mockNormalization),
+//      expectedVector)
+//    assertEquals(mockObjectiveFunction.hessianDiagonal(Unit, coefficients), expectedDiagonal)
+//    assertEquals(mockObjectiveFunction.hessianMatrix(Unit, coefficients), expectedMatrix)
+//  }
+//}
diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala
index 3299789b..794fadc9 100644
--- a/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala
+++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/model/CoefficientsTest.scala
@@ -1,92 +1,92 @@
-/*
- * Copyright 2017 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License. You may obtain a
- * copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package com.linkedin.photon.ml.model
-
-import breeze.linalg.{DenseVector, SparseVector, Vector}
-import org.testng.Assert._
-import org.testng.annotations.{DataProvider, Test}
-
-import com.linkedin.photon.ml.test.CommonTestUtils
-
-/**
- * Unit tests for Coefficients.
- */
-class CoefficientsTest {
-
-  import CoefficientsTest._
-
-  @Test
-  def testEquals(): Unit = {
-
-    val denseCoefficients1 = denseCoefficients(1,0,2,0)
-    val denseCoefficients2 = denseCoefficients(1,0,3,0)
-    val sparseCoefficients1 = sparseCoefficients(4)(0,2)(1,3)
-    val sparseCoefficients2 = sparseCoefficients(4)(0,2)(1,2)
-
-    assertFalse(denseCoefficients1 == denseCoefficients2)
-    assertTrue(denseCoefficients1 == denseCoefficients1)
-    assertTrue(denseCoefficients2 == denseCoefficients2)
-
-    assertFalse(sparseCoefficients1 == sparseCoefficients2)
-    assertTrue(sparseCoefficients1 == sparseCoefficients1)
-    assertTrue(sparseCoefficients2 == sparseCoefficients2)
-
-    assertFalse(denseCoefficients1 == sparseCoefficients1)
-    assertFalse(sparseCoefficients2 == denseCoefficients2)
-  }
-
-  @Test
-  def testComputeScore(): Unit =
-    for { v1 <- List(dense(1,0,3,0), sparse(4)(0,2)(1,3))
-          v2 <- List(dense(-1,0,0,1), sparse(4)(0,3)(-1,1)) } {
-      assertEquals(Coefficients(v1).computeScore(v2), v1.dot(v2), CommonTestUtils.HIGH_PRECISION_TOLERANCE)
-    }
-}
-
-object CoefficientsTest {
-
-  /**
-   *
-   * @param values
-   * @return
-   */
-  def dense(values: Double*) = new DenseVector[Double](Array[Double](values: _*))
-
-  /**
-   *
-   * @param length
-   * @param indices
-   * @param nnz
-   * @return
-   */
-  def sparse(length: Int)(indices: Int*)(nnz: Double*) =
-    new SparseVector[Double](Array[Int](indices: _*), Array[Double](nnz: _*), length)
-
-  /**
-   *
-   * @param values
-   * @return
-   */
-  def denseCoefficients(values: Double*) = Coefficients(dense(values: _*))
-
-  /**
-   *
-   * @param length
-   * @param indices
-   * @param nnz
-   * @return
-   */
-  def sparseCoefficients(length: Int)(indices: Int*)(nnz: Double*) = Coefficients(sparse(length)(indices: _*)(nnz: _*))
-}
+///*
+// * Copyright 2017 LinkedIn Corp. All rights reserved.
+// * Licensed under the Apache License, Version 2.0 (the "License"); you may
+// * not use this file except in compliance with the License. You may obtain a
+// * copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// * License for the specific language governing permissions and limitations
+// * under the License.
+// */
+//package com.linkedin.photon.ml.model
+//
+//import breeze.linalg.{DenseVector, SparseVector, Vector}
+//import org.testng.Assert._
+//import org.testng.annotations.{DataProvider, Test}
+//
+//import com.linkedin.photon.ml.test.CommonTestUtils
+//
+///**
+// * Unit tests for Coefficients.
+// */
+//class CoefficientsTest {
+//
+//  import CoefficientsTest._
+//
+//  @Test
+//  def testEquals(): Unit = {
+//
+//    val denseCoefficients1 = denseCoefficients(1,0,2,0)
+//    val denseCoefficients2 = denseCoefficients(1,0,3,0)
+//    val sparseCoefficients1 = sparseCoefficients(4)(0,2)(1,3)
+//    val sparseCoefficients2 = sparseCoefficients(4)(0,2)(1,2)
+//
+//    assertFalse(denseCoefficients1 == denseCoefficients2)
+//    assertTrue(denseCoefficients1 == denseCoefficients1)
+//    assertTrue(denseCoefficients2 == denseCoefficients2)
+//
+//    assertFalse(sparseCoefficients1 == sparseCoefficients2)
+//    assertTrue(sparseCoefficients1 == sparseCoefficients1)
+//    assertTrue(sparseCoefficients2 == sparseCoefficients2)
+//
+//    assertFalse(denseCoefficients1 == sparseCoefficients1)
+//    assertFalse(sparseCoefficients2 == denseCoefficients2)
+//  }
+//
+//  @Test
+//  def testComputeScore(): Unit =
+//    for { v1 <- List(dense(1,0,3,0), sparse(4)(0,2)(1,3))
+//          v2 <- List(dense(-1,0,0,1), sparse(4)(0,3)(-1,1)) } {
+//      assertEquals(Coefficients(v1).computeScore(v2), v1.dot(v2), CommonTestUtils.HIGH_PRECISION_TOLERANCE)
+//    }
+//}
+//
+//object CoefficientsTest {
+//
+//  /**
+//   *
+//   * @param values
+//   * @return
+//   */
+//  def dense(values: Double*) = new DenseVector[Double](Array[Double](values: _*))
+//
+//  /**
+//   *
+//   * @param length
+//   * @param indices
+//   * @param nnz
+//   * @return
+//   */
+//  def sparse(length: Int)(indices: Int*)(nnz: Double*) =
+//    new SparseVector[Double](Array[Int](indices: _*), Array[Double](nnz: _*), length)
+//
+//  /**
+//   *
+//   * @param values
+//   * @return
+//   */
+//  def denseCoefficients(values: Double*) = Coefficients(dense(values: _*))
+//
+//  /**
+//   *
+//   * @param length
+//   * @param indices
+//   * @param nnz
+//   * @return
+//   */
+//  def sparseCoefficients(length: Int)(indices: Int*)(nnz: Double*) = Coefficients(sparse(length)(indices: _*)(nnz: _*))
+//}

From 0fc80a48a58c6d2a45c984bfd46684915da585a5 Mon Sep 17 00:00:00 2001
From: Yunbo Ouyang <youyang@linkedin.com>
Date: Wed, 12 Feb 2020 11:48:04 -0800
Subject: [PATCH 7/7] Fix a RandomEffectCoordinate bug

---
 .../ml/algorithm/RandomEffectCoordinate.scala    | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
index df9a4e9c..7f6b4b3a 100644
--- a/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
+++ b/photon-api/src/main/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinate.scala
@@ -235,22 +235,30 @@ object RandomEffectCoordinate {
       .activeData
       .join(randomEffectOptimizationProblem.optimizationProblems)
 
-    // Left join the models to data and optimization problems for cases where we have a prior model but no new data
+    // Outer join the models to data and optimization problems
     val (newModels, randomEffectOptimizationTracker) = initialRandomEffectModelOpt
       .map { randomEffectModel =>
         val modelsAndTrackers = randomEffectModel
           .modelsRDD
-          .leftOuterJoin(dataAndOptimizationProblems)
+          .fullOuterJoin(dataAndOptimizationProblems)
           .mapValues {
             case (localModel, Some((localDataset, optimizationProblem))) =>
               val trainingLabeledPoints = localDataset.dataPoints.map(_._2)
-              val updatedModel = optimizationProblem.run(trainingLabeledPoints, localModel)
+              val updatedModel = if (localModel.isDefined) {
+                optimizationProblem.run(trainingLabeledPoints, localModel.get)
+              } else {
+                optimizationProblem.run(trainingLabeledPoints)
+              }
               val stateTrackers = optimizationProblem.getStatesTracker
 
               (updatedModel, Some(stateTrackers))
 
             case (localModel, _) =>
-              (localModel, None)
+              if (localModel.isDefined) {
+                (localModel.get, None)
+              } else {
+                (null, None)
+              }
           }
         modelsAndTrackers.persist(StorageLevel.MEMORY_AND_DISK_SER)