
Commit 744081f

implement grid search
1 parent 23d2813 commit 744081f

11 files changed: +287 -10 lines changed

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+/*
+Copyright 2022 Joel Berkeley
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+
+#include "../xla_builder.h"
+
+extern "C" {
+  XlaOp* ArgMax(XlaOp& input, int output_type, int axis) {
+    auto& input_ = reinterpret_cast<xla::XlaOp&>(input);
+    xla::XlaOp res = xla::ArgMax(input_, (xla::PrimitiveType) output_type, axis);
+    return reinterpret_cast<XlaOp*>(new xla::XlaOp(res));
+  }
+
+  XlaOp* ArgMin(XlaOp& input, int output_type, int axis) {
+    auto& input_ = reinterpret_cast<xla::XlaOp&>(input);
+    xla::XlaOp res = xla::ArgMin(input_, (xla::PrimitiveType) output_type, axis);
+    return reinterpret_cast<XlaOp*>(new xla::XlaOp(res));
+  }
+}
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+/*
+Copyright 2022 Joel Berkeley
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "../xla_builder.h"
+
+extern "C" {
+  XlaOp* ArgMax(XlaOp& input, int output_type, int axis);
+  XlaOp* ArgMin(XlaOp& input, int output_type, int axis);
+}
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+{--
+Copyright 2022 Joel Berkeley
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--}
+module Compiler.Xla.Prim.TensorFlow.Compiler.Xla.Client.Lib.Arithmetic
+
+import System.FFI
+
+import Compiler.Xla.Prim.Util
+
+export
+%foreign (libxla "ArgMax")
+prim__argMax : GCAnyPtr -> Int -> Int -> PrimIO AnyPtr
+
+export
+%foreign (libxla "ArgMin")
+prim__argMin : GCAnyPtr -> Int -> Int -> PrimIO AnyPtr
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+{--
+Copyright 2022 Joel Berkeley
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--}
+module Compiler.Xla.TensorFlow.Compiler.Xla.Client.Lib.Arithmetic
+
+import Compiler.Xla.Prim.TensorFlow.Compiler.Xla.Client.Lib.Arithmetic
+import Compiler.Xla.TensorFlow.Compiler.Xla.Client.XlaBuilder
+import Compiler.Xla.TensorFlow.Compiler.Xla.XlaData
+
+export
+argMax : (HasIO io, Primitive dtype) => XlaOp -> Nat -> io XlaOp
+argMax (MkXlaOp input) axis = do
+  opPtr <- primIO $ prim__argMax input (xlaIdentifier {dtype}) (cast axis)
+  opPtr <- onCollectAny opPtr XlaOp.delete
+  pure (MkXlaOp opPtr)
+
+export
+argMin : (HasIO io, Primitive dtype) => XlaOp -> Nat -> io XlaOp
+argMin (MkXlaOp input) axis = do
+  opPtr <- primIO $ prim__argMin input (xlaIdentifier {dtype}) (cast axis)
+  opPtr <- onCollectAny opPtr XlaOp.delete
+  pure (MkXlaOp opPtr)

src/Model/GaussianProcess.idr

Lines changed: 1 addition & 1 deletion
@@ -108,7 +108,7 @@ export
 ||| Fit the Gaussian process and noise to the specified data.
 export
 fit : ConjugateGPRegression features
-  -> (forall n . Tensor [n] F64 -> Optimizer $ Tensor [n] F64)
+  -> (forall n . Tensor [n] F64 -> Optimizer [n])
   -> Dataset features [1]
   -> ConjugateGPRegression features
 fit (MkConjugateGPR {p} mkPrior gpParams noise) optimizer (MkDataset x y) =

src/Optimize.idr

Lines changed: 49 additions & 9 deletions
@@ -16,6 +16,7 @@ limitations under the License.
 ||| This module contains definitions of function optimizers.
 module Optimize
 
+import Literal
 import Tensor
 
 ||| An `Optimizer` finds the value, in a `Tensor`-valued feature space, which (approximately)
@@ -28,22 +29,61 @@ import Tensor
 |||
 ||| @domain The type of the domain over which to find the optimizer.
 public export 0
-Optimizer : {default id 0 m : Type -> Type} -> (0 domain : Type) -> Type
-Optimizer a = (a -> m $ Tensor [] F64) -> m a
+Optimizer : Shape -> Type
+Optimizer domain = (Tensor domain F64 -> Tensor [] F64) -> Tensor domain F64
 
-||| Construct an `Optimizer` that implements grid search over a scalar feature space. Grid search
-||| approximates the optimum by evaluating the objective over a finite, evenly-spaced grid.
-|||
-||| **NOTE** This function is not yet implemented.
+-- naively, i'd like there to be just one optimizer, but is that possible, and practical?
+public export 0
+BatchOptimizer : Shape -> Type
+BatchOptimizer domain = ({n : _} -> Tensor (n :: domain) F64 -> Tensor [n] F64) -> Tensor domain F64
+
+||| Grid search over a scalar feature space. Grid search approximates the optimum by evaluating the
+||| objective over a finite, evenly-spaced grid.
 |||
 ||| @density The density of the grid.
 ||| @lower The lower (inclusive) bound of the grid.
 ||| @upper The upper (exclusive) bound of the grid.
 export
-gridSearch : (density : Tensor [d] U32) ->
+gridSearch : {d : _} ->
+             (density : Vect d Nat) ->
             (lower : Tensor [d] F64) ->
             (upper : Tensor [d] F64) ->
-            Optimizer (Tensor [d] F64)
+            BatchOptimizer [d]
+gridSearch {d=Z} _ _ _ _ = fromLiteral []
+gridSearch {d=S k} density lower upper f =
+  let densityAll : Nat
+      densityAll = product density
+
+      prodDims : Tensor [S k] U64 := fromLiteral $ cast $ scanr (*) 1 (tail density)
+      idxs = fromLiteral {shape=[densityAll]} $ cast $ Vect.range densityAll
+      densityTensor = broadcast $ fromLiteral {shape=[S k]} {dtype=U64} (cast density)
+      grid = broadcast {to=[densityAll, S k]} (expand 1 idxs)
+               `Tensor.div` broadcast {from=[S k]} (cast prodDims)
+               `Tensor.mod` densityTensor
+      gridRelative : Tensor [densityAll, S k] F64 = cast grid / cast densityTensor
+      points = with Tensor.(+)
+        broadcast lower + broadcast {to=[densityAll, S k]} (upper - lower) * gridRelative
+      idx = argmin 0 (f points)
+  in index 0 idx points
+
+||| If `xs` is a vector of exclusive upper bounds for a number of dimensions, this produces a list
+||| of all positions in the (higher-dimensional) grid. For example, `grid [3]` is `[[0], [1], [2]]`,
+||| and `grid [2, 3]` is
+||| ```
+||| [
+|||     [0, 0]
+|||   , [0, 1]
+|||   , [0, 2]
+|||   , [1, 0]
+|||   , [1, 1]
+|||   , [1, 2]
+||| ]
+||| ```
+export
+grid : (xs : Vect (S n) Nat) -> Vect (product xs) (Vect (S n) Nat)
+grid xs =
+  let prodDims = scanr (*) 1 (tail xs)
+  in map (\e => zipWith (\x, p => e `div` p `mod` x) xs prodDims) (range (product xs))
 
 ||| The limited-memory BFGS (L-BFGS) optimization tactic, see
 |||
@@ -58,4 +98,4 @@ gridSearch : (density : Tensor [d] U32) ->
 |||
 ||| @initialPoints The points from which to start optimization.
 export
-lbfgs : (initialPoints : Tensor [n] F64) -> Optimizer (Tensor [n] F64)
+lbfgs : (initialPoints : Tensor [n] F64) -> Optimizer [n]
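
A worked example of the stride arithmetic shared by `gridSearch` and `grid` (an illustration under the definitions above, not part of the commit):

-- For xs = [2, 3], scanr (*) 1 (tail xs) gives the stride products [3, 1].
-- The flat index e = 4 then decodes as
--   zipWith (\x, p => e `div` p `mod` x) [2, 3] [3, 1]
--     = [4 `div` 3 `mod` 2, 4 `div` 1 `mod` 3]
--     = [1, 1]
-- which is the fifth position in the docstring example for grid [2, 3].
-- gridSearch performs the same decoding on whole Tensors via Tensor.div and Tensor.mod.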

src/Tensor.idr

Lines changed: 32 additions & 0 deletions
@@ -369,6 +369,16 @@ slice at (MkTensor expr) =
     size _ (DynamicSlice _ size') = size'
     size _ (DynamicIndex _) = 1
 
+namespace Dynamic
+  export
+  index :
+    Primitive dtype =>
+    (axis : Nat) ->
+    {auto 0 axisInBounds : InBounds axis shape} ->
+    Tensor [] U64 ->
+    Tensor shape dtype ->
+    Tensor (deleteAt axis shape) dtype
+
 ||| Concatenate two `Tensor`s along the specified `axis`. For example,
 ||| `concat 0 (fromLiteral [[1, 2], [3, 4]]) (fromLiteral [[5, 6]])` and
 ||| `concat 1 (fromLiteral [[3], [6]]) (fromLiteral [[4, 5], [7, 8]])` are both
@@ -1161,6 +1171,28 @@ namespace Monoid
   Monoid (Tensor shape dtype) using Semigroup.Max where
     neutral = fill (- 1.0 / 0.0)
 
+export
+argmin :
+  (Primitive outType, Primitive.Num dtype) =>
+  (axis : Nat) ->
+  {auto 0 ok : InBounds axis shape} ->
+  Tensor shape dtype ->
+  Tensor [] outType
+
+export
+argmax :
+  (Primitive outType, Primitive.Num dtype) =>
+  (axis : Nat) ->
+  {auto 0 ok : InBounds axis shape} ->
+  Tensor shape dtype ->
+  Tensor [] outType
+
+export
+div : Primitive.Integral dtype => Tensor shape dtype -> Tensor shape dtype -> Tensor shape dtype
+
+export
+mod : Primitive.Integral dtype => Tensor shape dtype -> Tensor shape dtype -> Tensor shape dtype
+
 ---------------------------- other ----------------------------------
 
 ||| Cholesky decomposition. Computes the lower triangular matrix `L` from the symmetric, positive
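
A usage sketch for the new declarations (values assumed for illustration; not from the diff):

-- With x = fromLiteral [3.0, 1.0, 2.0] : Tensor [3] F64,
-- argmin 0 x is a Tensor [] U64 holding the index 1, and
-- index 0 (argmin 0 x) x recovers the minimum, 1.0.
-- This argmin-then-index pairing is how gridSearch selects the best grid point.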

src/Util.idr

Lines changed: 7 additions & 0 deletions
@@ -49,6 +49,13 @@ namespace Vect
     let lengthOK = lengthCorrect xs
     in rewrite sym lengthOK in zip (range (length xs)) (rewrite lengthOK in xs)
 
+  ||| Like `foldr`, but returns a vector of all intermediate accumulated states. The first
+  ||| state appears last in the result, and the last state appears first.
+  public export
+  scanr : (elem -> res -> res) -> res -> Vect len elem -> Vect (S len) res
+  scanr _ q [] = [q]
+  scanr f q (x :: xs) = let qs'@(q' :: _) = scanr f q xs in f x q' :: qs'
+
 namespace List
   ||| All numbers from `0` to `n - 1` inclusive, in increasing order.
   |||
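
For concreteness, `scanr` evaluated by hand (a sketch, not part of the diff):

-- scanr (*) 1 [2, 4, 3]
--   = 2 * 12 :: scanr (*) 1 [4, 3]
--   = [24, 12, 3, 1]
-- The initial state 1 appears last, as documented. gridSearch relies on this
-- via scanr (*) 1 (tail density) to compute stride products.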

test.ipkg

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ modules =
     Unit.Util.TestHashable,
     Unit.TestDistribution,
     Unit.TestLiteral,
+    Unit.TestOptimize,
    Unit.TestTensor,
     Unit.TestUtil,

test/Main.idr

Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,7 @@ import Utils.TestComparison
 import Unit.Model.TestKernel
 import Unit.Util.TestHashable
 import Unit.TestDistribution
+import Unit.TestOptimize
 import Unit.TestTensor
 import Unit.TestLiteral
 import Unit.TestUtil
@@ -37,6 +38,7 @@ main = test [
   , Unit.TestUtil.group
   , Unit.TestLiteral.group
   , Unit.TestTensor.group
+  , Unit.TestOptimize.group
   , Unit.TestDistribution.group
   , Unit.Model.TestKernel.group
   ]

test/Unit/TestOptimize.idr

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+{--
+Copyright 2022 Joel Berkeley
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--}
+module Unit.TestOptimize
+
+import Literal
+import Optimize
+import Tensor
+
+import Utils.Cases
+import Utils.Comparison
+
+gridSearch : Property
+gridSearch = fixedProperty $ do
+  let lower = fromLiteral [-1.0, -1.0, -1.0]
+      upper = fromLiteral [1.0, 1.0, 1.0]
+
+      f : {n : _} -> Tensor [n, 3] F64 -> Tensor [n] F64
+      f x = reduce @{Sum} 1 (x ^ fill 2.0)
+
+  gridSearch [100, 100, 100] lower upper f ===# fromLiteral [0.0, 0.0, 0.0]
+
+grid' : Property
+grid' = fixedProperty $ do
+  grid [0] === []
+  grid [0, 2] === []
+  grid [1] === [[0]]
+  grid [3] === [[0], [1], [2]]
+  grid [2, 3] === [
+      [0, 0],
+      [0, 1],
+      [0, 2],
+      [1, 0],
+      [1, 1],
+      [1, 2]
+    ]
+  grid [2, 4, 3] === [
+      [0, 0, 0],
+      [0, 0, 1],
+      [0, 0, 2],
+      [0, 1, 0],
+      [0, 1, 1],
+      [0, 1, 2],
+      [0, 2, 0],
+      [0, 2, 1],
+      [0, 2, 2],
+      [0, 3, 0],
+      [0, 3, 1],
+      [0, 3, 2],
+      [1, 0, 0],
+      [1, 0, 1],
+      [1, 0, 2],
+      [1, 1, 0],
+      [1, 1, 1],
+      [1, 1, 2],
+      [1, 2, 0],
+      [1, 2, 1],
+      [1, 2, 2],
+      [1, 3, 0],
+      [1, 3, 1],
+      [1, 3, 2]
+    ]
+
+export covering
+group : Group
+group = MkGroup "Optimize" $ [
+    ("grid search", gridSearch), ("grid", grid')
+  ]
