
BackwardSpec


BackwardSpec of ffi-experimental tests the autograd (backward) functionality of libtorch from Haskell. The code is at https://github.com/hasktorch/ffi-experimental/blob/master/ffi/test/BackwardSpec.hs#L86-L91.

The spec tests that the partial derivative (d c_i / d a_i) is 1. The pseudocode is below.

 a: A 2x2 tensor variable, initialized with ones.
 b: A 2x2 tensor variable, initialized with random numbers.
 c = a + b
 d c_i / d a_i: The partial derivative of each element of `c` with respect to the corresponding element of `a`, where `i` is an index.
 d c_i / d a_i becomes 1.
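
Since `c` is an elementwise sum, each element of `c` depends on the corresponding element of `a` with coefficient 1:

 c_i = a_i + b_i  =>  d c_i / d a_i = 1

so after the backward pass every element of the gradient of `a` must be 1, which is exactly what the spec asserts.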

The code, with comments:

...
module BackwardSpec (spec) where

...

-- This module (ATen.Managed.Native) provides factory methods and other functions in the at:: namespace.
import qualified ATen.Managed.Native as A

-- This module (Torch.Managed.Native) provides only the factory methods in the torch:: namespace.
import Torch.Managed.Native
-- The following is quoted from https://pytorch.org/cppdocs/:
--   The at::Tensor class in ATen is not differentiable by default. 
--   To add the differentiability of tensors the autograd API provides, 
--   you must use tensor factory functions from the torch:: namespace instead of the at namespace.
--   For example, while a tensor created with at::ones will not be differentiable, 
--   a tensor created with torch::ones will be.

-- IntArray holds the dimensions (shape) of a Tensor.
intArray :: [Int64] -> IO (ForeignPtr IntArray)
intArray dims = do
  ary <- newIntArray
  forM_ dims $ intArray_push_back_l ary
  return ary
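-- For example, `intArray [2,2]` builds the IntArray describing a 2x2 shape;
-- in this spec it is only called indirectly, through `new'` below.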

...

-- An FFI function takes pure (non-IO) arguments and returns its result
-- in the IO monad, e.g. `a -> b -> IO c`.
-- Because such functions can be awkward to compose directly,
-- the following helpers lift an FFI function (e.g. `a -> b -> IO c`)
-- into one that accepts IO arguments (e.g. `IO a -> IO b -> IO c`).
ap1 :: (b -> IO a) -> IO b -> IO a
ap1 fn a0  = join $ fn <$> a0
-- This is the same as follows.
-- ap1 fn a0 = do
--   a <- a0
--   fn a

ap2 :: (b -> c -> IO a) -> IO b -> IO c -> IO a
ap2 fn a0 a1  = join $ fn <$> a0 <*> a1
ap3 fn a0 a1 a2  = join $ fn <$> a0 <*> a1 <*> a2
ap4 fn a0 a1 a2 a3 = join $ fn <$> a0 <*> a1 <*> a2 <*> a3
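-- `ap2` is equivalent to the following do-notation (shown for illustration;
-- `ap3` and `ap4` expand the same way with more arguments):
-- ap2 fn a0 a1 = do
--   a <- a0
--   b <- a1
--   fn a b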

-- Take an element of a tensor by index; at1, at2 and at3 take one, two and three indices.
at1 tensor i0 = tensor__at__l tensor i0
at2 tensor i0 i1 = ap2 tensor__at__l (at1 tensor i0) (pure i1)
at3 tensor i0 i1 i2 = ap2 tensor__at__l (at2 tensor i0 i1) (pure i2)
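-- For example, `at2 t 0 1` returns the element at position [0][1] of a
-- 2-dimensional tensor `t`.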

-- Wrapper around a tensor factory method (e.g. the ones or zeros factories).
new' fn dsize dtype = ap2 fn (intArray dsize) (options kCPU dtype)
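-- For example, `new' ones_lo [2,2] kFloat` creates a 2x2 float tensor of ones
-- on the CPU, with requires_grad enabled via `options` below.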

-- add_tts computes `Tensor + Tensor`; add_tss computes `Tensor + Scalar`.
-- The last argument (`newScalar_d 1`) is the `alpha` scaling factor of add
-- (the result is `a + alpha * b`), so passing 1 gives a plain addition.
add' :: ForeignPtr Tensor -> ForeignPtr Tensor -> IO (ForeignPtr Tensor)
add' a b = join $ A.add_tts <$> pure a <*> pure b <*> newScalar_d 1

addM' :: IO (ForeignPtr Tensor) -> IO (ForeignPtr Tensor) -> IO (ForeignPtr Tensor)
addM' a b = join $ A.add_tts <$> a <*> b <*> newScalar_d 1

add_s' :: ForeignPtr Tensor -> ForeignPtr Scalar -> IO (ForeignPtr Tensor)
add_s' a b = join $ A.add_tss <$> pure a <*> pure b <*> newScalar_d 1

addM_s' :: IO (ForeignPtr Tensor) -> IO (ForeignPtr Scalar) -> IO (ForeignPtr Tensor)
addM_s' a b = join $ A.add_tss <$> a <*> b <*> newScalar_d 1
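
-- For example, `addM' (new' ones_lo [2,2] kFloat) (new' randn_lo [2,2] kFloat)`
-- adds two freshly created tensors without binding the IO actions first
-- (illustrative usage; the spec below uses `add'` on already-bound tensors).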

-- tensorOptions_requires_grad_b sets the requires_grad flag, enabling gradient computation.
options :: DeviceType -> ScalarType -> IO (ForeignPtr TensorOptions)
options dtype stype = ap2 tensorOptions_requires_grad_b (ap2 tensorOptions_dtype_s (device_D dtype) (pure stype)) (pure 1)
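-- For example, `options kCPU kFloat` yields CPU/float TensorOptions with
-- requires_grad set, which is what makes the tensors created below differentiable.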

spec :: Spec
spec = forM_ [
  (kFloat,"float"),
  (kDouble,"double")
  ] $ \(dtype,dtype_str) -> describe ("BasicSpec:" <> dtype_str) $ do
--   This Haskell test is equivalent to the following C++ code.
--   {
--        torch::Tensor a = torch::ones({2, 2}, torch::requires_grad());
--        torch::Tensor b = torch::randn({2, 2});
--        auto c = a + b;
--        std::cout << a << std::endl;
--        std::cout << b << std::endl;
--        std::cout << c << std::endl;
--        c.backward();
--        auto aa = a.grad();
--        for(int i=0;i<2;i++)
--          for(int j=0;j<2;j++)
--            assert(aa[i][j]==1);
--   }
  it "Backward" $ do
    a <- new' ones_lo [2,2] dtype
    print "--a--"
    forM_ [0..1] $ \i ->
      forM_ [0..1] $ \j ->
        at2 a i j >>= tensor_item_double >>= print
    b <- new' randn_lo [2,2] dtype
    print "--b--"
    forM_ [0..1] $ \i ->
      forM_ [0..1] $ \j ->
        at2 b i j >>= tensor_item_double >>= print
    print "--c--"
    c <- add' a b
    forM_ [0..1] $ \i ->
      forM_ [0..1] $ \j ->
        at2 c i j >>= tensor_item_double >>= print
    tensor_print c
    -- Run the backward computation.
    tensor_backward c
    -- Get the gradient of `a`.
    a' <- tensor_grad a
    print "--a'--"
    forM_ [0..1] $ \i ->
      forM_ [0..1] $ \j ->
        -- Verify the gradient values are all 1.
        (at2 a' i j >>= tensor_item_double) `shouldReturn` 1
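
For reference, a minimal sketch of how such a spec is typically run with hspec (assuming a standard test-suite entry point, which is not part of the original file):

 module Main where

 import Test.Hspec (hspec)
 import qualified BackwardSpec

 main :: IO ()
 main = hspec BackwardSpec.spec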
