BackwardSpec
BackwardSpec of ffi-experimental tests automatic differentiation (AD) of libtorch from Haskell.
The code is https://github.com/hasktorch/ffi-experimental/blob/master/ffi/test/BackwardSpec.hs#L86-L91.
The spec tests that the partial derivative (d c_i / d a_i) equals 1. The pseudocode is below.
- a: a variable 2x2 tensor, initialized to 1.
- b: a variable 2x2 tensor, initialized with random numbers.
- c = a + b
- d c_i / d a_i: the partial derivative of c with respect to a for each element, where i is an index.
- d c_i / d a_i equals 1, because c_i = a_i + b_i, so each c_i depends on a_i with coefficient 1.
...
module BackwardSpec (spec) where
...
-- This module (ATen.Managed.Native) includes factory methods and other methods for the at:: namespace.
import qualified ATen.Managed.Native as A
-- This module (Torch.Managed.Native) includes only factory methods for the torch:: namespace.
import Torch.Managed.Native
-- The following is written in https://pytorch.org/cppdocs/.
-- The at::Tensor class in ATen is not differentiable by default.
-- To add the differentiability of tensors the autograd API provides,
-- you must use tensor factory functions from the torch:: namespace instead of the at namespace.
-- For example, while a tensor created with at::ones will not be differentiable,
-- a tensor created with torch::ones will be.
-- IntArray holds the dimensions (shape) of a Tensor.
intArray :: [Int64] -> IO (ForeignPtr IntArray)
intArray dims = do
  ary <- newIntArray
  forM_ dims $ intArray_push_back_l ary
  return ary
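For example, the shape argument for the 2x2 tensors used below can be built like this (a minimal usage sketch; `mkShape2x2` is just an illustrative name):

```haskell
-- Build the dims of a 2x2 tensor with the helper above.
mkShape2x2 :: IO (ForeignPtr IntArray)
mkShape2x2 = intArray [2, 2]
```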
...
-- An FFI function takes plain arguments and returns its result in IO,
-- e.g. `a -> b -> IO c`.
-- Because such functions are sometimes awkward to compose,
-- the following helpers turn an FFI-style function (e.g. `a -> b -> IO c`)
-- into one that accepts IO arguments (e.g. `IO a -> IO b -> IO c`).
ap1 :: (b -> IO a) -> IO b -> IO a
ap1 fn a0 = join $ fn <$> a0
-- This is the same as:
-- ap1 fn a0 = do
--   a <- a0
--   fn a
ap2 :: (b -> c -> IO a) -> IO b -> IO c -> IO a
ap2 fn a0 a1 = join $ fn <$> a0 <*> a1
ap3 fn a0 a1 a2 = join $ fn <$> a0 <*> a1 <*> a2
ap4 fn a0 a1 a2 a3 = join $ fn <$> a0 <*> a1 <*> a2 <*> a3
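For reference, `ap2` desugars the same way as `ap1` above (a sketch; `ap2'` is an illustrative name, not part of the module):

```haskell
-- Equivalent do-notation form of ap2.
ap2' :: (b -> c -> IO a) -> IO b -> IO c -> IO a
ap2' fn a0 a1 = do
  b <- a0
  c <- a1
  fn b c
```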
-- Index into a tensor: atN applies `tensor__at__l` N times, once per dimension.
at1 tensor i0 = tensor__at__l tensor i0
at2 tensor i0 i1 = ap2 tensor__at__l (at1 tensor i0) (pure i1)
at3 tensor i0 i1 i2 = ap2 tensor__at__l (at2 tensor i0 i1) (pure i2)
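A minimal usage sketch (`readElem01` is an illustrative name): reading element (0,1) of a 2-dimensional tensor chains two indexing steps and then extracts the value with `tensor_item_double`, just as the spec below does:

```haskell
-- Read the scalar at row 0, column 1 of a 2D tensor.
readElem01 t = at2 t 0 1 >>= tensor_item_double
```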
-- Wrapper for tensor factory methods (e.g. `ones`, `zeros`).
new' fn dsize dtype = ap2 fn (intArray dsize) (options kCPU dtype)
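For example, the spec below creates its differentiable input as `new' ones_lo [2,2] dtype`; specialized to float, that is (an illustrative sketch):

```haskell
-- A 2x2 float tensor of ones, with requires_grad set via `options`.
onesFloat2x2 :: IO (ForeignPtr Tensor)
onesFloat2x2 = new' ones_lo [2, 2] kFloat
```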
-- add_tts is the `Tensor + Tensor` addition function.
-- Its last argument (`newScalar_d 1` below) is the `alpha` scaling factor:
-- `add(a, b, alpha)` computes `a + alpha * b`, so passing 1 gives plain addition.
-- add_tss is the `Tensor + Scalar` addition function.
add' :: ForeignPtr Tensor -> ForeignPtr Tensor -> IO (ForeignPtr Tensor)
add' a b = join $ A.add_tts <$> pure a <*> pure b <*> newScalar_d 1
addM' :: IO (ForeignPtr Tensor) -> IO (ForeignPtr Tensor) -> IO (ForeignPtr Tensor)
addM' a b = join $ A.add_tts <$> a <*> b <*> newScalar_d 1
add_s' :: ForeignPtr Tensor -> ForeignPtr Scalar -> IO (ForeignPtr Tensor)
add_s' a b = join $ A.add_tss <$> pure a <*> pure b <*> newScalar_d 1
addM_s' :: IO (ForeignPtr Tensor) -> IO (ForeignPtr Scalar) -> IO (ForeignPtr Tensor)
addM_s' a b = join $ A.add_tss <$> a <*> b <*> newScalar_d 1
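A small sketch of how the two styles differ (`sumOfOnes` is an illustrative name): `addM'` takes the factory actions directly, while `add'` needs the tensors bound first:

```haskell
-- c = a + b, written once with addM' and once with add'.
sumOfOnes :: IO (ForeignPtr Tensor)
sumOfOnes = addM' (new' ones_lo [2,2] kFloat) (new' ones_lo [2,2] kFloat)

sumOfOnes' :: IO (ForeignPtr Tensor)
sumOfOnes' = do
  a <- new' ones_lo [2,2] kFloat
  b <- new' ones_lo [2,2] kFloat
  add' a b
```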
-- tensorOptions_requires_grad_b sets the flag that enables gradient calculation
-- (the final `pure 1` below passes the boolean true).
options :: DeviceType -> ScalarType -> IO (ForeignPtr TensorOptions)
options device dtype = ap2 tensorOptions_requires_grad_b (ap2 tensorOptions_dtype_s (device_D device) (pure dtype)) (pure 1)
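For instance, the options used by `new'` above for float tensors are built as (an illustrative sketch):

```haskell
-- TensorOptions: CPU device, float dtype, requires_grad = true.
floatOptions :: IO (ForeignPtr TensorOptions)
floatOptions = options kCPU kFloat
```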
spec :: Spec
spec = forM_ [
    (kFloat, "float"),
    (kDouble, "double")
  ] $ \(dtype, dtype_str) -> describe ("BasicSpec:" <> dtype_str) $ do
  -- This Haskell test is equivalent to the following C++ code:
  -- {
  --   torch::Tensor a = torch::ones({2, 2}, torch::requires_grad());
  --   torch::Tensor b = torch::randn({2, 2});
  --   auto c = a + b;
  --   std::cout << a << std::endl;
  --   std::cout << b << std::endl;
  --   std::cout << c << std::endl;
  --   c.backward();
  --   auto aa = a.grad();
  --   for(int i=0;i<2;i++)
  --     for(int j=0;j<2;j++)
  --       assert(aa[i][j]==1);
  -- }
it "Backward" $ do
a <- new' ones_lo [2,2] dtype
print "--a--"
forM_ [0..1] $ \i ->
forM_ [0..1] $ \j ->
at2 a i j >>= tensor_item_double >>= print
b <- new' randn_lo [2,2] dtype
print "--b--"
forM_ [0..1] $ \i ->
forM_ [0..1] $ \j ->
at2 b i j >>= tensor_item_double >>= print
print "--c--"
c <- add' a b
forM_ [0..1] $ \i ->
forM_ [0..1] $ \j ->
at2 c i j >>= tensor_item_double >>= print
tensor_print c
-- Run backward-calculation.
tensor_backward c
-- Get gradient.
a' <- tensor_grad a
print "--a'--"
forM_ [0..1] $ \i ->
forM_ [0..1] $ \j ->
-- Verify the gradient values are all 1.
(at2 a' i j >>= tensor_item_double) `shouldReturn` 1
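For reference, the spec can be run standalone with hspec (a sketch; in the real test suite `spec` is picked up by the test runner, so this `Main` is illustrative only):

```haskell
module Main where

import Test.Hspec (hspec)
import BackwardSpec (spec)

-- Run only the BackwardSpec tests.
main :: IO ()
main = hspec spec
```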