Garbage Can Compiling to Categories with Inspectable Lambdas

There are a couple kinds of functions that we can turn into totally inspectable data.

Linear functions can be reconstituted into a matrix if you give a basis of vectors.

Functions from enumerable types can be turned into a lookup table

Sufficiently polymorphic functions are another example though. forall a. a -> a is commonly known to only be id. The same goes for fst = forall a b. (a,b) -> a, and for snd and swap and all the combinations of these built by nesting with (.). These functions have exactly one inhabiting value (excluding internal churning and the possibility of going into an infinite loop).

So the type directly tells us the implementation

forall a. (a,a)->a is similar. It can only be fst or snd. Types that reuse a type parameter in the input can only be permutations.

I’ve been trying to find a way to take a written lambda and convert it to data automatically and have been having trouble.

An opaque type that we have hidden the constructors to works the same way: (T,T)->T can only be fst or snd specialized to T, since we can’t possibly destructure a T.

We can figure out which one by giving a labeled example to that function and then inspecting a single output.  This gives the permutation and duplication that was done.

Similarly for T -> Either T T

Once we have this, we can (Hopefully) reinterpret this lambda in terms of a monoidal category.

 

{-# LANGUAGE RankNTypes, GADTs, FlexibleInstances, 
DataKinds, TypeFamilies,MultiParamTypeClasses,

FlexibleContexts,

ScopedTypeVariables,
FunctionalDependencies,

GADTs,
TypeOperators
 #-}

--AllowAmbiguousTypes,
-- OverlappingInstances,
-- UndecidableInstances,
import Data.Proxy
import Unsafe.Coerce


-- | Monomorphic stand-in for a polymorphic type variable.  Each Tag
-- carries an Int label so distinct input positions can be told apart
-- when a function is probed with example values.
data Tag = Tag Int deriving Show

-- | Closed type family that rewrites a polymorphic type so that any
-- leftover type variable becomes the concrete 'Tag' type, recursing
-- through pairs and arrows; known base types are left untouched.
-- NOTE(review): the [a], (a,b,c) and (a,b,c,d) cases do not recurse
-- into their element types -- confirm that is intended.
type family (MonoMorphTag a) :: * where
	MonoMorphTag (a,b) = (MonoMorphTag a, MonoMorphTag b)
	MonoMorphTag (a->b) = (MonoMorphTag a) -> (MonoMorphTag b)
	MonoMorphTag Int = Int
	MonoMorphTag [a] = [a]
	MonoMorphTag (a,b,c) = (a,b,c)
	MonoMorphTag (a,b,c,d) = (a,b,c,d)
	MonoMorphTag Double = Double
	MonoMorphTag () = ()
	MonoMorphTag Char = Char
	MonoMorphTag _ = Tag 

-- | Coerce a value so that its residual type variables are read as
-- 'Tag' (per 'MonoMorphTag').  This is only defensible because the
-- tagged positions are never inspected at their original type.
unsafeMonoTag :: a -> MonoMorphTag a
unsafeMonoTag = unsafeCoerce


-- unsafeTagLeaves :: forall a. MonoMorphTag a -> Tag
-- unsafeTagLeaves = unsafeCoerce

-- | Shorthand used throughout the examples below.
type T = Tag

-- | Build a value of type @a@ whose 'Tag' leaves get consecutive
-- labels starting from the given Int.  Returns the value together with
-- the next unused label (a hand-threaded counter).
class GetVal a where
  val :: Int -> Proxy a -> (a, Int)

instance GetVal Tag where
	val n _ = (Tag n, n+1)

-- Number the left component first, then continue into the right one.
instance (GetVal a, GetVal b) => GetVal (a,b) where
	val n _ = ((v1, v2), n'') where
						(v1 , n') = val n (Proxy :: Proxy a) 
						(v2 , n'') = val n' (Proxy :: Proxy b)

-- | Binary tree mirroring the nesting structure of tuples, with the
-- tags at the leaves.
data TagTree a = Node (TagTree a) (TagTree a) | Leaf a deriving Show -- | Apply (k a b) TagTree

-- | Flatten a nested-tuple value of tags into an explicit 'TagTree'.
class Treeify a  b where
	treeify :: a -> TagTree b

instance Treeify Tag Tag where
	treeify x = Leaf x

instance (Treeify a Tag, Treeify b Tag) => Treeify (a,b) Tag where
	treeify (a,b) = Node (treeify a) (treeify b)

-- | Associated-type sketch of monomorphisation.
-- NOTE(review): the pair instance mentions @Mono a@ / @Mono b@ without
-- requiring @MonoMorph a@ / @MonoMorph b@ in its context, so those
-- applications can get stuck -- confirm this is intended.
class MonoMorph a where
	type Mono a :: *

instance MonoMorph (a,b) where
	type Mono (a,b) = (Mono a, Mono b)
{-
instance MonoMorph (MonoMorphTag a) where
	type Mono a = Tag
-}

{-
-- Hmm I'm not sure how to monomorhpize this.
fst' :: (TagTup a) => (a, b) -> a
fst' = fst
-}
{-
class AutoCurry a b | a -> b where
	autocurry :: a -> b 

instance AutoCurry (a->b->Tag) ((a,b)->Tag) where
	autocurry f = uncurry f

instance AutoCurry c (a->c') => AutoCurry (b->c) ((b,a) -> c') where
	autocurry f = uncurry (\b -> autocurry (f b))
-}

-- | Untyped AST of morphisms in a cartesian/monoidal category:
-- duplication, parallel/monoidal pairing, projections, identity and
-- composition.
data Monoidal = Dup | Mon Monoidal Monoidal | Par Monoidal Monoidal | Fst | Snd | Id | Comp Monoidal Monoidal deriving Show

-- | Typed (GADT) version of 'Monoidal': the two indices track domain
-- and codomain, so only well-formed compositions can be constructed.
data Monoidal' a b where
	Id' :: Monoidal' a a
	Dup' :: Monoidal' a (a,a)
	Fst' :: Monoidal' (a,b) a
	Snd' :: Monoidal' (a,b) b
	Comp' :: Monoidal' b c -> Monoidal' a b -> Monoidal' a c
	Mon' :: Monoidal' a a' -> Monoidal' b b' -> Monoidal' (a,b) (a',b')


-- | Record of one probe of a function: the labelled input tree that was
-- fed in, and the tree of labels observed in the output.
data FunData = FunData {inval :: TagTree Tag, outval :: TagTree Tag} deriving Show

-- | Probe a function with a freshly labelled example input and return
-- the input paired with the function's output.
class TestIdea a b where
	works :: (a -> b) -> (a, b)

instance (GetVal a) => TestIdea a b where
	works f = (inval,  f inval) where inval = fst $ val 0 (Proxy :: Proxy a) -- fst $ val 0 (Proxy :: Proxy b)

-- | Probe @f@ with a labelled input and record both sides as trees.
fuckmyshitup :: (GetVal a, Treeify a Tag, Treeify b Tag) => (a -> b) -> FunData
fuckmyshitup f = FunData (treeify input :: TagTree Tag) (treeify output :: TagTree Tag)
  where
    (input, output) = works f

-- | Rebuild a categorical expression from one probe of a function.
-- The output tree is traversed: a Node becomes a 'Mon' of the two
-- recovered halves, and a Leaf is located inside the input tree by
-- composing 'Fst'/'Snd' projections until its label is reached.
ccc :: FunData -> Monoidal
ccc (FunData x (Node y z)) = Mon (ccc $ FunData x y) (ccc $ FunData x z)
ccc (FunData (Leaf _) (Leaf _)) = Id
-- BUG FIX: this previously tested `inleft n x`, i.e. whether the label
-- occurs in the left branch *of x*.  The correct question is whether
-- the label occurs anywhere in x; otherwise a tag living in x's right
-- subtree was routed into y, where the recursion can never find it.
ccc (FunData (Node x y) z@(Leaf (Tag n))) =
  if ineither n x
    then Comp Fst (ccc (FunData x z))
    else Comp Snd (ccc (FunData y z))


-- | True when label @n@ occurs anywhere in the tree.
ineither :: Int -> TagTree Tag -> Bool
ineither n (Leaf (Tag m)) = m == n
ineither n (Node l r)     = ineither n l || ineither n r

-- | True when label @n@ occurs in the left branch (a lone leaf counts
-- as its own branch).
inleft :: Int -> TagTree Tag -> Bool
inleft n t = case t of
  Node l _     -> ineither n l
  Leaf (Tag m) -> m == n

-- | True when label @n@ occurs in the right branch (a lone leaf counts
-- as its own branch).
inright :: Int -> TagTree Tag -> Bool
inright n t = case t of
  Node _ r     -> ineither n r
  Leaf (Tag m) -> m == n

-- Then we can compile to categories. Replacing the entire structure with dup and par and
-- fst, snd, etc.

-- Make an infix operator $'
--data Apply k a b c = Apply (FreeCat k a b) c
--type ($$) = Apply
-- No, don't need getval.
-- We'll just need it for treeify?
{-instance GetVal c => GetVal (Apply k a b c) where
	val n _ = where x, n' = val n Proxy c
-}
-- Another Option

-- Opaque, uninhabited phantom types playing the role of distinct type
-- variables in written-out examples.
data A
data B
data C

-- This is basically a lambda calculus
-- I could probably finitely enumerate through all the typeclasses for all the variables
 
-- An example "function" given purely as a type, via Proxy.
example = Proxy :: Proxy ((A,B) -> B)

-- Hmm this would allow you to force duplicate input types though.

{-
class (Tagify a ~ a, Tagify b ~ b) => TestIdea a b where
	works :: (a -> b) -> (a, b)

instance (GetVal a) => TestIdea a b where
	works f = (inval,  f inval) where inval = fst $ val 0 (Proxy :: Proxy a) -- fst $ val 0 (Proxy :: Proxy b)
-}
--thisworks :: String
--thisworks = works id

-- fst . (val 0)

{-
instance (F a ~ flag, GetVal' flag a) => GetVal a where
  val = val' (Proxy :: Proxy flag)

class GetVal' (flag :: Bool) a where
  val' :: Proxy flag -> a -> Tagify a

instance (GetVal a, GetVal b) => GetVal' 'True (a,b) where
  val' _ (x,y) = (val x, val y)

instance GetVal' 'False a where
  val' _ x = Tag 0
-}

 

What about TH? Also the new quantified constraints extensions might be helpful?

 

 

Ok. A different approach. This works much closer to what I had in mind. You can write arbitrary tuple-like lambdas such as (\(x,y) -> (y,x)) and it will convert them to a category. I really had to hack around to get the thing to compile. Like that Pick typeclass, what the heck? Why can I get default cases in type families but not in typeclasses?

It is all decidedly not typesafe. You can get totally nonsensical things to compile to something. However if you stick to lambdas, you’ll be ok. Maybe.

No, on further review this does not work. I got tricked because the type seemed ok at a certain point. A couple of problems arise upon actual application. Since the idea is to drive the form based on the type variables, applying it to an argument whose types share the same form gets everything screwed up. Also tons of instances are overlapping, although I think this is fixable.

Maybe what I need is existential types that can’t ever unify together accidentally.

A couple of thoughts on type-level programming principles:

  1. Typeclasses are hard to get default cases. You want to use type families if that is what you want
  2. Typeclasses need unique stuff to appear on the right hand side. Only 1 pattern should match. You might need to add extra parameters to match to which you can force on the left hand side of the instance
  3. ~ type equality is real useful

 

An alternative to using lambda is to use an explicit Proxy. The type variables are basically just as good for syntactic purposes (a touch more noisy).

{-# LANGUAGE RankNTypes, GADTs, FlexibleInstances, 
DataKinds, TypeFamilies,MultiParamTypeClasses,
ImpredicativeTypes,

FlexibleContexts,

ScopedTypeVariables,
FunctionalDependencies,
UndecidableInstances,
GADTs,
TypeOperators

 #-}

-- OverlappingInstances, NoImplicitPrelude
--
--UndecidableInstances,
--OverlappingInstances,

import Data.Type.Bool
import Data.Proxy
--import Control.Category
--import GHC.Base hiding (id,(.))

-- | By parametricity, a function of type a -> a can only be 'id', so
-- any such argument can be replaced by 'id' itself.
class IsId a where
	val :: a -> a
   -- toCat 
instance forall a. IsId (a -> a) where
    val _ = id

{-
class Catable f a b | f -> a,b where
	toCat :: forall k. CartesianCategory k => k a b

instance forall a b. Catable ((a,b)->a) (a,b) a where
	toCat = fst
-}
-- | Type-level witness that @ab@ is a pair whose first component is
-- @a@ (no methods; the fundep does the work).
class Fst ab a | ab -> a where
--   toCat :: forall k. k ab a
instance forall a b. Fst (a,b) a



-- | Trivial class: every type supports an endo-function (instantiated
-- to 'id' by the catch-all instance further down).
class Anything b where
   fun :: b -> b 

-- | Render a value's tuple structure as a string.  Only the pair shape
-- is printed; there is no base-case instance for leaves here.
class Stringly a where
	stringly :: a -> String

instance (Stringly a, Stringly b) => Stringly (a,b) where
   stringly (x,y) = "(" ++ (stringly x) ++ "," ++ (stringly y) ++ ")"
{-
instance (Stringly a, Stringly b) => Stringly (a -> b) where
   stringly f = "(" ++ (stringly x) ++ "->" ++ (stringly y) ++ ")"
-}

-- | Minimal category class: composition and identity (primed names to
-- avoid clashing with the Prelude's (.) and id).
class Category k where
	dot' :: k b c -> k a b -> k a c
	id' :: k a a

instance Category (->) where
	dot' = (.)
	id' = id

-- | Categories with products: the two projections plus fan-out.
class Category k => CartesianCat k where
	fst' :: k (a,b) a
	snd' :: k (a,b) b
	join' :: k a b -> k a c -> k a (b,c) 


instance CartesianCat (->) where
	fst' = fst
	snd' = snd
	join' = join''  -- join'' f g x = (f x, g x); defined below

-- | Reflect a Haskell function on (nested) tuples into an arrow of an
-- arbitrary cartesian category, driven by the shapes of @a@ and @b@.
class Catable a b where
	toCat :: CartesianCat k => (a -> b) -> (k a b)
--  toCat (\x -> ((x,x),x))  . id

-- it's not INSANE to just list out a finite list of possibilities ((a,b),c) etc.


{-
data HeldApply k a b = HeldApply (k a b) a


($$) :: Category k => k a b -> b -> HeldApply k a b
f $$ x = HeldApply f

instance Catable a (HeldApply a b) where
	toCat 
Doesn't seem to work. We don't have an a get get the heldapply out of the function

Maybe we could pass in the approriate function as a a lambda \f x -> Apply f x


instance ExponentialCategory k where
	apply :: k (k a b, a) b  

-}


-- Base case: identical input/output types become the identity arrow.
instance Catable a a where
	toCat _ = id'
-- why is this okay? should these be covered by the other cases?
instance Catable (a,b) a where
	toCat _ = fst'

instance Catable (a,b) b where
	toCat _ = snd'

-- | Duplicate a value into a pair.
dup a = (a, a)

{-
instance Catable a (a,a) where
	toCat _ = dup
-}
-- | Fan-out for plain functions: apply both and pair the results.
join'' g h = \a -> (g a, h a)
-- iterates down through the output: build each component of the result
-- pair separately and recombine them with join'.
instance (Catable a b, Catable a c) => Catable a (b,c) where
	toCat f = join' (toCat (fst . f)) (toCat (snd . f))
{-
instance (InL c (a,b), Catable a c) => Catable (a,b) c where
	toCat f = (toCat (f . fst))

instance (InR c (a,b), Catable a c) => Catable (a,b) c where
	toCat f = (toCat (f . snd))
-}
-- For a pair input, decide via the type-level 'In' test whether the
-- target type sits in the left or right component, then project with
-- 'Pick''.  NOTE(review): the function argument f is discarded -- the
-- arrow is reconstructed from the types alone.
instance (Catable a c, Catable b c, Pick' c (a,b) (In a c)) => Catable (a,b) c where
	toCat f = pick' (Proxy :: Proxy (In a c))

{-
instance (Catable a c, Catable b c, Pick c (a,b) (In a c)) => Catable (a,b) c where
	toCat f = (toCat (pick (Proxy :: Proxy (In a c))))
-}
{-
class In a c where
	find :: c -> a

instance In a a
   find = id
instance In a b => In a (b,c)
   find = find . fst
instance In a c => In a (b,c)
   find = find . snd
-}


{-
type family (LorR a c) :: Nat where
	LorR a (a,_) = 1
	LorR a (_,a) = 2
	LorR a ((b,c),d) = (LorR a (b,c)) + (LorR a d)
	LorR a (d,(b,c)) = (LorR a (b,c)) + (LorR a d)
	LorR a _ = 0
-}

-- | Type-level membership: does type @a@ occur inside the nested tuple
-- tree @c@?
type family (In a c) :: Bool where
	In a a = 'True
	In a (a,_) = 'True
	In a (_,a) = 'True
	In a ((b,c),d) =  In a (b,c) || In a d 
	In a (d,(b,c)) = In a (b,c) || In a d 
	In a _ = 'False



{-
type Snd = forall a b. (a,b) -> b

type family (FstSnd a) :: * where
	FstSnd 'True = Snd
	FstSnd 'False = Snd
-}

-- | Project the component of type @a@ out of a nested pair @c@.  The
-- type-level Bool @d@ steers the search: 'True means descend into the
-- left component, 'False into the right.
class Pick a c (d :: Bool) where
	pick :: Proxy d -> c -> a

-- Descend left and keep searching within that pair.
instance (Pick a (e,f) (In a e), (e,f) ~ b) => Pick a (b,c) 'True where
	pick _ = (pick (Proxy :: Proxy (In a e))) . fst

-- Descend right and keep searching within that pair.
instance (Pick a (e,f) (In a e), (e,f) ~ c) => Pick a (b,c) 'False where
	pick _ = (pick (Proxy :: Proxy (In a e))) . snd

-- Target found as the immediate left component.
instance Pick a (a,b) 'True where
	pick _ = fst

-- Target found as the immediate right component.
instance Pick a (b,a) 'False where
	pick _ = snd

-- Target is the whole structure.
instance Pick a a d where
	pick _ = id

-- The bool is true if in the left branch.  Categorical generalisation
-- of 'Pick': the same search, but building an arrow in any
-- 'CartesianCat' instead of a plain function.
class Pick' a c (d :: Bool) where
	pick' :: CartesianCat k => Proxy d -> k c a

-- Descend left and keep searching within that pair.
instance (Pick' a (e,f) (In a e), (e,f) ~ b) => Pick' a (b,c) 'True where
	pick' _ = dot' (pick' (Proxy :: Proxy (In a e))) fst'

-- Descend right and keep searching within that pair.
instance (Pick' a (e,f) (In a e), (e,f) ~ c) => Pick' a (b,c) 'False where
	pick' _ = dot' (pick' (Proxy :: Proxy (In a e))) snd'

-- Target found as the immediate left component.
instance Pick' a (a,b) 'True where
	pick' _ = fst'

-- Target found as the immediate right component.
instance Pick' a (b,a) 'False where
	pick' _ = snd'

-- Target is the whole structure.
instance Pick' a a d where
	pick' _ = id'



{-
class InL a c where

instance InL a a
instance In a b => InL a (b,c)

class InR a c 
instance InR a a
instance In a b => InR a (c,b) 
-}

{-

instance (Catable a c, Catable b c) => Catable (a,b) c where
	toCat f = 

instance (Catable a c, Catable b c) => Catable a (b,c) where
	toCat f = 
-}

{-
instance (Stringly a, Stringly b, (a,b) ~ c, IsTup c ~ 'True) => Stringly c where
   stringly (x,y) = "(" ++ (stringly x) ++ "," ++ (stringly y) ++ ")"
-}

--instance (IsTup a ~ 'False, IsArr a ~ 'False) => Stringly a where
--	stringly _ = "_"



-- Catch-all instance: the endo-function for any type is 'id'.
instance forall a. Anything a where
	fun = id

-- Sanity check: recover 'id' from the IsId class at a polymorphic type.
example :: a -> a
example = val id

-- | Type-level test: is the type a pair?
type family (IsTup a) :: Bool where
	IsTup (a,b) = 'True
	IsTup _ = 'False

-- | Type-level test: is the type a function arrow?
type family (IsArr a) :: Bool where
	IsArr (a->b) = 'True
	IsArr _ = 'False

 

 

Pytorch Trajectory Optimization

Trajectory optimization is cool. The idea is to take a dynamical problem as a big ole optimization problem, finding the best actions to take to achieve your goals or maximize a reward.

There are a couple of flavors of trajectory optimization (shooting methods, collocation methods) http://www.matthewpeterkelly.com/tutorials/trajectoryOptimization/

PyTorch gives a pretty low overhead extension to Numpy that also gives autodifferentiation. It is mainly intended as a neural network library, for which it has a number of facilities.

Gradient Descent is not the preferred method for these problems (According to Boyd’s Convex optimization course). Gradient Descent has shit convergence compared to newton iteration, but is very flexible and easy to implement.

In addition, using a normal ODE solver from Scipy would be much more stable, but it would require cleverness to have the same code work for both scipy and the torch parts. So screw it.

One nicety of this approach is that we don’t even have to have our derivatives solved for. They could be all tied up in an implicit relation f(x, \dot{x}) = 0 and the optimizer would still try to enforce it.

I thought that maybe I could just weight the dynamics cost enough to have it require the dynamics be satisfied, but that did not seem to work. Maybe with more fiddling? On further review my code had massive bugs in it. I’m not sure that the dynamics cost version wouldn’t work, but the Lagrange multiplier method seems to work well and makes sense too.

In this formulation, we can also train some kind of parametrized controller function f_w(x) by sampling some random initial starting conditions (or even dynamical parameters like mass and length etc, or noise forces). This is quite nice.

Additional bits that may be nice: Backtracking line search, logarithmic potential for inequalities, I wonder if a symplectic style interleaving of position and momentum might be nice even for this global case. Should definitely just tie up all the vars into a single x. Can we use a lagrangian or hamiltonian and then have pytorch differentiate that? It may in fact be nice to use some combinator to be able to hand the same function to ODEInt for a couple reasons (getting good initilizations  of the path for example).

For a simple system, I’m using \dot{x}=v , \dot{v}=f , where you get to control f at every time point and x is starting at 0 and wants to get to 1. I’m using a simple scheme of finite difference in time for the time derivative. x and v are defined at t and f, lx, lv are defined at the half time steps t + \frac{1}{2}. You need at least two time steps to get a derivative. I’m adding a square cost to the force, otherwise it would just get a huge force. lx and lv are Lagrange multipliers enforcing the equations of motion at each time step

Here was an initial pass (just here for historical reasons — look at the updated one below; this one does not work as is).

 

import torch

# Problem size: one trajectory of N samples over total time T.
batch = 1
N = 10
T = 5
dt = T/N

# Position at each of the N time samples; optimized directly.
x = torch.zeros(batch,N, requires_grad=True)

#print(x)

# Velocity at each of the N time samples.
v = torch.zeros(batch, N, requires_grad=True)

# Control force, defined at the N-1 half time steps.
f = torch.zeros(batch, N-1, requires_grad=True)


def calc_loss(x,v,f):
	"""Penalty-method objective for the trajectory problem.

	x, v: (batch, N) position and velocity samples.
	f:    (batch, N-1) control force at the half time steps.
	Returns a (batch,) tensor: tracking/effort cost plus a stiff
	penalty on the dynamics residual.  Reads module-level dt.
	"""
	# finite-difference time derivatives at the half steps
	delx = (x[:,1:] - x[:, :-1]) / dt
	delv = (v[:,1:] - v[:, :-1]) / dt

	# midpoint averages at the half steps
	xbar = (x[:,1:] + x[:, :-1]) / 2
	vbar = (v[:,1:] + v[:, :-1]) / 2

	# model dynamics: x' = v, v' = f
	dxdt = vbar
	dvdt = f

	# BUG FIX: the residuals must compare the finite-difference
	# derivatives (delx, delv) to the model derivatives; the original
	# compared the midpoint averages xbar/vbar instead, leaving
	# delx/delv computed but unused and the penalty meaningless.
	xres = delx - dxdt
	vres = delv - dvdt

	# L1 measure of how badly the equations of motion are violated
	dyn_err = torch.sum(torch.abs(xres) + torch.abs(vres), dim=1)

	# quadratic cost: distance to the target x = 1 plus control effort
	reward = torch.sum((xbar-1)**2 + f**2, dim=1)

	# stiff penalty weight on the dynamics violation
	total_cost = 100 * dyn_err + reward

	return total_cost

#print(x.grad)
#print(v.grad)
#print(f.grad)

import torch.optim as optim
'''
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()    # Does the update
'''

#Could interleave an ODE solve step - stinks that I have to write dyanmics twice
#Or interleave a sepearate dynamic solving
# Or could use an adaptive line search. Backtracking
# Goal is to get dyn_err quite small


# NOTE(review): this loop does not run as written and is kept only as a
# historical record (see the corrected script further down):
#  - the learning_rate line below is missing two closing parentheses
#  - dyn_loss and reward are never defined (calc_loss returns only the
#    combined total_cost)
#  - gradients are never zeroed between iterations, so they accumulate
learning_rate = 0.001
for i in range(40):
	total_cost=calc_loss(x,v,f)
	#total_cost.zero_grad()
	total_cost.backward()
	while dyn_loss > 0.01:
		dyn_loss.backward()
		with torch.no_grad():
			learning_rate = dyn_loss / (torch.norm(x.grad[:,1:]) + (torch.norm(v.grad[:,1:])
			x[:,1:] -= learning_rate * x.grad[:,1:] # Do not change Starting conditions
			v[:,1:] -= learning_rate * v.grad[:,1:]
	reward.backward()
	with torch.no_grad():
		f -= learning_rate * f.grad

print(x)
print(v)
print(f)

I goofed up a couple of things (including my xres making no sense). You need to explicitly zero gradients — pretty annoying. The Lagrange multiplier method makes total sense.

Could we use a Hamiltonian and use autograd to derive equations of motion? Seems plausible and convenient.

Can I make a custom pytorch layer for sparse Hessians? The data oriented viewpoint would have you pump the gradient and hessian backward. Or could you automatically build an H-matrix structure for the hessian of convnets?

Put a neural controller in there. Each batch could have randomized parameters, noise, and initial conditions.

Is rebuilding total_cost every time bad?

import torch

# Problem size: one trajectory of N samples over total time T.
batch = 1
N = 20
T = 5
dt = T/N

# Position at each of the N time samples; optimized directly.
x = torch.zeros(batch,N, requires_grad=True)

#print(x)

# Velocity at each of the N time samples.
v = torch.zeros(batch, N, requires_grad=True)

# Control force at the N-1 half time steps.
f = torch.zeros(batch, N-1, requires_grad=True)

# Lagrange multipliers enforcing the x and v equations of motion.
lx = torch.zeros(batch, N-1, requires_grad=True)

lv = torch.zeros(batch, N-1, requires_grad=True)

'''
class Vars():
	def __init__(self, N=10):
		self.data = torch.zeros(batch, N, 2)
		self.data1 = torch.zeros(batch, N-1, 3)
		self.lx = self.data1[:,:,0]
		self.lv = self.data1[:,:,1]
		self.f = self.data1[:,:,2]
		self.x = self.data[:,:,0]
		self.v = self.data[:,:,1]
'''
def calc_loss(x, v, f, lx, lv):
    """Lagrangian objective for the trajectory problem.

    x, v:   (batch, N) position and velocity samples.
    f:      (batch, N-1) control force at the half time steps.
    lx, lv: (batch, N-1) Lagrange multipliers for the two equations
            of motion.  Reads module-level dt.
    Returns a (batch,) tensor: cost plus the multiplier terms.
    """
    # forward differences: finite-difference derivatives at half steps
    d_x = (x[:, 1:] - x[:, :-1]) / dt
    d_v = (v[:, 1:] - v[:, :-1]) / dt
    # midpoint averages at the half steps
    mid_x = (x[:, 1:] + x[:, :-1]) / 2
    mid_v = (v[:, 1:] + v[:, :-1]) / 2

    # residuals of the dynamics x' = v, v' = f - x (want these zero)
    res_x = d_x - mid_v
    res_v = d_v - (f - mid_x)

    # multipliers enforce res_x = res_v = 0 at the saddle point
    constraint = torch.sum(lx * res_x + lv * res_v, dim=1)

    # objective: L1 distance to the target x = 1 plus control effort
    objective = torch.sum(torch.abs(x - 1), dim=1) + torch.sum(f**2, dim=1)

    return objective + constraint

#print(x.grad)
#print(v.grad)
#print(f.grad)

import torch.optim as optim
'''
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()    # Does the update
'''

#Could interleave an ODE solve step - stinks that I have to write dyanmics twice
#Or interleave a sepearate dynamic solving
# Or could use an adaptive line search. Backtracking
# Goal is to get dyn_err quite small


'''
learning_rate = 0.001
for i in range(40):
	total_cost=calc_loss(x,v,f)
	#total_cost.zero_grad()
	total_cost.backward()
	while dyn_loss > 0.01:
		dyn_loss.backward()
		with torch.no_grad():
			learning_rate = dyn_loss / (torch.norm(x.grad[:,1:]) + (torch.norm(v.grad[:,1:])
			x[:,1:] -= learning_rate * x.grad[:,1:] # Do not change Starting conditions
			v[:,1:] -= learning_rate * v.grad[:,1:]
	reward.backward()
	with torch.no_grad():
		f -= learning_rate * f.grad
'''
# Gradient descent/ascent on the Lagrangian: descend in the primal
# variables (x, v, f) and ascend in the multipliers (lx, lv) to seek a
# saddle point where the dynamics residuals vanish.
learning_rate = 0.001
for i in range(10000):
	total_cost = calc_loss(x,v,f, lx, lv)
	print(total_cost)
	#print(x)
	#total_cost.zero_grad()

	total_cost.backward()
	with torch.no_grad():
		#print(f.grad)
		#print(lx.grad)
		#print(x.grad)
		#print(v.grad)
		f -= learning_rate * f.grad
		# ascent step on the multipliers (maximize over lx, lv)
		lx += learning_rate * lx.grad
		lv += learning_rate * lv.grad
		#print(x.grad[:,1:])
		x[:,1:] -= learning_rate * x.grad[:,1:] # Do not change Starting conditions
		v[:,1:] -= learning_rate * v.grad[:,1:]
	# gradients accumulate across backward() calls; reset them by hand
	x.grad.data.zero_()
	v.grad.data.zero_()
	f.grad.data.zero_()
	lx.grad.data.zero_()
	lv.grad.data.zero_()

print(x)
print(v)
print(f)