Vomiting Out Some Machine Learning with Torch

I don’t know anything about Lua or Torch, and not much about machine learning, so this is a little project to get going.

Torch is to Lua what NumPy is to Python. I have never written any Lua before, although for a while it was the main language on the ESP8266. Torch seems to be a popular base for machine learning, in competition with Theano and TensorFlow. Lua is like what you would get if Python and JavaScript had a slightly awkward baby.

I thought I’d have a go at a simple tic-tac-toe player. The approach is to play a bunch of totally random games and collect all the winning ones. The problem then becomes a classification problem: given a board, the categories are the next move (1-9).

I then used the stock nn neural network package to learn from that data. I had a tough time finding clear docs. I am unimpressed.

Then I use the trained neural network to play against the random player.

The win stats increased from ~28% to ~45% (with some fluctuations run to run of a couple percent). Not bad. Especially since going second is disadvantageous. Okay, as I wrote that I realized it’s easy to try flipping that. Going first the stats go from 59% to 69%.

Hmmm. Maybe I should look at draws?

Also, a smarter strategy would be to try the suggested moves in order of their rank, rather than taking the top suggested move and, if that is invalid, falling back to a random move.

    -- Seed Lua's random number generator from the current time; the random
    -- moves below are drawn with math.random.
    math.randomseed(os.time())
    
    --- Report whether mark `x` has completed a line on a 3x3 board.
    -- @param board table of three rows, each a table of three cells
    -- @param x the mark to look for (the script uses 'x' and 'o')
    -- @return true if `x` fills a whole row, column or diagonal, else false
    function won(board,x)
      -- Both diagonals pass through the centre, so test that cell once.
      if board[2][2] == x then
        if board[1][1] == x and board[3][3] == x then return true end
        if board[1][3] == x and board[3][1] == x then return true end
      end

      for k = 1, 3 do
        local row = board[k]
        -- row k
        if row[1] == x and row[2] == x and row[3] == x then return true end
        -- column k
        if board[1][k] == x and board[2][k] == x and board[3][k] == x then return true end
      end

      return false
    end
    
    
    --- Tell whether every cell of a 3x3 board has been played.
    -- A cell counts as empty when it holds the empty string ''.
    -- @param board table of three rows, each a table of three cells
    -- @return false as soon as an empty cell is found, true otherwise
    function full(board)
      -- Walk the nine cells as a flat 0..8 index, row by row.
      for cell = 0, 8 do
        local r = math.floor(cell / 3) + 1
        local c = cell % 3 + 1
        if board[r][c] == '' then
          return false
        end
      end
      return true
    end
    
    
    -- Numeric encoding of each mark: 'x' -> 1, empty -> 0, 'o' -> -1.
    local CELL_VALUE = { x = 1, [''] = 0, o = -1 }

    --- Convert a board of marks into a 3x3 table of numbers.
    -- Cells holding anything other than 'x', '' or 'o' leave the matching
    -- slot of the result unset (nil).
    -- @param board table of three rows, each a table of three cells
    -- @return a freshly built 3x3 table of numbers; `board` is not modified
    function mapBoardtoNum(board)
      -- Must be local: without it the result was written to the global
      -- `newboard` and overwritten on every call.
      local newboard = {{},{},{}}
      for i=1,3 do
        for j=1,3 do
          newboard[i][j] = CELL_VALUE[board[i][j]]
        end
      end
      return newboard
    end
    
    --[[
    print(won({
    {'x','',''},
    {'x','o',''},
    {'x','o',''}}, 'o'))
    ]]
    
    
    -- Phase 1: play `gamenum` games in which both sides move at random, and
    -- keep every (board, move) pair made by 'x' in the games 'x' went on to win.
    --
    -- mymoves, myboards, wins and gamenum stay global because later sections
    -- of this script read them. Everything that only lives for one game is
    -- local, so it no longer leaks into the global table.
    mymoves = {}    -- mymoves[n]: cell label (1-9) chosen by 'x'
    myboards = {}   -- myboards[n]: numeric board 'x' saw before that move
    wins = 0        -- number of games won by 'x'
    gamenum = 10000
    for _ = 1, gamenum do

      local board = {{'','',''},
                     {'','',''},
                     {'','',''}}

      -- Toggled at the top of every turn, so starting on 'o' makes 'x' open.
      local move = 'o'

      local game = {}     -- boards 'x' was faced with during this game
      local choices = {}  -- label 'x' picked for each of those boards

      while not won(board,'x') and not won(board,'o') and not full(board) do

        move = (move == 'x') and 'o' or 'x'

        -- Draw cells until a free one comes up; the loop condition above
        -- guarantees the board is not full.
        local i, j
        repeat
          i = math.random(3)
          j = math.random(3)
        until board[i][j] == ''

        if move == 'x' then
          -- Record the position before the move is applied.
          -- Label = i + 3*(j-1), i.e. the first index varies fastest.
          game[#game + 1] = mapBoardtoNum(board)
          choices[#choices + 1] = i + 3 * (j - 1)
        end
        board[i][j] = move

      end

      if won(board,'x') then
        wins = wins + 1
        for n = 1, #game do
          myboards[#myboards + 1] = game[n]
          mymoves[#mymoves + 1] = choices[n]
        end
      end

    end

    --print(mymoves)
    --print(#myboards)
    print('won ' .. wins ..' out of ' .. gamenum)
    
    -- Build the dataset handed to trainer:train(): training[n] is an
    -- {input, target} pair and training:size() reports how many pairs exist.

    -- torch is used below before anything in this script requires it.
    -- Presumably it is preloaded when run under `th`; requiring it here also
    -- lets the script start from a plain Lua/LuaJIT interpreter.
    require 'torch'

    training = {}
    --[[
    training.data = torch.Tensor(myboards)
    training.labels = torch.Tensor(mymoves)
    training.size = function() return (#mymoves) end
    ]]

    -- Freeze the sample count now. The previous closure re-read the global
    -- `mymoves` on every call, so any later reassignment of that global
    -- changed the size the dataset reported.
    local nsamples = #mymoves
    training.size = function() return nsamples end
    for n = 1, nsamples do
      -- input: 3x3 tensor of the numeric board; target: 1-element tensor
      -- holding the move label
      training[n] = {torch.Tensor(myboards[n]), torch.Tensor({mymoves[n]})}
    end
    
    -- Network definition and training.
    -- The input is reshaped to 9 values, passed through a 30-unit tanh hidden
    -- layer, then through a 9-way LogSoftMax output (one entry per move label).
    require 'nn'

    ninputs, nhiddens, noutputs = 9, 30, 9

    model = nn.Sequential()
    model
      :add(nn.Reshape(ninputs))
      :add(nn.Linear(ninputs, nhiddens))
      :add(nn.Tanh())
      :add(nn.Linear(nhiddens, noutputs))
      :add(nn.LogSoftMax())

    -- ClassNLLCriterion is applied to the LogSoftMax output above.
    criterion = nn.ClassNLLCriterion()

    trainer = nn.StochasticGradient(model, criterion)
    trainer.learningRate = 0.01
    trainer.maxIteration = 7

    trainer:train(training)
    
    
    
    
    --[[
    print(board)
    print(won(board,'o'))
    print(won(board,'x'))
    print(choices)
    print(game[1])
    ]]
    
    
    -- Smoke test: push one hand-written position through the trained network
    -- and print its output, the highest score and the index of that score.
    board = {
      {'x', 'o', ''},
      {'x', 'o', ''},
      {'x', 'o', ''},
    }

    local sample = torch.Tensor(mapBoardtoNum(board))
    logprobs = model:forward(sample)
    print(logprobs)

    max, pred = torch.max(logprobs, 1)
    print(max)
    print(pred)

    --[[
    -- Basic format
    {
    {'x','o',''},
    {'x','o',''},
    {'x','o',''}
    }
    ]]

    -- `wins` still holds the count from the random-vs-random games here.
    print('random won ' .. wins ..' out of ' .. gamenum)
    
    
    -- Phase 2: play `gamenum` games with the trained network as 'x' against a
    -- random 'o', and count how often 'x' wins.
    --
    -- Compared with the earlier version of this section:
    --   * the globals mymoves/myboards are no longer reset (they are never
    --     read here, and resetting them threw the collected data away);
    --   * the unused game/choices/turn/maxs bookkeeping is gone;
    --   * per-game state is local rather than global;
    --   * the predicted label is turned into a plain number before decoding.
    wins = 0
    for _ = 1, gamenum do

      local board = {{'','',''},
                     {'','',''},
                     {'','',''}}

      -- Toggled before each move, so starting on 'o' lets 'x' (the network) open.
      local move = 'o'

      while not won(board,'x') and not won(board,'o') and not full(board) do
        local i, j

        if move == 'x' then
          -- Random opponent: draw cells until a free one comes up.
          move = 'o'
          repeat
            i = math.random(3)
            j = math.random(3)
          until board[i][j] == ''

        else
          move = 'x'
          local probs = model:forward(torch.Tensor(mapBoardtoNum(board)))
          local _, pred = torch.max(probs, 1)

          -- Invert the label encoding i + 3*(j-1) used for the training data.
          local cell = pred[1] - 1
          i = cell % 3 + 1
          j = math.floor(cell / 3) + 1

          -- If the top suggestion is already taken, fall back to a random
          -- free cell.
          if board[i][j] ~= '' then
            repeat
              i = math.random(3)
              j = math.random(3)
            until board[i][j] == ''
          end
        end

        board[i][j] = move
      end

      if won(board,'x') then
        wins = wins + 1
      end

    end

    print('learned won ' .. wins ..' out of ' .. gamenum)