Vomiting Out Some Machine Learning with Torch

I don’t know anything about Lua or Torch, and not much about machine learning, so this is a little project to get going.

Torch is to Lua what NumPy is to Python. I’ve never written any Lua before, although for a while it was the main language on the ESP8266. Torch seems like a popular base for machine learning, in competition with Theano and TensorFlow. Lua is like what you’d get if Python and JavaScript had a slightly awkward baby.

Thought I’d give a simple tic-tac-toe playing guy a go. The structure is: play a bunch of totally random games and collect all the winning ones. Learning then becomes a classification problem where the input is the board and the categories are the next move (1-9).

Then used the stock nn neural network package to learn on it. Had a tough time finding clear docs. I am unimpressed.

Then use trained neural network to play against the random component.

The win stats increased from ~28% to ~45% (with some fluctuations run to run of a couple percent). Not bad. Especially since going second is disadvantageous. Okay, as I wrote that I realized it’s easy to try flipping that. Going first the stats go from 59% to 69%.

Hmmm. Maybe I should look at draws?

Also, a smarter strategy would be to try the suggested moves in order of their rank, rather than taking only the top suggested move and falling back to a random move when it is invalid.

-- Seed Lua's math.random from the wall clock so that each run plays a
-- different sequence of random moves (os.time() has one-second resolution).
math.randomseed(os.time())

--- Report whether player `x` holds a complete line on a 3x3 board.
-- @param board 3x3 table of rows; each cell is compared to `x` with ==
-- @param x     the mark to look for (the script uses 'x' and 'o')
-- @return true if a diagonal, row or column consists solely of `x`,
--         false otherwise
function won(board,x)
  -- True when all three (row, column) positions hold the player's mark.
  local function owns(r1, c1, r2, c2, r3, c3)
    return board[r1][c1] == x and board[r2][c2] == x and board[r3][c3] == x
  end

  -- both diagonals
  if owns(1, 1, 2, 2, 3, 3) or owns(1, 3, 2, 2, 3, 1) then
    return true
  end

  -- row n and column n are examined in the same pass
  for n = 1, 3 do
    if owns(n, 1, n, 2, n, 3) or owns(1, n, 2, n, 3, n) then
      return true
    end
  end

  return false
end


--- Report whether every cell of the board is occupied.
-- @param board 3x3 table of rows; the empty string '' marks a free cell
-- @return false as soon as a free cell is found, true when there is none
function full(board)
  -- Walk the nine cells as a flat index 0..8, row by row.
  for cell = 0, 8 do
    local r = math.floor(cell / 3) + 1
    local c = cell % 3 + 1
    if board[r][c] == '' then
      return false
    end
  end

  return true
end


-- Numeric code for each cell mark: 'x' -> 1, empty -> 0, 'o' -> -1.
local CELL_VALUES = { x = 1, [''] = 0, o = -1 }

--- Convert a board of marks into a board of numbers.
-- The result is a fresh 3x3 table, so the caller may keep it as a snapshot
-- while the original board keeps changing.
-- @param board 3x3 table of rows holding 'x', 'o' or '' in each cell
-- @return a new 3x3 table with 1 for 'x', 0 for '' and -1 for 'o'; a cell
--         holding any other value is left unset (nil), as before
function mapBoardtoNum(board)
  -- `local` is the fix: the result table used to be assigned to a global
  -- named `newboard`, which every call silently overwrote.
  local numeric = {{},{},{}}
  for i=1,3 do
    for j=1,3 do
      numeric[i][j] = CELL_VALUES[board[i][j]]
    end
  end
  return numeric
end

--[[
print(won({
{'x','',''},
{'x','o',''},
{'x','o',''}}, 'o'))
]]


-- Phase 1: play gamenum games in which both sides move at random, and keep
-- the (board, move) pairs produced by 'x' in the games 'x' went on to win.
-- All names here are deliberately left global: later parts of the script
-- read and reset them.
mymoves = {}    -- labels: the cell 'x' picked, encoded as a number 1..9
myboards = {}   -- inputs: numeric board as it stood just before that move
wins = 0
gamenum = 10000

for k = 1, gamenum do

  board = {
    {'','',''},
    {'','',''},
    {'','',''}
  }

  -- Flipped before the first placement, so 'x' makes the opening move.
  move = 'o'

  -- Per-game record of what 'x' saw and did; discarded unless 'x' wins.
  game = {}
  choices = {}
  turn = 1

  while not (won(board,'x') or won(board,'o') or full(board)) do

    -- hand the turn to the other player
    move = (move == 'x') and 'o' or 'x'

    -- draw cells at random until a free one comes up
    repeat
      i = math.random(3)
      j = math.random(3)
    until board[i][j] == ''

    if move == 'x' then
      game[turn] = mapBoardtoNum(board)
      -- label = i + 3*(j-1): the row index varies fastest
      choices[turn] = i + 3 * (j - 1)
      turn = turn + 1
    end

    board[i][j] = move

  end

  if won(board,'x') then
    wins = wins + 1
    for n, snapshot in ipairs(game) do
      myboards[#myboards + 1] = snapshot
      mymoves[#mymoves + 1] = choices[n]
    end
  end

end

--print(mymoves)
--print(#myboards)
print('won ' .. wins ..' out of ' .. gamenum)

-- Training set built from the collected games: a table with a size()
-- function plus one {input, target} pair per integer index.
training = {}
--[[
training.data = torch.Tensor(myboards)
training.labels = torch.Tensor(mymoves)
training.size = function() return (#mymoves) end
]]

-- Called below as training:size(); the implicit self argument is ignored.
function training.size()
  return #mymoves
end

for n = 1, training:size() do
  local input = torch.Tensor(myboards[n])     -- built from the 3x3 numeric board
  local target = torch.Tensor({mymoves[n]})   -- one-element tensor with the label
  training[n] = {input, target}
end

require 'nn'

-- Layer sizes: nine board cells in, nine candidate moves out.
ninputs = 9
nhiddens = 30
noutputs = 9

-- Reshape -> Linear -> Tanh -> Linear -> LogSoftMax, added in this order.
model = nn.Sequential()
for _, layer in ipairs({
  nn.Reshape(ninputs),
  nn.Linear(ninputs, nhiddens),
  nn.Tanh(),
  nn.Linear(nhiddens, noutputs),
  nn.LogSoftMax(),
}) do
  model:add(layer)
end

-- The network ends in LogSoftMax, and ClassNLLCriterion is the loss used
-- with it here.
criterion = nn.ClassNLLCriterion()

-- Stochastic gradient descent over the collected winning moves.
trainer = nn.StochasticGradient(model, criterion)
trainer.learningRate = 0.01
trainer.maxIteration = 7

trainer:train(training)




--[[
print(board)
print(won(board,'o'))
print(won(board,'x'))
print(choices)
print(game[1])
]]


-- Smoke test: push one hand-written position through the trained network and
-- print its raw output together with the best-scoring entry. The position
-- has a full column of 'x' and a full column of 'o'.
board = {
  {'x','o',''},
  {'x','o',''},
  {'x','o',''}
}

local probe = torch.Tensor(mapBoardtoNum(board))
logprobs = model:forward(probe)
print(logprobs)

-- max is the largest output value, pred the position where it occurs
max, pred = torch.max(logprobs, 1)
print(max)
print(pred)
--[[
-- Basic format
{
{'x','o',''},
{'x','o',''},
{'x','o',''}
}
]]

-- Baseline: wins still holds the tally from the all-random games above.
print('random won ' .. wins ..' out of ' .. gamenum)


-- Phase 2: play gamenum more games. 'x' (which moves first) now asks the
-- trained network for its move; 'o' keeps moving at random.
mymoves = {}
myboards = {}
wins = 0
for k = 1, gamenum do

  board = {
    {'','',''},
    {'','',''},
    {'','',''}
  }

  -- Flipped before the first placement, so 'x' makes the opening move.
  move = 'o'

  game = {}
  choices = {}

  turn = 1

  while not (won(board,'x') or won(board,'o') or full(board)) do
    if move == 'x' then
      -- 'o' to play: draw cells at random until a free one comes up
      move = 'o'
      repeat
        i = math.random(3)
        j = math.random(3)
      until board[i][j] == ''

    elseif move == 'o' then
      -- 'x' to play: take the network's top-scoring cell
      move = 'x'
      local probs = model:forward(torch.Tensor(mapBoardtoNum(board)))
      maxs, pred = torch.max(probs,1)

      -- Undo the training label encoding, label = i + 3*(j-1).
      pred = pred - 1
      local cell = pred[1]
      i = cell % 3 + 1
      j = (cell - cell % 3) / 3 + 1

      -- If the suggested cell is already taken, fall back to a random one.
      if board[i][j] ~= '' then
        repeat
          i = math.random(3)
          j = math.random(3)
        until board[i][j] == ''
      end
    end

    -- Exactly one of the branches above has chosen (i, j) for `move`.
    board[i][j] = move

  end

  if won(board,'x') then
    wins = wins + 1
  end

end

print('learned won ' .. wins ..' out of ' .. gamenum)

 

Leave a Reply

Your email address will not be published. Required fields are marked *