# Vomiting Out Some Machine Learning with Torch

Don’t know anything about Lua or Torch, and not so much about machine learning. Little project to get going.

Torch is to Lua what NumPy is to Python. Never done any Lua before, although for a while it was the main language on the ESP8266. Torch seems like a popular base for machine learning, in competition with Theano and TensorFlow. Lua is like if Python and JavaScript had a slightly odd baby.

Thought I’d give a simple tic-tac-toe-playing guy a go. The structure is: play a bunch of totally random games and collect up all the winning games. Then the problem becomes a classification problem where the categories are the next move (1-9).

Then used the stock nn neural network package to learn on it. Had a tough time finding clear docs. I am unimpressed.

Then use trained neural network to play against the random component.

The win stats increased from ~28% to ~45% (with some fluctuations run to run of a couple percent). Not bad. Especially since going second is disadvantageous. Okay, as I wrote that I realized it’s easy to try flipping that. Going first the stats go from 59% to 69%.

Hmmm. Maybe I should look at draws?

Also, a smarter strategy for the moves would be to try the suggested moves in order of their rank, rather than using the top suggested move and then, if that is invalid, falling back to a random move.

```lua
math.randomseed(os.time())

--- Report whether mark `x` occupies a complete line on `board`.
-- @param board 3x3 table of rows; cells hold 'x', 'o' or '' (empty)
-- @param x     the mark to look for, e.g. 'x' or 'o'
-- @return true if any diagonal, row or column consists solely of `x`,
--         false otherwise
function won(board, x)
  -- main diagonal (top-left to bottom-right)
  if board[1][1] == x and board[2][2] == x and board[3][3] == x then
    return true
  end

  -- anti-diagonal (top-right to bottom-left)
  if board[1][3] == x and board[2][2] == x and board[3][1] == x then
    return true
  end

  for i = 1, 3 do
    -- row i
    if board[i][1] == x and board[i][2] == x and board[i][3] == x then
      return true
    end
    -- column i
    if board[1][i] == x and board[2][i] == x and board[3][i] == x then
      return true
    end
  end

  return false
end

--- Tell whether every cell of the 3x3 `board` is occupied.
-- A cell counts as free only when it holds the empty string ''.
-- @param board 3x3 table of rows
-- @return false as soon as a free cell is found, true otherwise
function full(board)
  -- Walk the nine cells in row-major order using a single flat counter.
  for cell = 0, 8 do
    local r = math.floor(cell / 3) + 1
    local c = cell % 3 + 1
    if board[r][c] == '' then
      return false
    end
  end
  return true
end

-- Numeric encoding of each cell mark used as network input.
local CELL_TO_NUM = { x = 1, [''] = 0, o = -1 }

--- Convert a board of marks into a board of numbers.
-- 'x' becomes 1, '' becomes 0 and 'o' becomes -1; a cell holding any other
-- value is left unset (nil) in the result.
-- @param board 3x3 table of rows holding 'x', 'o' or ''
-- @return a fresh 3x3 table of numbers; `board` itself is not modified
function mapBoardtoNum(board)
  -- Local on purpose: the result must not leak into (or clobber) a global.
  local newboard = {{}, {}, {}}
  for i = 1, 3 do
    for j = 1, 3 do
      newboard[i][j] = CELL_TO_NUM[board[i][j]]
    end
  end
  return newboard
end

--[[
print(won({
{'x','',''},
{'x','o',''},
{'x','o',''}}, 'o'))
]]

-- Phase 1: play purely random games and keep the positions/moves of 'x'
-- from every game that 'x' ends up winning. These become the training set.
mymoves = {}   -- move labels (1-9) taken by 'x' in winning games
myboards = {}  -- numeric board seen by 'x' just before each of those moves
wins = 0       -- number of games won by 'x'

-- NOTE(review): the original game count is not visible in this listing;
-- 1000 is a placeholder value -- adjust as needed.
gamenum = 1000

for _ = 1, gamenum do
  -- Fresh state for every game.
  board = {{'','',''},
           {'','',''},
           {'','',''}}

  move = 'o'    -- flipped at the top of the loop, so 'x' makes the first move

  game = {}     -- boards seen by 'x' during this game
  choices = {}  -- moves chosen by 'x' during this game

  turn = 1

  while not won(board,'x') and not won(board,'o') and not full(board) do

    -- Alternate the player to move.
    if move == 'x' then
      move = 'o'
    elseif move == 'o' then
      move = 'x'
    end

    -- Pick a random free cell.
    local i, j
    repeat
      i = math.random(3)
      j = math.random(3)
    until board[i][j] == ''

    if move == 'x' then
      -- Record the position *before* the move together with the move label.
      game[turn] = mapBoardtoNum(board)
      -- Encode (i, j) as a single class label in 1..9.
      choices[turn] = i - 1 + 3 * (j - 1) + 1
      turn = turn + 1
    end
    board[i][j] = move

  end

  -- Only winning games contribute training examples.
  if won(board,'x') then
    wins = wins + 1
    for k = 1, #game do
      table.insert(myboards, game[k])
      table.insert(mymoves, choices[k])
    end
  end

end

--print(mymoves)
--print(#myboards)
print('won ' .. wins ..' out of ' .. gamenum)

-- Build the dataset table handed to the trainer: it exposes a size() method
-- and holds one {input, label} pair per integer index.
training = {}
--[[
Alternative layout kept for reference:
training.data = torch.Tensor(myboards)
training.labels = torch.Tensor(mymoves)
training.size = function() return (#mymoves) end
]]

function training.size()
  return #mymoves
end

for idx = 1, training:size() do
  local board_tensor = torch.Tensor(myboards[idx])
  local label_tensor = torch.Tensor({mymoves[idx]})
  training[idx] = {board_tensor, label_tensor}
end

-- Network dimensions: 9 board cells in, 9 candidate moves out.
ninputs = 9
nhiddens = 30
noutputs = 9
require 'nn'

-- One-hidden-layer classifier over the flattened board.
-- NOTE(review): the layer definitions are missing from this listing; the
-- stack below is reconstructed from ninputs/nhiddens/noutputs and from the
-- use of ClassNLLCriterion (which expects log-probabilities). The hidden
-- activation (Tanh) is an assumption -- confirm against the original.
model = nn.Sequential()
model:add(nn.Reshape(ninputs))            -- 3x3 board -> 9-vector
model:add(nn.Linear(ninputs, nhiddens))
model:add(nn.Tanh())
model:add(nn.Linear(nhiddens, noutputs))
model:add(nn.LogSoftMax())                -- log-probabilities per move

criterion = nn.ClassNLLCriterion()

-- The trainer must exist before its fields are set.
trainer = nn.StochasticGradient(model, criterion)
trainer.learningRate = 0.01
trainer.maxIteration = 7

trainer:train(training)

--[[
print(board)
print(won(board,'o'))
print(won(board,'x'))
print(choices)
print(game)
]]

-- Quick sanity check: push one hand-written position through the model and
-- print the raw output together with the result of torch.max over it.
board = {
  {'x', 'o', ''},
  {'x', 'o', ''},
  {'x', 'o', ''},
}

local sample_input = torch.Tensor(mapBoardtoNum(board))
logprobs = model:forward(sample_input)
print(logprobs)

max, pred = torch.max(logprobs, 1)
print(max)
print(pred)
--[[
-- Basic format
{
{'x','o',''},
{'x','o',''},
{'x','o',''}
}
]]

-- gamenum is expected to have been set earlier in the script; the fallback
-- only guards against it being undefined at this point.
gamenum = gamenum or 1000

print('random won ' .. wins ..' out of ' .. gamenum)

-- Phase 2: 'x' is played by the trained network, 'o' stays random.
mymoves = {}
myboards = {}
wins = 0

for _ = 1, gamenum do
  -- Fresh state for every game.
  board = {{'','',''},
           {'','',''},
           {'','',''}}

  move = 'o'    -- flipped below, so 'x' (the network) makes the first move

  game = {}
  choices = {}

  turn = 1

  while not won(board,'x') and not won(board,'o') and not full(board) do
    if move == 'x' then
      -- Random opponent: pick any free cell.
      move = 'o'
      local i, j
      repeat
        i = math.random(3)
        j = math.random(3)
      until board[i][j] == ''
      board[i][j] = move

    elseif move == 'o' then
      -- Network player: take the highest-scoring move.
      move = 'x'
      local probs = model:forward(torch.Tensor(mapBoardtoNum(board)))
      local _, best = torch.max(probs, 1)
      -- torch.max returns tensors; pull out the plain number before doing
      -- index arithmetic, otherwise board[i][j] is indexed with a tensor.
      local pred = best[1] - 1
      -- Inverse of the label encoding i - 1 + 3 * (j - 1) + 1.
      local i = pred % 3 + 1
      local j = (pred - pred % 3) / 3 + 1

      if board[i][j] ~= '' then
        -- Suggested cell is taken: fall back to a random free cell.
        repeat
          i = math.random(3)
          j = math.random(3)
        until board[i][j] == ''
      end
      board[i][j] = move

    end

  end

  if won(board,'x') then
    wins = wins + 1
  end

end

print('learned won ' .. wins ..' out of ' .. gamenum)
```