Why does my GPU immediately run out of memory when I try to run this code?
I am trying to write a neural network that will train on plays by Shakespeare and then write its own passages. I am using pytorch
. For some reason, my GPU immediately runs out of memory. Note I am not running it on my own GPU; I am running it using the free GPU acceleration from Google Colab. I've tried running a different notebook using the GPU and it works, so I know it's not because I ran into some GPU usage quota or anything like that. Here is a link to the notebook:
so you can try running it yourself. Alternatively, I will paste the code below as well
notice i have a print(i)
in the last for loop. when I run it, the only output I get from that print
is a single 0
, and then I get
RuntimeError Traceback (most recent call last)
ipython-input-52-e6121e5b189f in module()
23 targets = targets.to(dtype=torch.float32).cuda()
--- 25 out, hidden = net(inputs, hidden)
3 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
580 if batch_sizes is None:
581 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
-- 582 self.dropout, self.training, self.bidirectional, self.batch_first)
583 else:
584 result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.90 GiB total capacity; 12.43 GiB already allocated; 5.88 MiB free; 15.08 GiB reserved in total by PyTorch)
it's running out of memory before it has even done a single batch!
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import re
with open('drive/MyDrive/colab/shakespeare.txt', 'r') as file:
text = file.read()
chars = list(set(text))
index2char = dict(enumerate(chars))
char2index = {char: index for index, char in index2char.items()}
encoded = [char2index[word] for word in text]
seq_length = 50
regex = '.{1,' + str(seq_length + 1) + '}'
dataset = np.array(re.findall(regex, text, flags=re.S))
batch_size = 10
n_batches = len(dataset) // batch_size
dataset = dataset[:n_batches * batch_size]
device = torch.device(cuda) if torch.cuda.is_available() else torch.device(cpu)
dataset = dataset.reshape(n_batches, -1)
def passage_to_indices(passage: str):
return np.array([char2index[char] for char in passage])
class Net(nn.Module):
def __init__(self, input_size, batch_size, hidden_size, num_layers):
self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
self.criterion = nn.CrossEntropyLoss()
self.input_size = input_size
self.batch_size = batch_size
self.hidden_size = hidden_size
self.num_layers = num_layers
def forward(self, input, hidden):
# lstm should take input of size (seq_length, batch_size, input_size)
# and hidden of size (num_layers, batch_size, hidden_size)
out, hidden = self.lstm(input, hidden)
return out, hidden
def init_hidden(self):
hidden = (
return hidden
input_size = len(chars) # 65
hidden_size = 256
num_layers = 2
net = Net(input_size=input_size,
net = net.cuda()
epochs = 2
for epoch in range(epochs):
hidden = net.init_hidden()
for i, batch in enumerate(dataset):
targets = torch.tensor([char2index[str(example[-1])] for example in batch])
# dim passages = (batch_size, seq_length)
passages = np.array([passage_to_indices(str(example[:-1])) for example in batch])
one_hot = F.one_hot(torch.tensor(passages), num_classes=input_size)
inputs = one_hot.view(seq_length, batch_size, input_size)
inputs = inputs.to(dtype=torch.float32).cuda()
targets = targets.to(dtype=torch.float32).cuda()
out, hidden = net(inputs, hidden)
Category Data Science