PYTORCH CHEAT SHEET
Imports
General
import torch # root package
from torch.utils.data import Dataset, DataLoader # dataset representation and loading
Neural Network API
import torch.autograd as autograd # computation graph
from torch import Tensor # tensor node in the computation graph
import torch.nn as nn # neural networks
import torch.nn.functional as F # layers, activations and more
import torch.optim as optim # optimizers e.g. gradient descent, ADAM, etc.
from torch.jit import script, trace # hybrid frontend decorator and tracing jit
See autograd, nn, functional and optim
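A minimal sketch of how these imports fit together; the network sizes, data shapes, and learning rate below are illustrative choices, not part of the cheat sheet:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class TinyNet(nn.Module): # toy two-layer network
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 8)
        self.fc2 = nn.Linear(8, 2)
    def forward(self, x):
        return self.fc2(F.relu(self.fc1(x))) # functional activation from F

net = TinyNet()
opt = optim.SGD(net.parameters(), lr=0.1) # optimizer over module parameters
x, y = torch.randn(16, 4), torch.randint(0, 2, (16,))
loss = F.cross_entropy(net(x), y) # loss from the functional API
loss.backward() # autograd fills parameter .grad fields
opt.step() # gradient descent update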
TorchScript and JIT
torch.jit.trace() # takes your module or function and an example
# data input, and traces the computational steps
# that the data encounters as it progresses through the model
@script # decorator used to indicate data-dependent
# control flow within the code being traced
See TorchScript
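A minimal sketch contrasting trace and script; the module, example input, and file name are placeholders:

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4), nn.ReLU()) # placeholder module
example = torch.randn(1, 4) # example input for tracing
traced = torch.jit.trace(model, example) # records the ops seen for this input

@torch.jit.script # compiles data-dependent control flow
def relu_or_zero(x: torch.Tensor) -> torch.Tensor:
    if float(x.sum()) > 0.0: # branch depends on data, so script it
        return torch.relu(x)
    return torch.zeros_like(x)

traced.save("traced_model.pt") # serialize the TorchScript module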
ONNX
torch.onnx.export(model, dummy_data, "model.onnx") # exports an ONNX formatted
# model using a trained model, dummy
# data and the desired file name
import onnx # ONNX package (installed separately from PyTorch)
model = onnx.load("model.onnx") # load an ONNX model
onnx.checker.check_model(model) # check that the model
# IR is well formed
onnx.helper.printable_graph(model.graph) # print a human readable
# representation of the graph
See onnx
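Putting the ONNX snippets together; the nn.Linear model, input shape, and "model.onnx" file name are assumptions for the example:

import torch
import torch.nn as nn
import onnx

model = nn.Linear(4, 2) # stand-in for a trained model
dummy_data = torch.randn(1, 4) # shape must match real inputs
torch.onnx.export(model, dummy_data, "model.onnx") # write the ONNX file

loaded = onnx.load("model.onnx") # parse the ONNX protobuf
onnx.checker.check_model(loaded) # validate the model IR
print(onnx.helper.printable_graph(loaded.graph)) # dump the graph as text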
Vision
from torchvision import datasets, models, transforms # vision datasets,
# architectures &
# transforms
import torchvision.transforms as transforms # composable transforms
See torchvision
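A short sketch of composable transforms feeding a DataLoader; the dataset choice, data directory, and normalization statistics are illustrative (the values shown are the commonly quoted MNIST mean/std):

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

preprocess = transforms.Compose([ # chain composable transforms
    transforms.ToTensor(), # PIL image -> float tensor in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,)), # per-channel mean/std
])
train_set = datasets.MNIST("data/", train=True, download=True, transform=preprocess)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)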
Distributed Training
import torch.distributed as dist # distributed communication
from torch.multiprocessing import Process # memory sharing processes
See distributed and multiprocessing
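A minimal single-machine sketch using the imports above; the gloo backend, localhost address, port, and world size are assumptions for the example:

import os
import torch
import torch.distributed as dist
from torch.multiprocessing import Process

def run(rank, world_size):
    os.environ["MASTER_ADDR"] = "127.0.0.1" # rendezvous on the local machine
    os.environ["MASTER_PORT"] = "29500" # any free port
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    t = torch.ones(1) * rank
    dist.all_reduce(t, op=dist.ReduceOp.SUM) # sum the tensor across processes
    print(f"rank {rank}: {t.item()}")
    dist.destroy_process_group()

if __name__ == "__main__":
    world_size = 2
    procs = [Process(target=run, args=(r, world_size)) for r in range(world_size)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()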
Tensors
GPU Usage
torch.cuda.is_available() # check for cuda
x = x.cuda() # move x's data from
# CPU to GPU and return new object
x = x.cpu() # move x's data from GPU to CPU
# and return new object
if not args.disable_cuda and torch.cuda.is_available(): # device agnostic code
    args.device = torch.device('cuda') # and modularity
else: #
    args.device = torch.device('cpu') #
net.to(device) # recursively convert its
# parameters and buffers to
# device specific tensors
x = x.to(device) # copy your tensors to a device
# (gpu, cpu)
See cuda
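A device-agnostic sketch combining the calls above (layer and batch sizes are arbitrary):

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = nn.Linear(4, 2).to(device) # parameters and buffers moved to device
x = torch.randn(8, 4).to(device) # input copied to the same device
y = net(x) # computation runs on `device`
y = y.cpu() # move results back for printing, numpy, etc.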
Deep Learning
nn.Linear(m,n) # fully connected layer from
# m to n units
nn.ConvXd(m,n,s) # X dimensional conv layer from
# m to n channels where X ∈ {1,2,3}
# and the kernel size is s
nn.MaxPoolXd(s) # X dimension pooling layer
# (notation as above)
nn.BatchNormXd # batch norm layer
nn.RNN/LSTM/GRU # recurrent layers
nn.Dropout(p=0.5, inplace=False) # dropout layer for any dimensional input
nn.Dropout2d(p=0.5, inplace=False) # 2-dimensional channel-wise dropout
nn.Embedding(num_embeddings, embedding_dim) # (tensor-wise) mapping from
# indices to embedding vectors
See nn
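A small module wiring several of the layers above together; the channel counts, kernel size, and 28x28 input assumption are illustrative:

import torch
import torch.nn as nn

class SmallCNN(nn.Module):
    def __init__(self, n_classes=10):
        super().__init__()
        self.conv = nn.Conv2d(1, 16, 3, padding=1) # 1 -> 16 channels, 3x3 kernel
        self.bn = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2) # halves the spatial size
        self.drop = nn.Dropout2d(p=0.5)
        self.fc = nn.Linear(16 * 14 * 14, n_classes) # assumes 28x28 inputs

    def forward(self, x):
        x = self.pool(torch.relu(self.bn(self.conv(x))))
        x = self.drop(x)
        return self.fc(x.flatten(1))

out = SmallCNN()(torch.randn(2, 1, 28, 28)) # -> shape (2, 10)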
Loss Functions
nn.X # where X is L1Loss, MSELoss, CrossEntropyLoss,
# CTCLoss, NLLLoss, PoissonNLLLoss,
# KLDivLoss, BCELoss, BCEWithLogitsLoss,
# MarginRankingLoss, HingeEmbeddingLoss,
# MultiLabelMarginLoss, SmoothL1Loss,
# SoftMarginLoss, MultiLabelSoftMarginLoss,
# CosineEmbeddingLoss, MultiMarginLoss,
# or TripletMarginLoss
See loss functions
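Typical loss usage, here with CrossEntropyLoss as the X (batch size and class count are arbitrary):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss() # expects raw logits and class indices
logits = torch.randn(8, 5, requires_grad=True) # batch of 8, 5 classes
targets = torch.randint(0, 5, (8,))
loss = criterion(logits, targets) # scalar tensor
loss.backward() # populates logits.grad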
Activation Functions
nn.X # where X is ReLU, ReLU6, ELU, SELU, PReLU, LeakyReLU,
# RReLU, CELU, GELU, Threshold, Hardshrink, Hardtanh,
# Sigmoid, LogSigmoid, Softplus, Softshrink,
# Softsign, Tanh, Tanhshrink, Softmin, Softmax,
# Softmax2d, LogSoftmax or AdaptiveLogSoftmaxWithLoss
See activation functions
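Activations are modules, so they drop straight into nn.Sequential; ReLU and LogSoftmax below are just example choices of X:

import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                    nn.Linear(8, 3), nn.LogSoftmax(dim=1))
probs = net(torch.randn(2, 4)).exp() # back from log-probabilities to probabilities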
Optimizers
opt = optim.X(model.parameters(), ...) # create optimizer
opt.step() # update weights
optim.X # where X is SGD, Adadelta, Adagrad, Adam,
# AdamW, SparseAdam, Adamax, ASGD,
# LBFGS, RMSprop or Rprop
See optimizers
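A bare-bones training loop showing the optimizer lifecycle; Adam, the learning rate, and the random data are illustrative:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

model = nn.Linear(4, 1)
opt = optim.Adam(model.parameters(), lr=1e-3) # X = Adam here
x, y = torch.randn(32, 4), torch.randn(32, 1)
for _ in range(10):
    opt.zero_grad() # clear gradients from the previous step
    loss = F.mse_loss(model(x), y)
    loss.backward() # compute gradients
    opt.step() # update weights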