Open In Colab

!pip install torchviz
Collecting torchviz
  Downloading torchviz-0.0.3-py3-none-any.whl.metadata (2.1 kB)
Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (from torchviz) (2.8.0+cu126)
Requirement already satisfied: graphviz in /usr/local/lib/python3.12/dist-packages (from torchviz) (0.21)
Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (3.19.1)
Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (4.15.0)
Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (75.2.0)
Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (1.13.3)
Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (3.5)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (3.1.6)
Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (2025.3.0)
Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (12.6.77)
Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (12.6.77)
Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (12.6.80)
Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (9.10.2.21)
Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (12.6.4.1)
Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (11.3.0.4)
Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (10.3.7.77)
Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (11.7.1.2)
Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (12.5.4.2)
Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (0.7.1)
Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (2.27.3)
Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (12.6.77)
Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (12.6.85)
Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (1.11.1.6)
Requirement already satisfied: triton==3.4.0 in /usr/local/lib/python3.12/dist-packages (from torch->torchviz) (3.4.0)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch->torchviz) (1.3.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch->torchviz) (3.0.2)
Downloading torchviz-0.0.3-py3-none-any.whl (5.7 kB)
Installing collected packages: torchviz
Successfully installed torchviz-0.0.3
import torch
from torchviz import make_dot
# Scalar leaf tensor; autograd will track operations on it.
z = torch.tensor(3.0)
z.requires_grad_(True)
z
tensor(3., requires_grad=True)
# Square z; the result carries grad_fn=PowBackward0 for backprop.
y = z.pow(2)
y
tensor(9., grad_fn=<PowBackward0>)
# Backpropagate from the scalar y; autograd fills z.grad with dy/dz.
torch.autograd.backward(y)
z.grad  # dy/dz = 2*z
tensor(6.)

Gradient accumulation and clearing the gradients

# Rebuild the graph, backpropagate, then clear the accumulated gradient.
y = z.pow(2)  # z == 3 here
torch.autograd.backward(y)
print(z.grad)
z.grad.zero_()  # in-place reset; returns the zeroed tensor (echoed by the cell)
tensor(6.)
tensor(0.)

Update the leaf variable values

z.is_leaf  # True: z was created directly via torch.tensor, not produced by an autograd op
True
make_dot(y, params={"z": z})  # render the autograd graph of y = z**2, labeling the leaf z

learning_rate = 0.2
# NOTE: deliberately raises RuntimeError (see traceback below): a leaf tensor
# with requires_grad=True cannot be updated in place while autograd is
# tracking; the update must be wrapped in torch.no_grad() instead.
z -= learning_rate * z.grad
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipython-input-2876811938.py in <cell line: 0>()
      1 learning_rate = 0.2
----> 2 z -= learning_rate * z.grad

RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.
# Recompute the graph and backpropagate; z.grad receives 2*z.
y = z.pow(2)  # z == 3
torch.autograd.backward(y)
print(z.grad)
tensor(6.)
# In-place gradient step on the leaf; no_grad keeps autograd from recording it.
with torch.no_grad():
    z.sub_(0.2 * z.grad)
# z.grad.zero_()
z
tensor(1.8000, requires_grad=True)

A simple training loop

# Linear model y_hat = w*x + b on a single fixed input.
x = torch.tensor(3.0)                      # input (no gradient needed)
w = torch.tensor(2.0, requires_grad=True)  # trainable weight
b = torch.tensor(1.0, requires_grad=True)  # trainable bias
y_hat = b + x * w
y_hat
tensor(7., grad_fn=<AddBackward0>)
make_dot(y_hat, params={"w": w, "b": b, "y_hat": y_hat})  # render the autograd graph of y_hat = w*x + b

# Fit y_hat = w*x + b to a single (x, y) pair by gradient descent on squared error.
x = torch.tensor(3.0, requires_grad=False)  # fixed input
y = torch.tensor(1.0, requires_grad=False)  # target label (given)

w = torch.tensor(2.0, requires_grad=True)   # trainable weight
b = torch.tensor(1.0, requires_grad=True)   # trainable bias

# Loop-invariant hyperparameter: hoisted out of the loop (the original
# reassigned it on every iteration).
learning_rate = 0.01

for i in range(10):
    # forward pass
    y_hat = w*x + b

    # compute loss: squared error on the single example
    loss = (y_hat - y)**2

    print(i, f"w = {w.data} , b= {b.data}")
    print("loss=",loss)
    print()

    # backward pass: accumulates d(loss)/dw into w.grad and d(loss)/db into b.grad
    loss.backward()

    # gradient-descent step; torch.no_grad() so the in-place update on the
    # leaf tensors is not recorded by autograd (it would raise otherwise)
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

    # clear accumulated gradients before the next iteration
    # (two plain statements instead of the original throwaway tuple expression)
    w.grad.zero_()
    b.grad.zero_()
0 w = 2.0 , b= 1.0
loss= tensor(36., grad_fn=<PowBackward0>)

1 w = 1.6399999856948853 , b= 0.8799999952316284
loss= tensor(23.0400, grad_fn=<PowBackward0>)

2 w = 1.3519999980926514 , b= 0.7839999794960022
loss= tensor(14.7456, grad_fn=<PowBackward0>)

3 w = 1.1216000318527222 , b= 0.7071999907493591
loss= tensor(9.4372, grad_fn=<PowBackward0>)

4 w = 0.9372800588607788 , b= 0.6457599997520447
loss= tensor(6.0398, grad_fn=<PowBackward0>)

5 w = 0.7898240685462952 , b= 0.5966079831123352
loss= tensor(3.8655, grad_fn=<PowBackward0>)

6 w = 0.6718592643737793 , b= 0.5572863817214966
loss= tensor(2.4739, grad_fn=<PowBackward0>)

7 w = 0.5774874091148376 , b= 0.5258290767669678
loss= tensor(1.5833, grad_fn=<PowBackward0>)

8 w = 0.5019899606704712 , b= 0.5006632804870605
loss= tensor(1.0133, grad_fn=<PowBackward0>)

9 w = 0.4415919780731201 , b= 0.48053061962127686
loss= tensor(0.6485, grad_fn=<PowBackward0>)