关于自动微分和 backpropagate 后 grad 都是 0 的问题

懵逼新手,求指导
从 DiffTaichi 的仓库(https://github.com/yuanming-hu/difftaichi/tree/master/examples)中看到 Taichi 可以自动微分,能从物理计算结果反向传播(backpropagate)回去进行反向优化。小白我想实现这个过程,于是用 Taichi 写了个简单的画线条的 2D SDF,但从 SDF 绘制结果 backpropagate 回到 line 参数时,grad 都是 0。翻了课程视频,还没有讲到这块,想问问:要实现 autograd,kernel 的写法需要注意什么?

我的代码:

import math

import cv2
import numpy as np
import taichi as ti
import torch

# Edge length used for the canvas and target fields (SIZE x SIZE x 3).
SIZE = 256
# Number of strokes, i.e. rows of the `strokes` field.
STROKE_NUM = 32
# Values stored per stroke; drawStrokes passes them to lineSDFAABB in the
# order (ax, ay, bx, by, width, R, G, B, A).
STROKE_PARAMNUM = 9

# Step size applied by applyGrad in its gradient-descent update.
learning_rate = .1
# Reciprocal of SIZE.
dx = 1/SIZE
# Weight applied to each pixel's squared colour error in compute_loss.
dx2 = dx*dx/3

# Initialise Taichi on the CPU backend with f32 as the default float type.
# This runs before any field is declared below.
ti.init(default_fp=ti.f32, arch=ti.cpu)
# Image the strokes are drawn into: SIZE x SIZE pixels, 3 colour channels.
canvas = ti.var(dt=ti.f32, shape=(SIZE, SIZE,3),needs_grad=True)
# Reference image the canvas is compared against in compute_loss.
# NOTE(review): target is only read in the visible code, so needs_grad=True
# looks unnecessary here - confirm before removing.
target = ti.var(dt=ti.f32, shape=(SIZE, SIZE,3),needs_grad=True)
# One row of STROKE_PARAMNUM values per stroke; strokes.grad is read by
# applyGrad to update these values.
strokes = ti.var(dt=ti.f32,shape=(STROKE_NUM,STROKE_PARAMNUM),needs_grad=True)
# Scalar (0-dimensional) loss accumulated by compute_loss and handed to ti.Tape.
loss = ti.var(dt=ti.f32,shape=(),needs_grad=True)

@ti.func
def capsuleSDF(px, py, ax, ay, bx, by, r):
    # Signed distance from the point (px, py) to a capsule, i.e. the segment
    # from (ax, ay) to (bx, by) thickened by radius r. The result is the
    # distance to the segment minus r, so it is negative inside the capsule.
    pax = px - ax
    pay = py - ay
    bax = bx - ax
    bay = by - ay
    # Guard against a zero-length segment (a == b): without the floor the
    # projection below divides 0 by 0 and NaN spreads into canvas and loss.
    seg_len2 = ti.max(bax * bax + bay * bay, 1e-12)
    # Position of the closest point along the segment, clamped to [0, 1].
    h = ti.max(ti.min((pax * bax + pay * bay) / seg_len2, 1.0), 0.0)
    # Offset from that closest point to the query point. Named off_x / off_y
    # so the locals do not shadow the module-level `dx`.
    off_x = pax - bax * h
    off_y = pay - bay * h
    return ti.sqrt(off_x * off_x + off_y * off_y) - r

@ti.func
def lineSDFAABB( ax:ti.f32, ay:ti.f32, bx:ti.f32, by:ti.f32, width:ti.f32, R:ti.f32, G:ti.f32, B:ti.f32, A:ti.f32):
    # Alpha-blend one capsule-shaped stroke into `canvas`.
    #   (ax, ay), (bx, by): segment end points, in pixel coordinates
    #   width:              capsule radius passed to capsuleSDF
    #   R, G, B:            stroke colour
    #   A:                  stroke opacity, scaled by the per-pixel coverage
    # Only pixels inside the stroke's axis-aligned bounding box are visited.
    #
    # The box is converted to integer pixel indices and clamped to
    # [0, SIZE] so that strokes lying partly or fully outside the canvas can
    # never index `canvas` out of bounds.
    x0 = ti.max(ti.cast(ti.floor(ti.min(ax, bx) - width), ti.i32) - 1, 0)
    x1 = ti.min(ti.cast(ti.ceil(ti.max(ax, bx) + width), ti.i32) + 1, SIZE)
    y0 = ti.max(ti.cast(ti.floor(ti.min(ay, by) - width), ti.i32) - 1, 0)
    y1 = ti.min(ti.cast(ti.ceil(ti.max(ay, by) + width), ti.i32) + 1, SIZE)
    for y in range(y0, y1):
        for x in range(x0, x1):
            # Coverage is 1 well inside the capsule, 0 outside, with a
            # one-pixel linear ramp across the boundary.
            dist = capsuleSDF(ti.cast(x, ti.f32), ti.cast(y, ti.f32), ax, ay, bx, by, width)
            blendAlpha = A * ti.max(ti.min(0.5 - dist, 1.0), 0.0)
            if blendAlpha > 0:
                invAlpha = 1.0 - blendAlpha
                # NOTE(review): each canvas element is read and then
                # overwritten in place. Taichi's autodiff documentation asks
                # that a field element is not overwritten after it has been
                # read; gradients through this blend may be wrong or zero
                # unless every stroke writes to its own canvas layer -
                # confirm against the Taichi version in use.
                canvas[x,y,0] = R*blendAlpha + canvas[x,y,0]*invAlpha
                canvas[x,y,1] = G*blendAlpha + canvas[x,y,1]*invAlpha
                canvas[x,y,2] = B*blendAlpha + canvas[x,y,2]*invAlpha


@ti.kernel
def clearCanvas():
    # Reset every channel of every canvas pixel to zero before a redraw.
    for px, py, channel in canvas:
        canvas[px, py, channel] = 0.0

@ti.kernel
def drawStrokes():
    # Draw every stroke stored in `strokes` onto the canvas.
    # NOTE(review): Taichi parallelises the outermost loop of a kernel, so
    # overlapping strokes may blend in a nondeterministic order - confirm.
    for s in range(STROKE_NUM):
        # Unpack the row in the parameter order lineSDFAABB expects.
        ax = strokes[s, 0]
        ay = strokes[s, 1]
        bx = strokes[s, 2]
        by = strokes[s, 3]
        width = strokes[s, 4]
        red = strokes[s, 5]
        green = strokes[s, 6]
        blue = strokes[s, 7]
        alpha = strokes[s, 8]
        lineSDFAABB(ax, ay, bx, by, width, red, green, blue, alpha)


@ti.kernel
def applyGrad():
    # One gradient-descent step: move every stroke parameter against its
    # gradient, scaled by learning_rate.
    for stroke in range(STROKE_NUM):
        for param in range(STROKE_PARAMNUM):
            step = learning_rate * strokes.grad[stroke, param]
            strokes[stroke, param] -= step

@ti.kernel
def compute_loss():
    # Accumulate into `loss` the squared colour difference between target
    # and canvas over all pixels, each pixel weighted by dx2.
    for y in range(SIZE):
        for x in range(SIZE):
            diff_r = target[x, y, 0] - canvas[x, y, 0]
            diff_g = target[x, y, 1] - canvas[x, y, 1]
            diff_b = target[x, y, 2] - canvas[x, y, 2]
            loss[None] += dx2 * (diff_r**2 + diff_g**2 + diff_b**2)

def saveCanvas(filename):
    """Write the current canvas to ``filename`` as an image.

    The canvas is copied out of Taichi in a single bulk transfer, scaled by
    255 and handed to ``cv2.imwrite``. A failed write is reported on stdout
    rather than raised, so a missing output directory does not abort the
    optimisation loop.

    Args:
        filename: Destination path; the extension selects the image format.
    """
    # One bulk copy instead of SIZE * SIZE * 3 individual Python-scope reads.
    img = canvas.to_numpy()
    print('save',filename)
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(filename, img * 255):
        print('failed to save', filename)

def run():
    """Fit the strokes to 'taichi.png' by gradient descent.

    Loads the target image and uploads it to the ``target`` field, randomly
    initialises every stroke, then performs three optimisation steps. Each
    step clears and redraws the canvas, computes the loss under ``ti.Tape``,
    applies the gradient and saves the canvas to disk.

    Raises:
        FileNotFoundError: If 'taichi.png' cannot be read.
    """
    target_img = cv2.imread('taichi.png')
    if target_img is None:
        # cv2.imread returns None on failure instead of raising.
        raise FileNotFoundError("cannot read target image 'taichi.png'")
    # Keep the first channel only and rescale it to [0, 1].
    target_img = cv2.resize(target_img[:, :, 0] / 255.0, (SIZE, SIZE))
    cv2.imshow('target', target_img )
    # imshow only paints the window once the GUI event loop gets to run.
    cv2.waitKey(1)
    print('copy img to taichi')
    # Replicate the single channel into all three target channels and upload
    # it in one bulk transfer instead of element by element.
    target_rgb = np.repeat(target_img[:, :, None], 3, axis=2)
    target.from_numpy(np.ascontiguousarray(target_rgb, dtype=np.float32))
    print("random init strokes")
    # Draw every parameter from a non-negative uniform range. With
    # zero-centred normal samples about half of the strokes get a negative
    # alpha or width and many end points fall off the canvas; such strokes
    # never pass the blendAlpha > 0 test in lineSDFAABB, touch no pixel and
    # therefore receive a zero gradient.
    for i in range(STROKE_NUM):
        # End points (ax, ay) and (bx, by) inside the canvas.
        strokes[i,0] = np.random.rand() * SIZE
        strokes[i,1] = np.random.rand() * SIZE
        strokes[i,2] = np.random.rand() * SIZE
        strokes[i,3] = np.random.rand() * SIZE
        # Stroke width in [1, 6).
        strokes[i,4] = 1 + np.random.rand() * 5
        # Colour (R, G, B) and opacity A in [0, 1).
        strokes[i,5] = np.random.rand()
        strokes[i,6] = np.random.rand()
        strokes[i,7] = np.random.rand()
        strokes[i,8] = np.random.rand()
    print('start iteration')
    for opt in range(3):
        # Kernels launched inside the tape are recorded and differentiated
        # with respect to `loss` when the block exits.
        with ti.Tape(loss):
            clearCanvas()
            drawStrokes()
            compute_loss()
        print('Iter', opt, ' Loss =', loss[None])
        applyGrad()
        saveCanvas('D:/result/'+str(opt)+'.png')

# Start the optimisation only when executed as a script, so importing this
# module does not open windows or write files.
if __name__ == "__main__":
    run()