问题:弹簧质点模型,cuda和cpu的运行结果大不相同

跟着课程走写了个简单的弹簧质点系统模型,但是CPU和GPU并行模式下结果大不相同,CPU下是对的,GPU下是错的,这是为什么呢?希望有人帮忙解答,感谢!

import taichi as ti

ti.init(arch=ti.cpu)
#ti.init(arch=ti.cuda)

spring_Y = ti.field(dtype=ti.f32, shape =()) # Young’s Modulus
paused = ti.field(dtype=ti.f32, shape =())
drag_damping = ti.field(dtype=ti.f32, shape =())
dashpot_damping = ti.field(dtype=ti.f32, shape =())

max_num_particles = 1024
particle_mass = 1
dt=0.001
substeps=5
bottom_y = 0.05

num_particles = ti.field(dtype=ti.i32, shape=())
x = ti.Vector.field(2, dtype=ti.f32, shape=max_num_particles)
v = ti.Vector.field(2, dtype=ti.f32, shape=max_num_particles)
f = ti.Vector.field(2, dtype=ti.f32, shape=max_num_particles)
fixed = ti.field(dtype=ti.i32, shape=max_num_particles)
rest_length = ti.field(dtype=ti.f32, shape=(max_num_particles,max_num_particles))

@ti.kernel
def substep():

n = num_particles[None]

for i in range(n):
    if not fixed[i]:
        # Gravity
        f[i] = ti.Vector([0,-9.81]) * particle_mass
        for j in range(n):
            if rest_length[i,j] != 0:
                # There is a spring
                x_ij = x[i]-x[j]
                d = x_ij.normalized()
                f[i] += -spring_Y * (x_ij.norm() / rest_length[i,j] - 1) * d

                #Dasgpot damping
                v_rel = (v[i]-v[j]).dot(d) #relative velocity
                f[i] += -dashpot_damping[None] * v_rel *d
        

for i in range(n):
    if not fixed[i]:
        v[i] += dt*f[i] / particle_mass
        v[i] *= ti.exp(-dt * drag_damping[None])
    else:
        v[i] = ti.Vector([0,0])




# Collision
for i in range(n):
    if x[i][1]< bottom_y:
        x[i][1] = bottom_y
        v[i][1] = 0

for i in range(n):
    x[i] += dt * v[i]

@ti.kernel
def add_particle():

tot=6
left = 0.1
right = 0.4
up = 0.9
down=0.6
dx = (right-left)/tot
dy = (up-down)/tot
for i in range(tot):
    for j in range(tot):
        new_particle_id = num_particles[None]
        x[new_particle_id] = ti.Vector([left+dx*i,down+dy*j])
        fixed_=0
        fixed[new_particle_id] =fixed_

        for k in range(num_particles[None]):
            xx=x[new_particle_id] - x[k]
            if abs(abs(xx[0])-dx)<0.001 and xx[1]==0:
                rest_length[new_particle_id, k] = dx
                rest_length[k, new_particle_id] = dx

            if abs(abs(xx[1])-dy)<0.001 and xx[0]==0:
                rest_length[new_particle_id, k] = dy
                rest_length[k, new_particle_id] = dy   

        num_particles[None] += 1

def main():
gui = ti.GUI(‘Explicit Mass Spring System’, background_color=0xDDDDDD)

spring_Y[None] = 10000
dashpot_damping[None]=0
drag_damping[None]=10


add_particle()    

while True:
    
    for i in range(substeps):
        substep()

    X = x.to_numpy()
    gui.line(begin=(0.0, bottom_y), end=(1.0, bottom_y), color=0x0, radius=1)
    for i in range(num_particles[None]): 
        for j in range(num_particles[None]): 
            if rest_length[i, j] !=0:
                gui.line(begin=X[i], end=X[j], color=0x888888, radius=2)

    for i in range(num_particles[None]):
        c = 0x0 if fixed[i] else 0xFF0000
        gui.circle(X[i], color=c, radius=5)
    gui.show()

if name== ‘main’:
main()