99 行代码的《冰雪奇缘》

序
Material Point Method（物质点法）
Moving Least Squares Material Point Method（移动最小二乘物质点法）
Differentiable MLS-MPM (ChainQueen可微物理引擎)
“硬核”的计算机图形学
Taichi编程语言
Differentiable Programming (可微编程) 与 Taichi
import taichi as ti：假装成Python的Taichi语言
总结
序

终于下决心一口气把四颗智齿拔了，这两天伴随着牙疼只能吃土豆泥和蒸蛋，写编译器之类的体力活是干不了了。登录知乎发现自己居然曾今开了个专栏，无奈一直沉迷于写代码，一篇文章都没写，实在有些惭愧。

最近正好看了《冰雪奇缘2》，就顺便聊聊动画电影里面连续介质，比如雪的模拟吧。实际上，由于最近图形技术的发展，只需要99行代码就可以写一个简单的连续介质模拟器，模拟三种相互作用的不同材料（水，果冻，雪），并且在你的笔记本上就能运行：

import taichi as ti
quality = 1 # Use a larger value for higher-res simulations
n_particles, n_grid = 9000 * quality ** 2, 128 * quality
dx, inv_dx = 1 / n_grid, float(n_grid)
dt = 1e-4 / quality
p_vol, p_rho = (dx * 0.5)**2, 1
p_mass = p_vol * p_rho
E, nu = 0.1e4, 0.2 # Young's modulus and Poisson's ratio
mu_0, lambda_0 = E / (2 * (1 + nu)), E * nu / ((1+nu) * (1 - 2 * nu)) # Lame parameters

x = ti.Vector(2, dt=ti.f32, shape=n_particles) # position
v = ti.Vector(2, dt=ti.f32, shape=n_particles) # velocity
C = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # affine velocity field
F = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # deformation gradient
material = ti.var(dt=ti.i32, shape=n_particles) # material id
Jp = ti.var(dt=ti.f32, shape=n_particles) # plastic deformation
grid_v = ti.Vector(2, dt=ti.f32, shape=(n_grid, n_grid)) # grid node momemtum/velocity
grid_m = ti.var(dt=ti.f32, shape=(n_grid, n_grid)) # grid node mass
ti.cfg.arch = ti.cuda # Try to run on GPU

@ti.kernel
def substep():
  for i, j in ti.ndrange(n_grid, n_grid):
    grid_v[i, j] = [0, 0]
    grid_m[i, j] = 0
  for p in range(n_particles): # Particle state update and scatter to grid (P2G)
    base = (x[p] * inv_dx - 0.5).cast(int)
    fx = x[p] * inv_dx - base.cast(float)
    # Quadratic kernels  [http://mpm.graphics   Eqn. 123, with x=fx, fx-1,fx-2]
    w = [0.5 * ti.sqr(1.5 - fx), 0.75 - ti.sqr(fx - 1), 0.5 * ti.sqr(fx - 0.5)]
    F[p] = (ti.Matrix.identity(ti.f32, 2) + dt * C[p]) @ F[p] # deformation gradient update
    h = ti.exp(10 * (1.0 - Jp[p])) # Hardening coefficient: snow gets harder when compressed
    if material[p] == 1: # jelly, make it softer
      h = 0.3
    mu, la = mu_0 * h, lambda_0 * h
    if material[p] == 0: # liquid
      mu = 0.0
    U, sig, V = ti.svd(F[p])
    J = 1.0
    for d in ti.static(range(2)):
      new_sig = sig[d, d]
      if material[p] == 2:  # Snow
        new_sig = min(max(sig[d, d], 1 - 2.5e-2), 1 + 4.5e-3)  # Plasticity
      Jp[p] *= sig[d, d] / new_sig
      sig[d, d] = new_sig
      J *= new_sig
    if material[p] == 0:  # Reset deformation gradient to avoid numerical instability
      F[p] = ti.Matrix.identity(ti.f32, 2) * ti.sqrt(J)
    elif material[p] == 2:
      F[p] = U @ sig @ V.T() # Reconstruct elastic deformation gradient after plasticity
    stress = 2 * mu * (F[p] - U @ V.T()) @ F[p].T() + ti.Matrix.identity(ti.f32, 2) * la * J * (J - 1)
    stress = (-dt * p_vol * 4 * inv_dx * inv_dx) * stress
    affine = stress + p_mass * C[p]
    for i, j in ti.static(ti.ndrange(3, 3)): # Loop over 3x3 grid node neighborhood
      offset = ti.Vector([i, j])
      dpos = (offset.cast(float) - fx) * dx
      weight = w[i][0] * w[j][1]
      grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
      grid_m[base + offset] += weight * p_mass
  for i, j in ti.ndrange(n_grid, n_grid):
    if grid_m[i, j] > 0: # No need for epsilon here
      grid_v[i, j] = (1 / grid_m[i, j]) * grid_v[i, j] # Momentum to velocity
      grid_v[i, j][1] -= dt * 50 # gravity
      if i < 3 and grid_v[i, j][0] < 0:          grid_v[i, j][0] = 0 # Boundary conditions
      if i > n_grid - 3 and grid_v[i, j][0] > 0: grid_v[i, j][0] = 0
      if j < 3 and grid_v[i, j][1] < 0:          grid_v[i, j][1] = 0
      if j > n_grid - 3 and grid_v[i, j][1] > 0: grid_v[i, j][1] = 0
  for p in range(n_particles): # grid to particle (G2P)
    base = (x[p] * inv_dx - 0.5).cast(int)
    fx = x[p] * inv_dx - base.cast(float)
    w = [0.5 * ti.sqr(1.5 - fx), 0.75 - ti.sqr(fx - 1.0), 0.5 * ti.sqr(fx - 0.5)]
    new_v = ti.Vector.zero(ti.f32, 2)
    new_C = ti.Matrix.zero(ti.f32, 2, 2)
    for i, j in ti.static(ti.ndrange(3, 3)): # loop over 3x3 grid node neighborhood
      dpos = ti.Vector([i, j]).cast(float) - fx
      g_v = grid_v[base + ti.Vector([i, j])]
      weight = w[i][0] * w[j][1]
      new_v += weight * g_v
      new_C += 4 * inv_dx * weight * ti.outer_product(g_v, dpos)
    v[p], C[p] = new_v, new_C
    x[p] += dt * v[p] # advection

import random
group_size = n_particles // 3
for i in range(n_particles):
  x[i] = [random.random() * 0.2 + 0.3 + 0.10 * (i // group_size), random.random() * 0.2 + 0.05 + 0.32 * (i // group_size)]
  material[i] = i // group_size # 0: fluid 1: jelly 2: snow
  v[i] = [0, 0]
  F[i] = [[1, 0], [0, 1]]
  Jp[i] = 1

import numpy as np
gui = ti.GUI("Taichi MLS-MPM-99", res=512, background_color=0x112F41)
for frame in range(20000):
  for s in range(int(2e-3 // dt)):
    substep()
  colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32)
  gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()])
  gui.show() # Change to gui.show(f'{frame:06d}.png') to write images to disk

这段代码用Python 3运行。运行前要根据操作系统和CUDA版本(如果有CUDA的话)安装taichi：

# CPU 版本 (支持Linux, OS X and Windows)
python3 -m pip install taichi-nightly

# GPU (CUDA 10.0) (只支持Linux)
python3 -m pip install taichi-nightly-cuda-10-0

# GPU (CUDA 10.1) (只支持Linux)
python3 -m pip install taichi-nightly-cuda-10-1

这99行代码虽然很短，其背后的故事却很长。虽然语法看起来是Python，其计算部分却会被一整套编译系统接管，最后输出可执行的x86_64或者PTX instructions，能够在CPU/GPU上高效运行。要只用99行代码实现一个这样一个模拟器，这其中凝聚了几代researcher的努力。我很幸运能够参与到这个接力赛中，并跑完最后一棒。

未完待续

——闲聊物理引擎，可微编程，SIGGRAPH生产力难题，与Taichi编程语言