渲染一个花瓶 - 优化 金牌收录

0

在上一节 渲染一个花瓶通过软光栅实现 中,很粗糙地实现了一个软渲染。

今天完善下面 2 个问题:

  • 透视插值矫正
  • 齐次空间裁剪

一、透视插值矫正

上一节中,我们直接使用重心坐标来插值三角形顶点的所有属性,其实是不大严谨的,因为重心坐标是在屏幕空间求得的,忽略了深度信息。我们的花瓶定点数量接近 6 万,比较细密,导致这个错误比较隐秘,看起来好像一切良好。为了暴露这个问题,我们创建一个只有 4 个顶点的平面,下图是矫正前后的比对效果。

 

二、齐次空间裁剪

裁剪原理非常简单,超出边界的顶点砍掉,利用新得到的边界交点重新构造三角形。上一节使用的是在屏幕空间进行裁剪,包围盒超出屏幕的部分就砍去。

抛弃边界上的整个三角形:

 

裁剪后:

通过对平面的裁剪可以更加清晰地了解裁剪的细节:

三、其他

部分矩阵的操作改成了 SIMD 方式,帧率有少许提升。

修复了相机移动时会卡死问题,还有其他一些 BUG.

四、操作

  • 拖动鼠标左键:拖动模型
  • 滚轮:视野大小缩放 (测试透视矫正时把 fov 调大比较明显)
  • 鼠标左键加滚轮:相机前进或后退(视觉效果和缩放视野差不多)
  • 空格键:切换三种光照模型
  • 按 C 键:颜色贴图
  • 按 N 键:法线贴图
  • 按 B 键:位移贴图
  • 按 L 键:线框模式
  • 按 K 键:背面剔除
  • 按 A 键:透视插值矫正
  • 按 F 键:切换三种裁剪方法
  • 按 V 键:显示花瓶
  • 按 P 键:显示平面

二进制链接:https://pan.baidu.com/s/107_t2ykiwygdzSv2FDZHYw
提取码:upwv

五、源码

/////////////////////////////////////////////////////////////
// Rendering a Vase
// Platform : Visual Studio 2022 (v143), EasyX_20220116
// Author   : 872289455@qq.com
// Date     : 2022-03-24
//
#include <ctime>
#include <cassert>
#include <cfloat>
#include <cmath>
#include <vector>
#include <random>
#include <functional>
#include <algorithm>
#include <xmmintrin.h>
#include <fvec.h>
#include <easyx.h>
using std::vector;

#define WIDTH                800
#define HEIGHT               600
#define Z_NEAR               (1.f)
#define Z_FAR                (50.f)
#define WORLD_UP             vec3(0.f, 1.f, 0.f)
#define MODE_FILL            0
#define MODE_LINE            1
#define CLIP_HOMO            0
#define CLIP_HOMO_RAW        1
#define CLIP_SCRN            2
#define COLOR_STRENGTH       0.85f
#define NORMAL_STRENGTH      3.f
#define DISPACE_STRENGTH     0.23f
#define RADIANS(degrees)     ((degrees) * 0.0174532925f)
#define DEGREES(radians)     ((radians) * 57.295779513f)
#define M_PI                 3.14159265358979323846f

///////////////////////////////////////////////////////////////////////////////////
// Math
///////////////////////////////////////////////////////////////////////////////////
struct vec2 {
    float x, y;
    vec2(float a = 0.f, float b = 0.f) : x(a), y(b) {}
    vec2 operator*(float f) {
        return vec2(f * x, f * y);
    }
    vec2 operator-(const vec2 &b) const {
        return vec2(x - b.x, y - b.y);
    }
    vec2 operator+(const vec2 &b) const {
        return vec2(x + b.x, y + b.y);
    }
};

struct vec3 {
    float x, y, z;
    vec3(float a = 0.f, float b = 0.f, float c = 0.f) : x(a), y(b), z(c) {}
    vec3 operator*(float f) {
        return vec3(f * x, f * y, f * z);
    }
    vec3 operator-() const {
        return vec3(-x, -y, -z);
    }
    vec3 operator-(float f) const {
        return vec3(x - f, y - f, z - f);
    }
    vec3 operator-(const vec3 &b) const {
        return vec3(x - b.x, y - b.y, z - b.z);
    }
    vec3 operator+(const vec3 &b) const {
        return vec3(x + b.x, y + b.y, z + b.z);
    }
    float length() {return sqrtf(x*x+y*y+z*z);}
};

struct vec4 {
    __declspec(align(16)) 
    union {
        struct { float x, y, z, w; };
        struct { float v[4]; };
    };
    vec4(float a = 0.f, float b = 0.f, float c = 0.f, float d = 0.f) : x(a), y(b), z(c), w(d) {}
    vec4(vec3 v, float f = 1.f) : x(v.x), y(v.y), z(v.z), w(f) {}
    float& operator[](int i) { return v[i]; }
    vec4 operator*(float f) {
        __m128 a = _mm_load_ps(&v[0]);
        __m128 b = _mm_load1_ps(&f);
        vec4 ret;
        _mm_store_ps(&ret.v[0], _mm_mul_ps(a, b));
        return ret;
    }
    vec4 operator-(const vec4 &b) const {
        return vec4(x - b.x, y - b.y, z - b.z, w -b.w);
    }
    vec4 operator+(const vec4 &b) const {
        return vec4(x + b.x, y + b.y, z + b.z, w + b.w);
    }
    vec3 to_vec3() {
        return vec3(x, y, z);
    }
};

vec2 operator*(float f, vec2 vec) {
    return vec * f;;
}

vec3 operator*(float f, vec3 vec) {
    return vec * f;;
}

vec4 operator*(float f, vec4& vec) {
    return vec * f;;
}

float dot(vec3 a, vec3 b) {
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

vec3 cross(vec3 a, vec3 b) {
    return vec3(a.y * b.z - b.y * a.z,
                a.z * b.x - b.z * a.x,
                a.x * b.y - b.x * a.y
                );
}

vec2 normalize(vec2 v) {
    float len_sq = v.x * v.x + v.y * v.y;
    if (len_sq < FLT_EPSILON)
        return v;
    return (1.f / sqrt(len_sq)) * v;
}

vec3 normalize(vec3 v) {
    float len_sq = v.x * v.x + v.y * v.y + v.z * v.z;
    if (len_sq < FLT_EPSILON)
        return v;
    return (1.f / sqrt(len_sq)) * v;
}

struct quat {
    union {
        struct { float x, y, z, w; };
        struct { vec3 vec; float scalar; };
    };
    quat(float angle, vec3 &axis) {
        float s = 0.f;
        float c = 0.f;
        float rads = RADIANS(angle);
        axis = normalize(axis);
        s = sinf(rads * 0.5f);
        c = cosf(rads * 0.5f);
        x = s * axis.x;
        y = s * axis.y;
        z = s * axis.z;
        w = c;
    }
    vec3 rotate(vec3 from) {
        vec3 a = 2.f * dot(vec, from) * vec;
        vec3 b = (w * w - dot(vec, vec)) * from;
        vec3 c = 2.f * w * cross(vec, from);
        return a + b + c;
    }
};

struct mat3 {
    vec3 col[3];
    mat3(vec3 c0, vec3 c1, vec3 c2) {
        col[0] = c0;
        col[1] = c1;
        col[2] = c2;
    }
    vec3 operator*(vec3 v) {
        return vec3(
            col[0].x * v.x + col[1].x * v.y + col[2].x * v.z,
            col[0].y * v.x + col[1].y * v.y + col[2].y * v.z,
            col[0].z * v.x + col[1].z * v.y + col[2].z * v.z
        );
    }
};

__m128 simd_mat4_mul_vec4(__m128 const *m, __m128 &v) {
    __m128 v0 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 v1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
    __m128 v2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
    __m128 v3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));

    __m128 m0 = _mm_mul_ps(m[0], v0);
    __m128 m1 = _mm_mul_ps(m[1], v1);
    __m128 m2 = _mm_mul_ps(m[2], v2);
    __m128 m3 = _mm_mul_ps(m[3], v3);

    __m128 a0 = _mm_add_ps(m0, m1);
    __m128 a1 = _mm_add_ps(m2, m3);
    __m128 a2 = _mm_add_ps(a0, a1);

    return a2;
}

void simd_mat4_mul(__m128 const *in1, __m128 const *in2, __m128 *out) { 
    #pragma omp parallel for
    for (int i = 0; i < 4; i++) {
        __m128 e0 = _mm_shuffle_ps(in2[i], in2[i], _MM_SHUFFLE(0, 0, 0, 0));
        __m128 e1 = _mm_shuffle_ps(in2[i], in2[i], _MM_SHUFFLE(1, 1, 1, 1));
        __m128 e2 = _mm_shuffle_ps(in2[i], in2[i], _MM_SHUFFLE(2, 2, 2, 2));
        __m128 e3 = _mm_shuffle_ps(in2[i], in2[i], _MM_SHUFFLE(3, 3, 3, 3));

        __m128 m0 = _mm_mul_ps(in1[0], e0);
        __m128 m1 = _mm_mul_ps(in1[1], e1);
        __m128 m2 = _mm_mul_ps(in1[2], e2);
        __m128 m3 = _mm_mul_ps(in1[3], e3);

        __m128 a0 = _mm_add_ps(m0, m1);
        __m128 a1 = _mm_add_ps(m2, m3);
        __m128 a2 = _mm_add_ps(a0, a1);

        out[i] = a2;
    }
}

struct mat4;
void M128ToMat4(__m128* in, mat4* out);
void Mat4ToM128(mat4* in, __m128* out);

struct mat4 {
    __declspec(align(16)) 
    union {
        float v[16];
        struct { vec4 col[4]; };
    };
    mat4(vec4 c0 = vec4(1.f, 0.f, 0.f, 0.f),
         vec4 c1 = vec4(0.f, 1.f, 0.f, 0.f),
         vec4 c2 = vec4(0.f, 0.f, 1.f, 0.f),
         vec4 c3 = vec4(0.f, 0.f, 0.f, 1.f)
        ){
        col[0] = c0;
        col[1] = c1;
        col[2] = c2;
        col[3] = c3;
    }
    vec4& operator[](int i) { return col[i]; }
    mat4  operator*(float f) {
        return mat4(f * col[0], f * col[1], f * col[2], f * col[3]);
    }
    vec4 operator*(vec4 vec) {
        __m128 M[4];
        vec4 ret;
        Mat4ToM128(this, M);
        __m128 V = _mm_load_ps(&vec[0]);
        __m128 res = simd_mat4_mul_vec4(M, V);
        _mm_store_ps(&ret[0], res);
        return ret;
    }
    mat4 operator*(mat4 &m) {
        __m128 in1[4];
        __m128 in2[4];
        __m128 out[4];
        mat4 res;
        Mat4ToM128(this, in1);
        Mat4ToM128(&m, in2);
        simd_mat4_mul(in1, in2, out);
        M128ToMat4(out, &res);
        return res;
    }
};

void Mat4ToM128(mat4 *in, __m128 *out) {
    #pragma omp parallel for
    for (int i = 0; i < 4; i++)
        out[i] = _mm_load_ps(&in->col[i][0]);
}

void M128ToMat4(__m128* in, mat4* out) {
    #pragma omp parallel for
    for (int i = 0; i < 4; i++)
        _mm_store_ps(&out->col[i][0], in[i]);
}

void simd_mat4_transpose(__m128 const *in, __m128 *out) {
    __m128 tmp0 = _mm_shuffle_ps(in[0], in[1], 0x44);
    __m128 tmp2 = _mm_shuffle_ps(in[0], in[1], 0xEE);
    __m128 tmp1 = _mm_shuffle_ps(in[2], in[3], 0x44);
    __m128 tmp3 = _mm_shuffle_ps(in[2], in[3], 0xEE);

    out[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88);
    out[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
    out[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88);
    out[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
}

mat4 transposed(mat4 m) {
    __m128 in[4];
    __m128 out[4];
    Mat4ToM128(&m, in);
    simd_mat4_transpose(in, out);
    mat4 res;
    M128ToMat4(out, &res);
    return res;
}

__m128 simd_vec4_dot(const __m128 &v1, const __m128 &v2) {
    __m128 const mul0 = _mm_mul_ps(v1, v2);
    __m128 const swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
    __m128 const add0 = _mm_add_ps(mul0, swp0);
    __m128 const swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3));
    __m128 const add1 = _mm_add_ps(add0, swp1);
    return add1;
}

// ref to glm
void simd_mat4_inverse(__m128 const *in, __m128 *out) {
    __m128 Fac0; {
        __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(3, 3, 3, 3));
        __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(2, 2, 2, 2));

        __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(2, 2, 2, 2));
        __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(3, 3, 3, 3));

        __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
        __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
        Fac0 = _mm_sub_ps(Mul00, Mul01);
    }

    __m128 Fac1; {
        __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(3, 3, 3, 3));
        __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(1, 1, 1, 1));

        __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(1, 1, 1, 1));
        __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(3, 3, 3, 3));

        __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
        __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
        Fac1 = _mm_sub_ps(Mul00, Mul01);
    }

    __m128 Fac2; {
        __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(2, 2, 2, 2));
        __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(1, 1, 1, 1));

        __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(1, 1, 1, 1));
        __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(2, 2, 2, 2));

        __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
        __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
        Fac2 = _mm_sub_ps(Mul00, Mul01);
    }

    __m128 Fac3; {
        __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(3, 3, 3, 3));
        __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(0, 0, 0, 0));

        __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(0, 0, 0, 0));
        __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(3, 3, 3, 3));

        __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
        __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
        Fac3 = _mm_sub_ps(Mul00, Mul01);
    }

    __m128 Fac4; {
        __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(2, 2, 2, 2));
        __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(0, 0, 0, 0));

        __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(0, 0, 0, 0));
        __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(2, 2, 2, 2));

        __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
        __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
        Fac4 = _mm_sub_ps(Mul00, Mul01);
    }

    __m128 Fac5; {
        __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(1, 1, 1, 1));
        __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(0, 0, 0, 0));

        __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(0, 0, 0, 0));
        __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
        __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(1, 1, 1, 1));

        __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
        __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
        Fac5 = _mm_sub_ps(Mul00, Mul01);
    }

    __m128 SignA = _mm_set_ps(1.0f, -1.0f, 1.0f, -1.0f);
    __m128 SignB = _mm_set_ps(-1.0f, 1.0f, -1.0f, 1.0f);

    __m128 Temp0 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(0, 0, 0, 0));
    __m128 Vec0 = _mm_shuffle_ps(Temp0, Temp0, _MM_SHUFFLE(2, 2, 2, 0));

    __m128 Temp1 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(1, 1, 1, 1));
    __m128 Vec1 = _mm_shuffle_ps(Temp1, Temp1, _MM_SHUFFLE(2, 2, 2, 0));

    __m128 Temp2 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(2, 2, 2, 2));
    __m128 Vec2 = _mm_shuffle_ps(Temp2, Temp2, _MM_SHUFFLE(2, 2, 2, 0));

    __m128 Temp3 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(3, 3, 3, 3));
    __m128 Vec3 = _mm_shuffle_ps(Temp3, Temp3, _MM_SHUFFLE(2, 2, 2, 0));

    __m128 Mul00 = _mm_mul_ps(Vec1, Fac0);
    __m128 Mul01 = _mm_mul_ps(Vec2, Fac1);
    __m128 Mul02 = _mm_mul_ps(Vec3, Fac2);
    __m128 Sub00 = _mm_sub_ps(Mul00, Mul01);
    __m128 Add00 = _mm_add_ps(Sub00, Mul02);
    __m128 Inv0 = _mm_mul_ps(SignB, Add00);

    __m128 Mul03 = _mm_mul_ps(Vec0, Fac0);
    __m128 Mul04 = _mm_mul_ps(Vec2, Fac3);
    __m128 Mul05 = _mm_mul_ps(Vec3, Fac4);
    __m128 Sub01 = _mm_sub_ps(Mul03, Mul04);
    __m128 Add01 = _mm_add_ps(Sub01, Mul05);
    __m128 Inv1 = _mm_mul_ps(SignA, Add01);

    __m128 Mul06 = _mm_mul_ps(Vec0, Fac1);
    __m128 Mul07 = _mm_mul_ps(Vec1, Fac3);
    __m128 Mul08 = _mm_mul_ps(Vec3, Fac5);
    __m128 Sub02 = _mm_sub_ps(Mul06, Mul07);
    __m128 Add02 = _mm_add_ps(Sub02, Mul08);
    __m128 Inv2 = _mm_mul_ps(SignB, Add02);

    __m128 Mul09 = _mm_mul_ps(Vec0, Fac2);
    __m128 Mul10 = _mm_mul_ps(Vec1, Fac4);
    __m128 Mul11 = _mm_mul_ps(Vec2, Fac5);
    __m128 Sub03 = _mm_sub_ps(Mul09, Mul10);
    __m128 Add03 = _mm_add_ps(Sub03, Mul11);
    __m128 Inv3 = _mm_mul_ps(SignA, Add03);

    __m128 Row0 = _mm_shuffle_ps(Inv0, Inv1, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 Row1 = _mm_shuffle_ps(Inv2, Inv3, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 Row2 = _mm_shuffle_ps(Row0, Row1, _MM_SHUFFLE(2, 0, 2, 0));

    __m128 Det0 = simd_vec4_dot(in[0], Row2);
    __m128 Rcp0 = _mm_div_ps(_mm_set1_ps(1.0f), Det0);

    out[0] = _mm_mul_ps(Inv0, Rcp0);
    out[1] = _mm_mul_ps(Inv1, Rcp0);
    out[2] = _mm_mul_ps(Inv2, Rcp0);
    out[3] = _mm_mul_ps(Inv3, Rcp0);
}

mat4 inverse(mat4 m) {
    __m128 in[4];
    __m128 out[4];
    mat4 res;
    Mat4ToM128(&m, in);
    simd_mat4_inverse(in, out);
    M128ToMat4(out, &res);
    return res;
}

mat4 rotateY(double angle) {
    float rad = RADIANS(50.f * angle);
    float cs = cos(rad);
    float sn = sin(rad);
    return mat4(
        vec4(cs, 0.f, -sn, 0.f),
        vec4(0.f, 1.f, 0.f, 0.f),
        vec4(sn, 0.f, cs, 0.f),
        vec4(0.f, 0.f, 0.f, 1.f)
    );
}

///////////////////////////////////////////////////////////////////////////////////
// Procedural Texturing
///////////////////////////////////////////////////////////////////////////////////

template <typename T>
T lerp(T& a, T& b, float t) {
    return a + (b - a) * t;
}

class Noise {
    const uint32_t TABLE_SIZE = 0X100U;
    const uint32_t TABLE_MASK = 0XFFU;
    vector<vector<vec2>> grid;
public:
    Noise() {
        std::mt19937 gen(static_cast<uint32_t>(time(nullptr)));
        std::uniform_real_distribution<float> distrFloat(-1.f, 1.f);
        auto randFloat = std::bind(distrFloat, gen);
        grid = vector<vector<vec2>> (TABLE_SIZE, vector<vec2>(TABLE_SIZE));
        for (uint32_t row = 0; row < TABLE_SIZE; row++)
            for (uint32_t col = 0; col < TABLE_SIZE; col++) 
                grid[row][col] = normalize(vec2(randFloat(), randFloat()));
    }

    vec3 eval(vec2 p) {
        uint32_t x = (uint32_t)std::floor(p.x);
        uint32_t y = (uint32_t)std::floor(p.y);
        float u = fade(p.x - x);
        float v = fade(p.y - y);
        uint32_t x0 = x & TABLE_MASK;
        uint32_t x1 = (x0 + 1) & TABLE_MASK;
        uint32_t y0 = y & TABLE_MASK;
        uint32_t y1 = (y0 + 1) & TABLE_MASK;
        vec2 lerpx1 = lerp(grid[x0][y0], grid[x1][y0], u);
        vec2 lerpx2 = lerp(grid[x0][y1], grid[x1][y1], u);
        vec2 lerpy = lerp(lerpx1, lerpx2, v);
        return vec3(lerpy.x, lerpy.y, COLOR_STRENGTH);
    }
private:
    float fade(float t) {
        return t * t * t * (t * (t * 6 - 15) + 10);
    }
};

struct Image {
    int height;
    int width;
    int channels;
    vector<uint8_t> data;
    Image(int h = 0, int w = 0, int c = 0, vector<uint8_t> raw = vector<uint8_t>()) : height(h), width(w), channels(c), data(raw) {}
};

vec3 getPixelAt(const Image *map, int x, int y) {
    if (x < 0) x = map->width - 1;
    if (x >= map->width) x = 0;
    if (y < 0) y = map->height - 1;;
    if (y >= map->height) y = 0;

    float r, g, b;
    int ofs = map->channels * (y * map->width + x);
    r = g = b = map->channels > 0 ? map->data[ofs + 0] : 0.f;
    g = b = map->channels > 1 ? map->data[ofs + 1] : r;
    b = map->channels > 2 ? map->data[ofs + 2] : g;
    return (1.f / 255.f) * vec3(r, g, b);
}

// vec3 returned in range 0.0 - 1.0
vec3 texture(const Image *map, const vec2& texCoord) {
    vec2 uv = texCoord;
    int x = int(map->width * uv.x);
    int y = int(map->height * uv.y);
    float u = map->width * uv.x - x;
    float v = map->height * uv.y - y;

    vec3 c00 = getPixelAt(map, x, y);
    vec3 c10 = getPixelAt(map, x + 1, y);
    vec3 c01 = getPixelAt(map, x, y + 1);
    vec3 c11 = getPixelAt(map, x + 1, y + 1);
    return (1 - u) * (1 - v) * c00 + u * (1 - v) * c10 + (1 - u) * v * c01 + u * v * c11;
}

Image buildColorTexture(void) {
    Noise noise;
    float baseFreq = 12;
    int level = 6;
    float amplitudeMult = 0.5f;
    float frequencyMult = 1.9f;
    int width = 1024;
    int height = 1024;

    vector<uint8_t> pixels;
    pixels.resize(width * height * 3);

    float dx = 1.0f / (width - 1);
    float dy = 1.0f / (height - 1);
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width / 2; col++) {
            float x = dx * col;
            float y = dy * row;
            float freq = baseFreq;
            float amplitude = 1.f;
            vec3 sumVec;
            for (int oct = 0; oct < level; oct++) {
                sumVec = sumVec + amplitude * noise.eval(freq * vec2(x, y));
                freq *= frequencyMult;
                amplitude *= amplitudeMult;
            }
            sumVec = normalize(sumVec);
            float turb = sin(sumVec.y);
            int lofs = 3 * (row * width + col);
            int rofs = 3 * (row * width + width - 1 - col);
            pixels[lofs + 0] = pixels[rofs + 0] = static_cast<uint8_t>((turb * 1.0 + 1.0) * 255.f * 0.5);
            pixels[lofs + 1] = pixels[rofs + 1] = static_cast<uint8_t>((turb * 0.7 + 1.0) * 255.f * 0.5);
            pixels[lofs + 2] = pixels[rofs + 2] = static_cast<uint8_t>((turb * 0.8 + 1.0) * 255.f * 0.5);
        }
    }
    return Image(width, height, 3, pixels);
}

Image buildColorTextureGrid(void) {
    vector<uint8_t> pixels(512 * 512 * 3, 0);
    for (int i = 0; i < 512; i++) {
        for (int j = 0; j < 512; j++) {
            int x = i / 64, y = j / 64;
            int ofs = 3 * (i * 512 + j);
            if ((x + y) & 1)
                pixels[ofs] = pixels[ofs + 1] = 250;
            else
                pixels[ofs + 1] = pixels[ofs + 2] = 128;
        }
    }
    return Image(512, 512, 3, pixels);
}

Image buildNormalTextureFrom(const Image *src) {
    vector<uint8_t> pixels;
    pixels.reserve(3 * src->width * src->height);
    for (int y = 0; y < src->height; y++) {
        for (int x = 0; x < src->width; x++) {
            float tl = getPixelAt(src, x - 1, y - 1).length();
            float t = getPixelAt(src, x - 1, y).length();
            float tr = getPixelAt(src, x - 1, y + 1).length();
            float r = getPixelAt(src, x, y + 1).length();
            float br = getPixelAt(src, x + 1, y + 1).length();
            float b = getPixelAt(src, x + 1, y).length();
            float bl = getPixelAt(src, x + 1, y - 1).length();
            float l = getPixelAt(src, x, y - 1).length();
            float dx = float(tr + 2.f * r + br) - float(tl + 2.f * l + bl);
            float dy = float(bl + 2.f * b + br) - float(tl + 2.f * t + tr);
            float dz = 1.f / NORMAL_STRENGTH;
            vec3 n = normalize(vec3(dx, dy, dz));
            pixels.push_back(static_cast<uint8_t>((n.x + 1.0) * 255.f * 0.5));
            pixels.push_back(static_cast<uint8_t>((n.y + 1.0) * 255.f * 0.5));
            pixels.push_back(static_cast<uint8_t>((n.z + 1.0) * 255.f * 0.5));
        }
    }
    return Image(src->width, src->height, 3, pixels);
}


///////////////////////////////////////////////////////////////////////////////////
// Modeling
///////////////////////////////////////////////////////////////////////////////////
struct Mesh {
    vector<vec3> pos;
    vector<vec3> normal;
    vector<vec3> tangent;
    vector<vec2> uv;
    vector<int>  index;
};

struct Plane : public Mesh {
    Plane(float size, int divs) {
        assert(size > 1.f);
        assert(divs > 0);
        float dxz = 2.f * size / divs;
        for (int i = 0; i <= divs; i++) {
            float z = i * dxz - size;
            for (int j = 0; j <= divs; j++) {
                float x = j * dxz - size;
                pos.push_back(vec3(x, -1E-2F, z));
                normal.push_back(vec3(0.f, 1.f, 0.f));
                tangent.push_back(vec3(1.f, 0.f, 0.f));
                uv.push_back(vec2(float(j) / float(divs), float(i) / float(divs)));
            }
        }
        for (int z = 0; z < divs; z++) {
            int start = z * (divs + 1);
            int next_start = (z + 1) * (divs + 1);
            for (int x = 0; x < divs; x++) {
                int next_x = x + 1;
                index.push_back(start + x);
                index.push_back(next_start + x);
                index.push_back(start + next_x);
                index.push_back(start + next_x);
                index.push_back(next_start + x);
                index.push_back(next_start + next_x);
            }
        }
    }
};

struct Vase : public Mesh {
    Vase(int y_edges) {
        assert(y_edges > 3);
        int x_edges = 2 * y_edges;
        float dx = 2.f * M_PI / x_edges;
        float dy = 2.f * M_PI / y_edges;
        for (int y = 0; y <= y_edges; y++) {
            float v = y * dy;
            float cv = cos(v);
            float sv = sin(v);
            float r = 2 + sv;
            for (int x = 0; x <= x_edges; x++) {
                float u = x * dx;
                float cu = cos(u);
                float su = sin(u);
                vec3 tang(cu * cv, 1, su * cv);
                vec3 biTangent(-su, 0, cu);
                pos.push_back(vec3(r * cu, v, r * su));
                tangent.push_back(tang);
                normal.push_back(normalize(cross(tang, biTangent)));
                uv.push_back(vec2(u / (2 * M_PI), v / (2 * M_PI)));
            }
        }
        for (int y = 0; y < y_edges; y++) {
            int start = y * (x_edges + 1);
            int next_start = (y + 1) * (x_edges + 1);
            for (int x = 0; x < x_edges; x++) {
                int next_x = x + 1;
                index.push_back(start + x);
                index.push_back(next_start + x);
                index.push_back(start + next_x);
                index.push_back(start + next_x);
                index.push_back(next_start + x);
                index.push_back(next_start + next_x);
            }
        }
        pos.push_back(vec3());
        tangent.push_back(vec3(1.f, 0.f, 0.f));
        normal.push_back(vec3(0.f, -1.f, 0.f));
        uv.push_back(vec2(0.5, 0.5));
        for (int x = 0; x < x_edges; x++) {
            index.push_back((int)pos.size() - 1);
            index.push_back(x);
            index.push_back(x + 1);
            normal[x] = vec3(0.f, -1.f, 0.f);
        }
        normal[x_edges] = vec3(0.f, -1.f, 0.f);
    }
};


///////////////////////////////////////////////////////////////////////////////////
// Orbit Camera, events process
///////////////////////////////////////////////////////////////////////////////////
uint8_t activeColorTexture = 1u;    // 'C' key
uint8_t activeNormalTexture = 0u;   // 'N' key
uint8_t activeDispTexture = 0u;     // 'B' key
uint8_t activeLineMode = 0u;        // 'L' key
uint8_t cullFace = 0u;              // 'K' key 
uint8_t selectShader = 0u;          // SPACE key.

uint8_t correctCentric = 1u;        // 'A' key
uint8_t clipType = CLIP_HOMO;       // 'F' key.  0:homogeneous clipping, 1:raw homogeneous clip, 2:screen clipping
uint8_t showVase = 1u;              // 'V' key
uint8_t showPlane = 1u;             // 'P' key

uint8_t curClipType = clipType;     // when rendering a frame, the states should not be changed

class Camera {
    vec3  pos, target;
    float zoom, aspect;
    mat4  projection, view;
public:
    Camera(vec3 position, vec3 center, float asp) : pos(position), target(center), zoom(25.f), aspect(asp) {
        updateProjection();
        updateView();
    }
    void processInput() {
        ExMessage msg;
        static bool firstClick = true;
        static int lastx, lasty;
        while (peekmessage(&msg, EM_MOUSE | EM_KEY)) {
            // Drag Left Button: Rotate
            if (WM_MOUSEMOVE == msg.message) {
                if (false == msg.lbutton) {
                    firstClick = true;
                }
                else if (firstClick) {
                    firstClick = false;
                    lastx = msg.x;
                    lasty = msg.y;
                }
                else {
                    float xoff = 0.5f * (msg.x - lastx);
                    float yoff = 0.5f * (msg.y - lasty);
                    lastx = msg.x;
                    lasty = msg.y;
                    mouseMove(xoff, yoff);
                }
            }
            else if (WM_MOUSEWHEEL == msg.message) {
                if (msg.lbutton)    // Left Button + Scroll: Camera Forward or Backward, have a bug
                    srcollPosition(-0.01f * msg.wheel);
                else                // Scroll: field of view
                    scrollZoom(0.01f * msg.wheel);
            }
            else if (WM_KEYDOWN == msg.message) {
                switch (msg.vkcode) {
                case VK_SPACE: selectShader = (selectShader + 1) % 3;    break;
                case 'F':      clipType = (clipType + 1) % 3;            break;
                case 'C':      activeColorTexture ^= 1u;                 break;
                case 'N':      activeNormalTexture ^= 1u;                break;
                case 'B':      activeDispTexture ^= 1u;                  break;
                case 'L':      activeLineMode ^= 1u;                     break;
                case 'K':      cullFace ^= 1u;                           break;
                case 'A':      correctCentric ^= 1u;                     break;
                case 'V':      showVase ^= 1u;                           break;
                case 'P':      showPlane ^= 1u;                          break;
                default:                                                 break;
                }
            }
        }
    }
    mat4& viewMatrix()        { return view; }
    mat4& projectionMatrix()  { return projection;}
    vec3& position()          { return pos; }
    float fov()               { return zoom; }
private:
    void updateProjection() {
        float t = Z_NEAR * tan(RADIANS(0.5f * zoom));
        float b = -t;
        float r = t * aspect;
        float l = -r;
        projection[0] = vec4(2 * Z_NEAR / (r - l), 0.f, 0.f, 0.f);
        projection[1] = vec4(0.f, 2 * Z_NEAR / (t - b), 0.f, 0.f);
        projection[2] = vec4((r + l) / (r - l), (t + b) / (t - b), -(Z_FAR - Z_NEAR) / (Z_FAR - Z_NEAR), -1.f);
        projection[3] = vec4(0.f, 0.f, -2 * Z_FAR * Z_NEAR / (Z_FAR - Z_NEAR), 0.f);
    }
    void updateView() {
        vec3 z_axis = normalize(pos - target);
        vec3 x_axis = normalize(cross(WORLD_UP, z_axis));
        vec3 y_axis = normalize(cross(z_axis, x_axis));

        view[0] = vec4(x_axis.x, y_axis.x, z_axis.x, 0.f);
        view[1] = vec4(x_axis.y, y_axis.y, z_axis.y, 0.f);
        view[2] = vec4(x_axis.z, y_axis.z, z_axis.z, 0.f);
        view[3] = vec4(-dot(x_axis, pos), -dot(y_axis, pos), -dot(z_axis, pos), 1.f);
    }

    void mouseMove(float xoff, float yoff) {
        vec3 front = normalize(target - pos);
        vec3 right = normalize(cross(front, WORLD_UP));
        vec3 up = normalize(cross(right, front));
        float angle = DEGREES(acos(dot(front, WORLD_UP)));
        if ((yoff < 0 && (angle + yoff < 20)) || (yoff > 0 && (angle + yoff > 160)))
            return;

        quat qx(-xoff, up);
        pos = qx.rotate(pos - target) + target;
        front = normalize(target - pos);
        right = normalize(cross(front, up));

        quat qy(-yoff, right);
        pos = qy.rotate(pos - target) + target;
        updateView();
    }

    void srcollPosition(float yoff) {
        vec3 front = normalize(target - pos);
        vec3 newPos = pos + yoff * front;
        vec3 pt = target - newPos;
        float len = pt.length();
        if (dot(pt, front) <= 0 || len < 3.5f)
            return;
        pos = newPos;
        updateView();
    }
    
    void scrollZoom(float yoff) {
        if (zoom + yoff > 120 || zoom + yoff < 10)
            return;
        zoom += yoff;
        updateProjection();
    }
};

///////////////////////////////////////////////////////////////////////////////////
// Rendering
///////////////////////////////////////////////////////////////////////////////////
struct Vertex {
    vec3 pos, normal, tangent, color;
    vec2 uv;
    Vertex() {}
    Vertex(vec3 p, vec3 n, vec3 t, vec3 c, vec2 u) : pos(p), normal(n), tangent(t), color(c), uv(u) {}

    Vertex operator+(const Vertex &b) const {
        return Vertex(pos + b.pos, normal + b.normal, tangent + b.tangent, color + b.color, uv + b.uv);
    }
    Vertex operator-(const Vertex& b) const {
        return Vertex(pos - b.pos, normal - b.normal, tangent - b.tangent, color - b.color, uv - b.uv);
    }
    Vertex operator*(float t) {
        return Vertex(pos * t, normal * t, tangent * t, color * t, uv * t);
    }
};

typedef void (*VERTEX_SHADER)(const Vertex& in, Vertex& out, vec4& mvpPos);
typedef vec3(*FRAGMENT_SHADER)(const Vertex& in);

struct Program {
    bool              flatHint;
    VERTEX_SHADER     vertexShader;
    FRAGMENT_SHADER   fragmentShader;
    Program(VERTEX_SHADER vert = nullptr, FRAGMENT_SHADER frag = nullptr, bool isFlat = false) : 
            vertexShader(vert), fragmentShader(frag), flatHint(isFlat) {}
};

struct Light {
    vec3  dir;      // Light direction in eye coords.
    float La;       // Ambient light intensity
    float Ld;       // Diffuse light intensity
    float Ls;       // Specular light intensity
    Light(vec3 d = vec3(0.f, 1.f, 1.f), float la = 0.03f, float ld = 0.2f, float ls = 0.8f) : 
            dir(d), La(la), Ld(ld), Ls(ls) {}
};

// Sharing of uniform data between shader programs.
struct perFrameData {
    mat4 modelViewMatrix;
    mat4 projectionMatrix;
    mat4 normalMatrix;
    mat4 viewportMatrix;
    mat4 MVP;

    Image *colorMap;
    Image *normalMap;
    Image *dispMap;

    Light light[4] = {
        Light(vec3( 1.0f,  1.f, 1.f)), 
        Light(vec3(-2.0f, -2.f, 1.f)), 
        Light(vec3(-0.5f,  0.f, 1.f)),
        Light(vec3(-1.0f,  0.f, 0.f)) 
    };
} uniform;

void setMatrixUniform(mat4 &m, mat4 &v, mat4 &p) {
    float w2 = WIDTH / 2.f;
    float h2 = HEIGHT / 2.f;
    uniform.modelViewMatrix = v * m;
    uniform.projectionMatrix = p;
    uniform.normalMatrix = transposed(inverse(uniform.modelViewMatrix));
    uniform.viewportMatrix = mat4(vec4(w2, 0.f, 0.f, 0.f), vec4(0.f, h2, 0.f, 0.f), vec4(0.f, 0.f, 1.f, 0.f), vec4(w2 - 0.5f, h2 - 0.5f, 0.f, 1.f));
    uniform.MVP = p * uniform.modelViewMatrix;
}

void clamp(int& value, int l, int r) {
    value = min(value, r);
    value = max(value, l);
}

#define CLIP_FUNC(name, sign, dir, dir1, dir2) \
float name(vec4 *a, vec4 *b) {\
    float t, dx, dy, dz, dw, den;\
    dx = (b->x - a->x);\
    dy = (b->y - a->y);\
    dz = (b->z - a->z);\
    dw = (b->w - a->w);\
    den = -(sign d ## dir) + dw;\
    if (den == 0) t=0;\
    else t = (sign a->dir - a->w) / den;\
    return t;\
}

CLIP_FUNC(clipXmin, -, x, y, z)
CLIP_FUNC(clipXmax, +, x, y, z)
CLIP_FUNC(clipYmin, -, y, x, z)
CLIP_FUNC(clipYmax, +, y, x, z)
CLIP_FUNC(clipZmin, -, z, x, y)
CLIP_FUNC(clipZmax, +, z, x, y)

float (*clipProc[6])(vec4*, vec4*) = {
    clipXmin, clipXmax,
    clipYmin, clipYmax,
    clipZmin, clipZmax
};

class Render {
    DWORD*          framebuf;
    VERTEX_SHADER   vertexShader;
    FRAGMENT_SHADER fragmentShader;
    vector<float>   zbuf;
    int             drawMode;
    bool            flatHint;
    // every mesh with a draw Context
    struct Context {
        int             vertices;
        vector<Vertex>  VertexIn;
        vector<Vertex>  VertexOut;
        vector<vec4>    mvpPos;
        vector<int>     index;
        Image           colorMap;
        Image           normalMap;
        Image           dispMap;
    };

    vector<Context> ctx;

public:
    Render() : vertexShader(nullptr), fragmentShader(nullptr), drawMode(MODE_FILL), flatHint(false) { 
        initgraph(WIDTH, HEIGHT);
        setbkcolor(77u << 16 | 76u << 8 | 51u);     // RGB
        framebuf = GetImageBuffer();
        assert(framebuf);
        zbuf.resize(WIDTH * HEIGHT);
    }

    int createBuffersWith(Mesh *msh) {
        ctx.push_back(Context());
        int vao = ctx.size() - 1;
        assert(vao > -1);
        assert(msh);
        ctx[vao].index = msh->index;
        ctx[vao].vertices = (int)msh->pos.size();
        ctx[vao].mvpPos.resize(ctx[vao].vertices);
        ctx[vao].VertexIn.resize(ctx[vao].vertices);
        ctx[vao].VertexOut.resize(ctx[vao].vertices);
        for (int i = 0; i < ctx[vao].vertices; i++) {
            ctx[vao].VertexIn[i].pos = msh->pos[i];
            ctx[vao].VertexIn[i].normal = msh->normal[i];
            ctx[vao].VertexIn[i].tangent = msh->tangent[i];
            ctx[vao].VertexIn[i].uv = msh->uv[i];
        }
        return vao;
    }

    void useProgram(Program& shader) {
        flatHint = shader.flatHint;
        vertexShader = shader.vertexShader;
        fragmentShader = shader.fragmentShader;
    }

    void clear(void) {
        std::fill_n(framebuf, WIDTH * HEIGHT, 51u << 16 | 76u << 8 | 77u);  // BGRA
        std::fill_n(zbuf.begin(), zbuf.size(), FLT_MAX);
    }

    void drawElements(int vao) {
        assert(vao > -1);
        useTexture(vao);
        curClipType = clipType;
        vector<Vertex>& VertexIn = ctx[vao].VertexIn;
        vector<Vertex>& VertexOut = ctx[vao].VertexOut;
        vector<vec4>& mvpPos = ctx[vao].mvpPos;
        vector<int>& index = ctx[vao].index;
        int& vertices = ctx[vao].vertices;

        #pragma omp parallel for
        for (int i = 0; i < vertices; i++)
            vertexShader(VertexIn[i], VertexOut[i], mvpPos[i]);
        
        #pragma omp parallel for
        for (int i = 0; i < (int)index.size(); i += 3) {
            Vertex vert[3] = { VertexOut[index[i]], VertexOut[index[i + 1]], VertexOut[index[i + 2]] };
            vec4 v[3] = {mvpPos[index[i]], mvpPos[index[i + 1]], mvpPos[index[i + 2]] };

            int clp[3] = { clipCode(v[0]), clipCode(v[1]), clipCode(v[2]) };
            if (curClipType == CLIP_HOMO)
                clipTriangle(vert, v, clp, 0);
            else {
                if ((curClipType == CLIP_HOMO_RAW) && (clp[0] | clp[1] | clp[2]))
                    continue;
                drawTriangle(vert, v);
            }

        }
    }

    void bindTexture(int vao, const Image& colorMap, const Image& normalMap, const Image& dispMap) {
        assert(vao != -1);
        ctx[vao].colorMap = colorMap;
        ctx[vao].normalMap = normalMap;
        ctx[vao].dispMap = dispMap;
    }

    void polygonMode(int drawhint) {
        drawMode = drawhint;
    }

private:
    void useTexture(int vao) {
        assert(vao != -1);
        uniform.colorMap = &ctx[vao].colorMap;
        uniform.normalMap = &ctx[vao].normalMap;
        uniform.dispMap = &ctx[vao].dispMap;
    }

    void drawTriangle(Vertex *vert, vec4 *v) {
        float hw[3] = { 0 };
        for (int i = 0; i < 3; i++) {
            v[i] = uniform.viewportMatrix * v[i];
            hw[i] = 1.f / v[i].w;
            v[i] = v[i] * hw[i];
        }

        if (cullFace && cross(v[1].to_vec3() - v[0].to_vec3(), v[2].to_vec3() - v[0].to_vec3()).z <= 0)
            return;

        if (activeLineMode) {
            drawLine(v[0], v[1]);
            drawLine(v[0], v[2]);
            drawLine(v[1], v[2]);
            return;
        }

        int left = (int)min(v[0].x, min(v[1].x, v[2].x));
        int right = (int)(1 + max(v[0].x, max(v[1].x, v[2].x)));
        int bottom = (int)min(v[0].y, min(v[1].y, v[2].y));
        int top = (int)(1 + max(v[0].y, max(v[1].y, v[2].y)));

        if (curClipType == CLIP_SCRN) {
            clamp(left, 0, WIDTH);    clamp(right, 0, WIDTH);
            clamp(bottom, 0, HEIGHT); clamp(top, 0, HEIGHT);
        }

        for (int x = left; x < right; x++) {
            for (int y = bottom; y < top; y++) {
                vec3 centric = barycentric(x, y, v);
                if (insideTriangle(centric)) {
                    if (flatHint) {
                        float z = v[1].z;
                        if (z < zbuf[indexOf(x, y)]) {
                            zbuf[indexOf(x, y)] = z;
                            setpixel(x, y, fragmentShader(vert[1]));
                        }
                    }
                    else {
                        float& a = centric.x, & b = centric.y, & c = centric.z;
                        // depth value interpolated linearly
                        float z = a * v[0].z + b * v[1].z + c * v[2].z;
                        if (correctCentric) {
                            perspectiveCorrect(centric, hw);
                        }
                        if (z < zbuf[indexOf(x, y)]) {
                            Vertex fragIn;
                            zbuf[indexOf(x, y)] = z;
                            fragIn.pos = a * vert[0].pos + b * vert[1].pos + c * vert[2].pos;
                            fragIn.normal = a * vert[0].normal + b * vert[1].normal + c * vert[2].normal;
                            fragIn.tangent = a * vert[0].tangent + b * vert[1].tangent + c * vert[2].tangent;
                            fragIn.color = a * vert[0].color + b * vert[1].color + c * vert[2].color;
                            fragIn.uv = a * vert[0].uv + b * vert[1].uv + c * vert[2].uv;
                            setpixel(x, y, fragmentShader(fragIn));
                        }
                    }
                }
            }
        }
    }


    void clipTriangle(Vertex *vert, vec4 *v, int *clp, int clipBit) {
        Vertex tmp1, tmp2, ver[3];
        vec4 vtmp1, vtmp2, ve[3];
        float t = 0.f;

        int co = clp[0] | clp[1] | clp[2];
        if (co == 0) {
            drawTriangle(vert, v);
        }
        else {
            int ca = clp[0] & clp[1] & clp[2];
            if (ca) return;     // the triangle is completely outside 
            while (clipBit < 6 && (co & (1 << clipBit)) == 0)
                clipBit++;
            if (clipBit == 6)   return;
            int clipMask = 1 << clipBit;
            int col = (clp[0] ^ clp[1] ^ clp[2]) & clipMask;
            if (col) {
                // one point outside, make p0 outside first
                if (clp[0] & clipMask) {
                    ver[0] = vert[0], ver[1] = vert[1], ver[2] = vert[2];
                    ve[0] = v[0], ve[1] = v[1], ve[2] = v[2];
                }
                else if (clp[1] & clipMask) {
                    ver[0] = vert[1], ver[1] = vert[2], ver[2] = vert[0];
                    ve[0] = v[1], ve[1] = v[2], ve[2] = v[0];
                }
                else {
                    ver[0] = vert[2], ver[1] = vert[0], ver[2] = vert[1];
                    ve[0] = v[2], ve[1] = v[0], ve[2] = v[1];
                }

                t = clipProc[clipBit](&ve[0], &ve[1]);
                tmp1 = lerp(ver[0], ver[1], t);
                vtmp1 = lerp(ve[0], ve[1], t);

                t = clipProc[clipBit](&ve[2], &ve[0]);
                tmp2 = lerp(ver[2], ver[0], t);
                vtmp2 = lerp(ve[2], ve[0], t);

                Vertex vert1[3] = { ver[1], ver[2], tmp1 };
                vec4 v1[3] = { ve[1], ve[2], vtmp1 };
                int cl1[3] = { clipCode(v1[0]), clipCode(v1[1]), clipCode(v1[2]) };
                clipTriangle(vert1, v1, cl1, clipBit + 1);

                Vertex vert2[3] = { tmp2, tmp1, ver[2]};
                vec4 v2[3] = { vtmp2, vtmp1, ve[2]};
                int cl2[3] = { clipCode(v2[0]), clipCode(v2[1]), clipCode(v2[2]) };
                clipTriangle(vert2, v2, cl2, clipBit + 1);
            }
            else {
                // two points outside, make sure p0 inside first
                if ((clp[0] & clipMask) == 0) {
                    ver[0] = vert[0], ver[1] = vert[1], ver[2] = vert[2];
                    ve[0] = v[0], ve[1] = v[1], ve[2] = v[2];
                }
                else if ((clp[1] & clipMask) == 0) {
                    ver[0] = vert[1], ver[1] = vert[2], ver[2] = vert[0];
                    ve[0] = v[1], ve[1] = v[2], ve[2] = v[0];
                }
                else {
                    ver[0] = vert[2], ver[1] = vert[0], ver[2] = vert[1];
                    ve[0] = v[2], ve[1] = v[0], ve[2] = v[1];
                }

                t = clipProc[clipBit](&ve[0], &ve[1]);
                tmp1 = lerp(ver[0], ver[1], t);
                vtmp1 = lerp(ve[0], ve[1], t);

                t = clipProc[clipBit](&ve[2], &ve[0]);
                tmp2 = lerp(ver[2], ver[0], t);
                vtmp2 = lerp(ve[2], ve[0], t);

                Vertex vert1[3] = { ver[0], tmp1, tmp2};
                vec4 v1[3] = { ve[0], vtmp1, vtmp2};
                int cl1[3] = { clipCode(v1[0]), clipCode(v1[1]), clipCode(v1[2]) };
                clipTriangle(vert1, v1, cl1, clipBit + 1);
            }
        }
    }

    int clipCode(vec4 &v) {
        float w = v.w * (1.0f + 1E-5f);
        return ((v.x < -w) << 0)| ((v.x > w) << 1) |
            ((v.y < -w) << 2) | ((v.y > w) << 3) |
            ((v.z < -w) << 4) | ((v.z > w) << 5);
    }

    int indexOf(int x, int y) {
        return (HEIGHT - 1 - y) * WIDTH + x;
    }

    void setpixel(int x, int y, vec3 fragColor) {
        uint8_t r = (uint8_t)max(0.f, min(255.f, 255.f * fragColor.x));
        uint8_t g = (uint8_t)max(0.f, min(255.f, 255.f * fragColor.y));
        uint8_t b = (uint8_t)max(0.f, min(255.f, 255.f * fragColor.z));
        framebuf[indexOf(x, y)] = 0Xffu << 24 | r << 16 | g << 8 | b; // BGRA
    }

    bool insideTriangle(vec3& centric) {
        return  centric.x >= 0.f && centric.x <= 1.f && \
                centric.y >= 0.f && centric.y <= 1.f && \
                centric.z >= 0.f && centric.z <= 1.f;
    }
     
    vec3 barycentric(int x, int y, const vec4* _v) {
        vec2 p(static_cast<float>(x), static_cast<float>(y));
        vec2 a(_v[0].x, _v[0].y), b(_v[1].x, _v[1].y), c(_v[2].x, _v[2].y);
        vec2 v0 = b - a, v1 = c - a, v2 = p - a;
        float div = 1.f / (v0.x * v1.y - v1.x * v0.y);
        float v = div * (v2.x * v1.y - v1.x * v2.y);
        float w = div * (v0.x * v2.y - v2.x * v0.y);
        float u = 1.f - v - w;
        return vec3(u,v,w);
    }

    void drawLine(vec4 begin, vec4 end) {
        vec3 lineColor(0.45F, 0.73F, 0.19F);
        int x0 = int(begin.x);
        int y0 = int(begin.y);
        int x1 = int(end.x);
        int y1 = int(end.y);
        if (curClipType == CLIP_SCRN)
            if (x0 < 0 || x0 >= WIDTH || y0 < 0 || y0 >= HEIGHT || x1 < 0 || x1 >= WIDTH || y1 < 0 || y1 >= HEIGHT)
                return;
        int dx = abs(x1 - x0), sx = x0 < x1 ? 1 : -1;
        int dy = abs(y1 - y0), sy = y0 < y1 ? 1 : -1;
        int err = (dx > dy ? dx : -dy) / 2;
        while (setpixel(x0, y0, lineColor), x0 != x1 || y0 != y1) {
            int e2 = err;
            if (e2 > -dx) { err -= dy; x0 += sx; }
            if (e2 < dy) { err += dx; y0 += sy; }
        }
    }

    // https://www.khronos.org/registry/OpenGL/specs/gl/glspec46.core.pdf  page 479
    void perspectiveCorrect(vec3& centric, const float *hw) {
        float &x = centric.x, &y = centric.y, &z = centric.z;
        float a = x * hw[0], b = y * hw[1], c = z * hw[2];
        float d = 1.f / (a + b + c);
        x = a * d;
        y = b * d;
        z = c * d;
    }
};


///////////////////////////////////////////////////////////////////////////////////
// Shaders
///////////////////////////////////////////////////////////////////////////////////
// Common functions for all shaders
vec3 updatePosition(const vec3& old, const vec3& normal, const vec2& uv) {
    vec3 out = old;
    vec3 h = texture(uniform.dispMap, uv);
    float hs = (h.x + h.y + h.z) / 3.f;
    float t = 2 * hs - 1;
    return old - DISPACE_STRENGTH * t * normal;
}

vec3 updateNormal(vec3 Normal, vec3 Tangent, vec2 uv) {
    vec3 N = normalize(Normal);
    vec3 T = normalize(Tangent - N * dot(Tangent, N));
    vec3 B = normalize(cross(N, T)); 
    mat3 TBN(T, B, N);
    vec3 normal = 2.0 * texture(uniform.normalMap, uv) - 1.f;
    return normalize(TBN * normal);
}

vec3 blinnPhongColor(const vec3 &pos, const vec3 &n, const vec2 &uv) {
    vec3 color(0.3f, 0.3f, 0.3f);
    if (activeColorTexture)
        color = texture(uniform.colorMap, uv);
    float la = 0.f;
    vec3 ambient, diffuse, specular;
    for (auto &l : uniform.light) {
        ambient = ambient + l.La * color;
        vec3 s = l.dir;
        float sDotN = max(dot(s, n), 0.f);
        diffuse = diffuse + l.Ld * sDotN * color;
        if (sDotN > 0.0) {
            vec3 v = normalize(-pos);
            vec3 h = normalize(v + s);
            specular = specular + l.Ls * powf(max(dot(h, n), 0), 64) * vec3(0.8f, 0.8f, 0.8f);
        }
    }
    return ambient + diffuse + specular;
}

///////////////////////////////////////////////////////////////////////////////////
// 1. Blinn-Phong Shading: shade each pixel
///////////////////////////////////////////////////////////////////////////////////
void phongVertShader(const Vertex& in, Vertex& out, vec4& mvpPos) {
    vec3 pos = in.pos;
    if (activeDispTexture)
        pos = updatePosition(in.pos, in.normal, in.uv);
    out.pos = (uniform.modelViewMatrix * vec4(pos)).to_vec3();
    out.normal = normalize((uniform.normalMatrix * vec4(in.normal, 0.f)).to_vec3());
    out.tangent = normalize((uniform.normalMatrix * vec4(in.tangent, 0.f)).to_vec3());
    out.uv = in.uv;
    mvpPos = uniform.MVP * vec4(pos);
}

vec3 phongFragShader(const Vertex& in) {
    vec3 N = normalize(in.normal);
    if (activeNormalTexture)
        N = updateNormal(N, in.tangent, in.uv);
    // two-sided shading
    vec3 view = normalize(-in.pos);
    float vDotN = dot(view, N);
    if (vDotN <= 0)
       N = -N;
    return blinnPhongColor(in.pos, N, in.uv);
}


///////////////////////////////////////////////////////////////////////////////////
// 2. Gouraud Shading: shade each vertext
///////////////////////////////////////////////////////////////////////////////////
void gouraudVertShader(const Vertex& in, Vertex& out, vec4& mvpPos) {
    vec3 pos = in.pos;
    if (activeDispTexture)
        pos = updatePosition(in.pos, in.normal, in.uv);
    pos = (uniform.modelViewMatrix * vec4(pos)).to_vec3();
    vec3 N = normalize((uniform.normalMatrix * vec4(in.normal, 0.f)).to_vec3());
    vec3 T = normalize((uniform.normalMatrix * vec4(in.tangent, 0.f)).to_vec3());
    if (activeNormalTexture)
        N = updateNormal(N, T, in.uv);
    vec3 view = normalize(-pos);
    float vDotN = dot(view, N);
    if (vDotN < 0)
       N = -N;
    out.color = blinnPhongColor(pos, N, in.uv);
    mvpPos = uniform.projectionMatrix * vec4(pos);
}

vec3 gouraudFragShader(const Vertex& in) {
    return in.color;
}

///////////////////////////////////////////////////////////////////////////////////
// 3. Flat Shading: shade each triangle
///////////////////////////////////////////////////////////////////////////////////
void flatVertShader(const Vertex& in, Vertex& out, vec4& mvpPos) {
    return gouraudVertShader(in, out, mvpPos);
}
vec3 flatFragShader(const Vertex& in) {
    return gouraudFragShader(in);
}

int main() {
    TCHAR  buf[128];
    size_t vn1, tn1, vn2, tn2, vn, tn;
    double deltaTime = 0.f;
    double lastTime = 0.f;
    double currentTime = 0.f;
    uint64_t start, now, frequency;

    Render render;
    Camera camera(vec3(0.f, 15.f, 30.f), vec3(0.f, M_PI, 0.f), (float)WIDTH / (float)HEIGHT);

    Mesh* vase = new Vase(64);
    Image colorMap = buildColorTexture();
    Image normalMap = buildNormalTextureFrom(&colorMap);
    int vaoVase = render.createBuffersWith(vase);
    render.bindTexture(vaoVase, colorMap, normalMap, colorMap);
    vn1 = vase->pos.size();
    tn1 = vase->index.size() / 3;
    delete vase; 
    vase = nullptr;

    Mesh* plane = new Plane(5.f, 1);
    colorMap = buildColorTextureGrid();
    normalMap = buildNormalTextureFrom(&colorMap);
    int vaoPlane = render.createBuffersWith(plane);
    render.bindTexture(vaoPlane, colorMap, normalMap, colorMap);
    vn2 = plane->pos.size();
    tn2 = plane->index.size() / 3;
    delete plane; 
    plane = nullptr;

    Program blinnPhong(phongVertShader, phongFragShader);
    Program gouraud(gouraudVertShader, gouraudFragShader);
    Program flat(flatVertShader, flatFragShader, true);
    Program shaders[3] = {blinnPhong, gouraud, flat};
    const TCHAR  *shaderName[3] = {_T("blinnPhong"), _T("gouraud"), _T("flat")};

    QueryPerformanceCounter((LARGE_INTEGER*)&start);
    QueryPerformanceFrequency((LARGE_INTEGER*)&frequency);
    BeginBatchDraw();
    while (1) {
        vn = tn = 0;
        QueryPerformanceCounter((LARGE_INTEGER*)&now);
        currentTime = (double)(now - start) / frequency;
        deltaTime = currentTime - lastTime;
        lastTime = currentTime;
        mat4 m = rotateY(currentTime);

        camera.processInput();
        setMatrixUniform(m, camera.viewMatrix(), camera.projectionMatrix());
        render.polygonMode(activeLineMode);

        render.clear();
        render.useProgram(shaders[selectShader]);
        if (showVase) {
            render.drawElements(vaoVase);
            vn += vn1;
            tn += tn1;
        }
        if (showPlane) {
            render.drawElements(vaoPlane);
            vn += vn2;
            tn += tn2;
        }

        outtextxy(WIDTH / 2, 10, shaderName[selectShader]);
        swprintf_s(buf, _T("  vertices   : %zu"), vn);
        outtextxy(0, 0, buf);
        swprintf_s(buf, _T("  triangles  : %zu"), tn);
        outtextxy(0, 16, buf);
        outtextxy(0, 32, L"  Scroll       : fov");
        outtextxy(0, 48, L"  Drag LButton    : Rotate");
        outtextxy(0, 64, L"  LButton + Scroll: Forward / Backward");
        outtextxy(0, 80, L"  Press [C]: Color texture");
        outtextxy(0, 96, L"  Press [N]: Normal texture");
        outtextxy(0, 112, L"  Press [B]: Bump texture");
        outtextxy(0, 128, L"  Press [L]: Line Mode");
        outtextxy(0, 144, L"  Press [K]: Cull Face");
        outtextxy(0, 160, L"  Press [Space] : Shaders");
        swprintf_s(buf, _T("  Camera Position: (%.3f, %.3f, %.3f)"), camera.position().x, camera.position().y, camera.position().z);
        outtextxy(0, 176, buf);
        swprintf_s(buf, _T("  Camera Fov: %.3f"), camera.fov());
        outtextxy(0, 192, buf);
        swprintf_s(buf, _T("fps  : %d"), (int)(1.f / deltaTime));
        outtextxy(WIDTH - 96, 0, buf);
        swprintf_s(buf, _T("time: %.1fms"), 1000 * deltaTime);
        outtextxy(WIDTH - 96, 16, buf);
        FlushBatchDraw();
    }
    return 0;
}

添加评论