Unity光源配置:Tiled And Cluster
unity最新的HDRP中采用了Cluster和Tiled的光源配置,这里主要记录光源是如何从Unity当中的Light配置设置到最终Shader的对应Pass当中的。
Cluster是把视锥体看做三维体素,每个体素当中计算影响这个体素的光线,就和Volumetric Fog一样的原理。
Tiled是把屏幕分成格子,对每个格子计算影响这个格子的光源。
Tiled光照列表的计算总共分为三步:
- 计算光照列表的所有凸包
- 使用凸包,计算bigtile的光照列表的大致计算。
- 使用fplt进行更精细的光照列表计算。
##Tile光照列表计算
下面是光源计算,为每一个Tile生成光照列表:
public void BuildGPULightListsCommon(HDCamera hdCamera, CommandBuffer cmd, RenderTargetIdentifier cameraDepthBufferRT, RenderTargetIdentifier stencilTextureRT, bool skyEnabled)
{
var camera = hdCamera.camera;
// 计算光照列表开始
cmd.BeginSample("Build Light List");
var w = (int)hdCamera.screenSize.x;
var h = (int)hdCamera.screenSize.y;
s_TempScreenDimArray[0] = w;
s_TempScreenDimArray[1] = h;
//首先设置Tile的大小 64,64
var numBigTilesX = (w + 63) / 64;
var numBigTilesY = (h + 63) / 64;
// 计算各种矩阵:
m_LightListProjMatrices[0] =
m_LightListProjscrMatrices[0] =
m_LightListInvProjscrMatrices[0] =
m_LightListProjHMatrices[0] = ;
m_LightListInvProjHMatrices[0] = ;
// 如果有光源 ,计算所有光源的AABB包围盒 :见shader:lightlistbuild-bigtile.compute
if (m_lightCount != 0)
{
//第一步计算AABB
var genAABBKernel = isProjectionOblique ? s_GenAABBKernel_Oblique : s_GenAABBKernel;
// 是否正交
cmd.SetComputeIntParam(buildScreenAABBShader, HDShaderIDs.g_isOrthographic, isOrthographic ? 1 : 0);
// 光源数量
cmd.SetComputeIntParam(buildScreenAABBShader, HDShaderIDs.g_iNrVisibLights, m_lightCount);
// 光源凸包数据
cmd.SetComputeBufferParam(buildScreenAABBShader, genAABBKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
// 矩阵数组
cmd.SetComputeMatrixArrayParam(buildScreenAABBShader, HDShaderIDs.g_mProjectionArr, m_LightListProjHMatrices);
cmd.SetComputeMatrixArrayParam(buildScreenAABBShader, HDShaderIDs.g_mInvProjectionArr, m_LightListInvProjHMatrices);
// AABB结果
cmd.SetComputeBufferParam(buildScreenAABBShader, genAABBKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
int tgY = (int)hdCamera.numEyes;
// 提交计算
cmd.DispatchCompute(buildScreenAABBShader, genAABBKernel, (m_lightCount + 7) / 8, tgY, 1);
}
// 粗糙的Tile计算。
// enable coarse 2D pass on 64x64 tiles (used for both fptl and clustered).
if (m_FrameSettings.lightLoopSettings.enableBigTilePrepass)
{
cmd.SetComputeIntParam(buildPerBigTileLightListShader, HDShaderIDs.g_iNrVisibLights, m_lightCount);
cmd.SetComputeIntParam(buildPerBigTileLightListShader, HDShaderIDs.g_isOrthographic, isOrthographic ? 1 : 0);
cmd.SetComputeIntParams(buildPerBigTileLightListShader, HDShaderIDs.g_viDimensions, s_TempScreenDimArray);
// TODO: These two aren't actually used...
cmd.SetComputeIntParam(buildPerBigTileLightListShader, HDShaderIDs._EnvLightIndexShift, m_lightList.lights.Count);
cmd.SetComputeIntParam(buildPerBigTileLightListShader, HDShaderIDs._DecalIndexShift, m_lightList.lights.Count + m_lightList.envLights.Count);
cmd.SetComputeMatrixArrayParam(buildPerBigTileLightListShader, HDShaderIDs.g_mScrProjectionArr, m_LightListProjscrMatrices);
cmd.SetComputeMatrixArrayParam(buildPerBigTileLightListShader, HDShaderIDs.g_mInvScrProjectionArr, m_LightListInvProjscrMatrices);
cmd.SetComputeFloatParam(buildPerBigTileLightListShader, HDShaderIDs.g_fNearPlane, camera.nearClipPlane);
cmd.SetComputeFloatParam(buildPerBigTileLightListShader, HDShaderIDs.g_fFarPlane, camera.farClipPlane);
cmd.SetComputeBufferParam(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, HDShaderIDs.g_vLightList, s_BigTileLightList);
cmd.SetComputeBufferParam(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.SetComputeBufferParam(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, HDShaderIDs._LightVolumeData, s_LightVolumeDataBuffer);
cmd.SetComputeBufferParam(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
int tgZ = (int)hdCamera.numEyes;
cmd.DispatchCompute(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, numBigTilesX, numBigTilesY, tgZ);
}
// fplt计算
// optimized for opaques only
if (m_FrameSettings.lightLoopSettings.isFptlEnabled)
{
var genListPerTileKernel = isProjectionOblique ? s_GenListPerTileKernel_Oblique : s_GenListPerTileKernel;
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs.g_isOrthographic, isOrthographic ? 1 : 0);
cmd.SetComputeIntParams(buildPerTileLightListShader, HDShaderIDs.g_viDimensions, s_TempScreenDimArray);
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs._EnvLightIndexShift, m_lightList.lights.Count);
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs._DecalIndexShift, m_lightList.lights.Count + m_lightList.envLights.Count);
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs.g_iNrVisibLights, m_lightCount);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs._LightVolumeData, s_LightVolumeDataBuffer);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
cmd.SetComputeMatrixParam(buildPerTileLightListShader, HDShaderIDs.g_mScrProjection, m_LightListProjscrMatrices[0]);
cmd.SetComputeMatrixParam(buildPerTileLightListShader, HDShaderIDs.g_mInvScrProjection, m_LightListInvProjscrMatrices[0]);
cmd.SetComputeTextureParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_depth_tex, cameraDepthBufferRT);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_vLightList, s_LightList);
if (m_FrameSettings.lightLoopSettings.enableBigTilePrepass)
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_vBigTileLightList, s_BigTileLightList);
if (enableFeatureVariants)
{
uint baseFeatureFlags = 0;
if (m_lightList.directionalLights.Count > 0)
{
baseFeatureFlags |= (uint)LightFeatureFlags.Directional;
}
if (skyEnabled)
{
baseFeatureFlags |= (uint)LightFeatureFlags.Sky;
}
if (!m_FrameSettings.lightLoopSettings.enableComputeMaterialVariants)
{
baseFeatureFlags |= LightDefinitions.s_MaterialFeatureMaskFlags;
}
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs.g_BaseFeatureFlags, (int)baseFeatureFlags);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_TileFeatureFlags, s_TileFeatureFlags);
}
cmd.DispatchCompute(buildPerTileLightListShader, genListPerTileKernel, numTilesX, numTilesY, 1);
}
// Cluster 计算
VoxelLightListGeneration(cmd, hdCamera, m_LightListProjscrMatrices, m_LightListInvProjscrMatrices, cameraDepthBufferRT);
// 计算光照列表结束
cmd.EndSample("Build Light List");
}
光源包围盒计算
计算屏幕空间光源包围盒的shader:scrbound.compute
原始代码中记录的包围盒的类型,中心和几个扩展方向
public struct SFiniteLightBound
{
public Vector3 boxAxisX;
public Vector3 boxAxisY;
public Vector3 boxAxisZ;
public Vector3 center; // a center in camera space inside the bounding volume of the light source.
public Vector2 scaleXY;
public float radius;
};
每个光源都有一个上面的结构体,从一个统一的结构体转换到屏幕对齐的AABB包围盒。
###BigTile光照列表计算
计算BigTile光照列表的compute shader:
使用了三种方法:
- 包围盒相交。
- 球形相交。
- CullByExactEdgeTests:这个还没有看。
// 根据NR_THREADS作为不同线程的采样间隔,计算互斥的光源信息
[numthreads(NR_THREADS, 1, 1)]
void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
/*
这里的计算是每个Tile一次提交,每个提交,多个Thread并行。,每个Thread处理不想交的一组灯光。
*/
// 这个是Tile的ID
uint eyeIndex = u3GroupID.z;
uint2 tileIDX = u3GroupID.xy;
// 这个是每个Tile当中的threadID
uint t=threadID;
// 计算每个Tile的实际大小。
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrBigTilesX = (iWidth+63)/64;
uint nrBigTilesY = (iHeight+63)/64;
// 线程共享,只让第一个线程进行初始化操作。
if(t==0) lightOffs = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
// Raw pixel coordinates of tile
uint2 viTilLL = 64*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
// 每个tile的包围盒
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
// 获取灯光包围盒信息
const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(l, g_iNrVisibLights, eyeIndex);
// 灯光的包围盒信息
const float2 vMi = g_vBoundsBuffer[boundsIndices.min].xy;
const float2 vMa = g_vBoundsBuffer[boundsIndices.max].xy;
// 灯光的包围盒和tile的包围盒是否相交
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;
// 原子求和操作,让lightOffs加1.
InterlockedAdd(lightOffs, uInc, uIndex);
// 通过加到当前Tile的灯光列表,uIndex是组间不同的。
if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l;
}
}
#if /*!defined(SHADER_API_XBOXONE) && */!defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
// 计算实际的光照数。
int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);
// 球形相交计算,如果球形没有相交, 则把lightlist中记录原来相交的id变成uint_max
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))), eyeIndex );
#endif
#ifdef EXACT_EDGE_TESTS
CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, eyeIndex);
#endif
// 灯光按类型排序
SORTLIST(lightsListLDS, iNrCoarseLights, MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE, t, NR_THREADS);
if(t==0) lightOffs = 0;
GroupMemoryBarrierWithGroupSync();
// 重新计算光照数量
int i;
for(i=t; i<iNrCoarseLights; i+=NR_THREADS)
if(lightsListLDS[i]<(uint)g_iNrVisibLights)
InterlockedAdd(lightOffs, 1);
GroupMemoryBarrierWithGroupSync();
iNrCoarseLights = lightOffs;
// 计算当前tile的索引
int offs = tileIDX.y*nrBigTilesX + tileIDX.x + (eyeIndex * nrBigTilesX * nrBigTilesY);
//最终的光照列表,放到记录光照的列表当中,所有的tile的光照列表存在一个大的tile当中。
for(i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
g_vLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*offs + i] = i==0 ? iNrCoarseLights : lightsListLDS[max(i-1, 0)];
}
FPTL
精准的判断每个tile当中的光照列表,这里使用了更加精细的tile划分(16*16),相对于bigtile还会对Z值进行比较。原本的Bigtile 只是用了屏幕方向的xy。
同时加入了光源类型相关的操作。
shader使用:lightlistbuild.compute
compute Shader使用的kernel确定:
if (GetFeatureVariantsEnabled())
{
s_GenListPerTileKernel = buildPerTileLightListShader.FindKernel(m_FrameSettings.lightLoopSettings.enableBigTilePrepass ? "TileLightListGen_SrcBigTile_FeatureFlags" : "TileLightListGen_FeatureFlags");
s_GenListPerTileKernel_Oblique = buildPerTileLightListShader.FindKernel(m_FrameSettings.lightLoopSettings.enableBigTilePrepass ? "TileLightListGen_SrcBigTile_FeatureFlags_Oblique" : "TileLightListGen_FeatureFlags_Oblique");
}
else
{
s_GenListPerTileKernel = buildPerTileLightListShader.FindKernel(m_FrameSettings.lightLoopSettings.enableBigTilePrepass ? "TileLightListGen_SrcBigTile" : "TileLightListGen");
s_GenListPerTileKernel_Oblique = buildPerTileLightListShader.FindKernel(m_FrameSettings.lightLoopSettings.enableBigTilePrepass ? "TileLightListGen_SrcBigTile_Oblique" : "TileLightListGen_Oblique");
}
FPTLshader
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
if(t<MAX_NR_COARSE_ENTRIES)
prunedList[t]=0;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
uint nrTiles = nrTilesX * nrTilesY; // Precompute?
// build tile scr boundary
const uint uFltMax = 0x7f7fffff; // FLT_MAX as a uint
if(t==0)
{
ldsZMin = uFltMax;
ldsZMax = 0;
lightOffs = 0;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
uint2 viTilLL = 16*tileIDX;
// establish min and max depth first
float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;
// 计算包含深度的包围盒。
float4 vLinDepths;
{
// Fetch depths and calculate min/max
UNITY_UNROLL
for(int i = 0; i < 4; i++)
{
int idx = i * NR_THREADS + t;
uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
const float fDepth = FetchDepth(g_depth_tex, uCrd);
vLinDepths[i] = GetLinearDepth(uCrd+float2(0.5,0.5), fDepth);
if(fDepth<VIEWPORT_SCALE_Z) // if not skydome
{
dpt_mi = min(fDepth, dpt_mi);
dpt_ma = max(fDepth, dpt_ma);
}
}
InterlockedMax(ldsZMax, asuint(dpt_ma));
InterlockedMin(ldsZMin, asuint(dpt_mi));
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
}
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
// 使用aabb计算粗糙的光照列表。
// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
const uint log2BigTileToTileRatio = firstbithigh(64) - firstbithigh(16);
int NrBigTilesX = (nrTilesX+((1<<log2BigTileToTileRatio)-1))>>log2BigTileToTileRatio;
const int bigTileIdx = (tileIDX.y>>log2BigTileToTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToTileRatio); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
{
int l = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+l0+1];
#else
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
// Skip density volumes (lights are sorted by category). TODO: improve data locality
if (_LightVolumeData[l].lightCategory == LIGHTCATEGORY_DENSITY_VOLUME) { break; }
const float3 vMi = g_vBoundsBuffer[l].xyz;
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xyz;
// 包围盒在xyz方向上同时判断相交。
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
}
}
// 粗糙的光照列表计算结束
// FPTL进行精细调整。
#ifdef FINE_PRUNING_ENABLED
if(t<2) ldsDoesLightIntersect[t] = 0;
#endif
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
// 球形相交和 bigtile类似
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifndef FINE_PRUNING_ENABLED
{
// 修改保存位置
if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
if(t==0) ldsNrLightsFinal=iNrCoarseLights;
}
#else
{
// initializes ldsNrLightsFinal with the number of accepted lights.
// all accepted entries delivered in prunedList[].
FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
}
#endif
// 光照类型相关,t是线程号。每个线程对应一个类型。
if(t<CATEGORY_LIST_SIZE) ldsCategoryListCount[t]=0;
#ifdef USE_FEATURE_FLAGS
if(t==0) ldsFeatureFlags=0;
#endif
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
// 统计每种类型光源的数量
int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
{
InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
#ifdef USE_FEATURE_FLAGS
InterlockedOr(ldsFeatureFlags, _LightVolumeData[prunedList[i]].featureFlags);
#endif
}
// 光照排序,减少在计算光照过程中分支的情况
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
#endif
#ifdef USE_FEATURE_FLAGS
if(t == 0)
{
uint featureFlags = ldsFeatureFlags | g_BaseFeatureFlags;
// In case of back
if(ldsZMax < ldsZMin) // is background pixel
{
// There is no stencil usage with compute path, featureFlags set to 0 is use to have fast rejection of tile in this case. It will still execute but will do nothing
featureFlags = 0;
}
g_TileFeatureFlags[tileIDX.y * nrTilesX + tileIDX.x] = featureFlags;
}
#endif
// write lights to global buffers
int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
// to make it work correctly
int shiftIndex[CATEGORY_LIST_SIZE];
ZERO_INITIALIZE_ARRAY(int, shiftIndex, CATEGORY_LIST_SIZE);
shiftIndex[CATEGORY_LIST_SIZE - 2] = _EnvLightIndexShift;
shiftIndex[CATEGORY_LIST_SIZE - 1] = _DecalIndexShift;
//将光源按照类型顺序写入到全局结果中。
for(int category=0; category<CATEGORY_LIST_SIZE; category++)
{
int nrLightsFinal = ldsCategoryListCount[category];
int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;
const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
{
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[max(0,2 * l - 1 + localOffs)] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
}
localOffs += nrLightsFinal;
offs += (nrTilesX*nrTilesY);
}
}