extends Node
## GPU boid flock.
##
## Runs a boid simulation (separation / alignment / cohesion / purpose) in a
## compute shader on the main RenderingDevice and writes the resulting
## transforms straight into the buffer of the MultiMesh held by
## [member output_mesh_instance].

## Number of shader invocations per work group. Must match `local_size_x`
## in BOID_SHADER_CODE.
const WORKGROUP_SIZE : int = 16

## Number of floats used by one boid in the flock storage buffer:
## 2 positions + 2 speeds, each stored as a vec3 padded to 4 floats (std430).
const FLOATS_PER_BOID : int = 16

## GLSL source of the simulation kernel.
const BOID_SHADER_CODE : String = """#version 450

// local_size_x must match WORKGROUP_SIZE in the GDScript side.
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;

struct Boid {
	vec3 position[2];
	vec3 speed[2];
};

layout(set = 0, binding = 0, std430) restrict buffer Flock {
	Boid boids[];
} flock;

layout(binding = 1, std140) uniform Behavior {
	float attraction_distance;
	float repulsion_distance;
	float separation;
	float alignment;
	float cohesion;
	float purpose;
	float acceleration;
	float max_velocity;
};

layout(binding = 2, std140) uniform CurrentState {
	vec3 target;
	float delta;
	float swap;
};

layout(binding = 3, std140) uniform StructureInfo {
	int flock_size;            // Number of boids in flock
	int plane_count;           // Number of collision planes
	int sphere_count;          // Number of collision spheres
	int is_using_vertex_color; // The multimesh is using vertex color
	int is_using_custom_data;  // The multimesh is using custom data
};

layout(set = 0, binding = 4, std430) writeonly buffer Output {
	float m[]; // Transformation matrix and (possibly) additional info.
} result;

layout(set = 0, binding = 5, std430) readonly buffer CollisionObjects {
	vec4 info[];
} colliders;

void main() {
	float ratio = separation + alignment + cohesion + purpose;

	vec3 cohesion_acc = vec3(0.);
	vec3 alignment_acc = vec3(0.);
	uint cohesion_count = 0;
	vec3 separation_acc = vec3(0.);
	uint separation_count = 0;

	uint i = gl_GlobalInvocationID.x;
	// The dispatch is rounded up to a whole number of work groups, so the
	// trailing invocations (i >= flock_size) have no boid to process.
	if (i >= uint(flock_size)) {
		return;
	}

	uint src = swap < 0 ? 0 : 1;
	uint dst = 1 - src;

	vec3 p = flock.boids[i].position[src];
	vec3 v = flock.boids[i].speed[src];

	for(int k = 0; k < flock_size; ++k) {
		if(k == i) continue;
		vec3 f = flock.boids[k].position[src];
		vec3 diff = f - p;
		float dist = length(diff);
		if (dist < attraction_distance) {
			if (dist < repulsion_distance) {
				separation_acc += f;
				separation_count += 1;
			} else {
				cohesion_acc += f;
				alignment_acc += flock.boids[k].speed[src];
				cohesion_count += 1;
			}
		}
	}

	vec3 cohesion_v = (cohesion_count == 0) ? vec3(0.) : normalize((cohesion_acc / float(cohesion_count)) - p);
	vec3 alignment_v = (cohesion_count == 0) ? vec3(0.) : normalize(alignment_acc / float(cohesion_count));
	vec3 separation_v = (separation_count == 0) ? vec3(0.) : normalize(p - (separation_acc / float(separation_count)));
	vec3 direction_v = normalize(target - p);

	vec3 acceleration = normalize(
		( direction_v * purpose
		+ cohesion_v * cohesion
		+ separation_v * separation
		+ alignment_v * alignment) / ratio) * acceleration * delta;

	v += acceleration;
	float speed = length(v);
	vec3 nv = normalize(v);
	if (speed > max_velocity) {
		v = nv * max_velocity;
		speed = max_velocity;
	}

	float prog_threshold = speed * delta * 10.; // We will hit in the 10 next frames. Should be a parameter.
	for(int k = 0; k < plane_count; ++k) {
		vec4 plane = colliders.info[k];
		vec3 plane_normal = plane.xyz;
		float det = - dot(nv, plane_normal);
		if (det > 0.) {
			float t = dot(plane.xyz * plane.w - p, plane.xyz) / det;
			if (t > 0. && t < prog_threshold) {
				nv = nv - 2. * dot(nv, plane_normal) * plane_normal;
				v = nv * speed;
			}
		}
	}

	for(int k = plane_count; k < plane_count + sphere_count; ++k) {
		vec4 sphere = colliders.info[k];
		vec3 center = sphere.xyz;
		float radius = sphere.w;
		vec3 m = p - center;
		float b = dot(m, nv);
		float c = dot(m, m) - radius * radius;
		if (c <= 0. || b <= 0.) {
			float d = b * b - c;
			if (d > 0.) {
				float t = - b - sqrt(d);
				if (t > 0. && t < prog_threshold) {
					vec3 q = p + t * nv;
					vec3 sphere_normal = normalize(q - center);
					vec3 tn = cross(nv, sphere_normal);
					nv = cross(sphere_normal, tn);
					v = nv * speed;
				}
			}
		}
	}

	p += v * delta;

	flock.boids[i].position[dst] = p;
	flock.boids[i].speed[dst] = v;

	// Compute the transformation matrix.
	vec3 v_z = nv;
	vec3 v_x = normalize(cross(vec3(0., 1., 0.), v_z));
	vec3 v_y = cross(v_z, v_x);

	uint r_o = i * (12 + (is_using_vertex_color == 1 ? 4 : 0) + (is_using_custom_data == 1 ? 4 : 0));
	result.m[r_o + 0] = v_x.x;
	result.m[r_o + 1] = v_y.x;
	result.m[r_o + 2] = v_z.x;
	result.m[r_o + 3] = p.x;
	result.m[r_o + 4] = v_x.y;
	result.m[r_o + 5] = v_y.y;
	result.m[r_o + 6] = v_z.y;
	result.m[r_o + 7] = p.y;
	result.m[r_o + 8] = v_x.z;
	result.m[r_o + 9] = v_y.z;
	result.m[r_o + 10] = v_z.z;
	result.m[r_o + 11] = p.z;
}
"""

@export_subgroup("Output", "output")
## MultiMeshInstance3D whose MultiMesh buffer receives the boid transforms.
@export var output_mesh_instance : MultiMeshInstance3D

@export_subgroup("Structure", "structure")
## Number of boids (and MultiMesh instances).
@export var structure_boid_count : int = 76
## Edge length of the cube in which boids are randomly spawned.
@export var structure_starting_cube_size : float = 32.

# Every behavior/capacity setter only stores the value and flags the
# behavior uniform buffer as dirty; the upload happens in _process.
@export_subgroup("Capacities", "capacity")
@export var capacity_max_velocity : float = 13.0:
	set(value):
		capacity_max_velocity = value
		need_behavior_update = true
@export var capacity_acceleration : float = 26.0:
	set(value):
		capacity_acceleration = value
		need_behavior_update = true

@export_subgroup("Behavior", "behavior")
@export var behavior_attraction_distance : float = 16.:
	set(value):
		behavior_attraction_distance = value
		need_behavior_update = true
@export var behavior_repulsion_distance : float = 1.:
	set(value):
		behavior_repulsion_distance = value
		need_behavior_update = true
@export var behavior_separation : float = 4.2:
	set(value):
		behavior_separation = value
		need_behavior_update = true
@export var behavior_alignment : float = 1.6:
	set(value):
		behavior_alignment = value
		need_behavior_update = true
@export var behavior_cohesion : float = 1.:
	set(value):
		behavior_cohesion = value
		need_behavior_update = true
@export var behavior_purpose : float = 1.6:
	set(value):
		behavior_purpose = value
		need_behavior_update = true

@export_subgroup("Interactor", "interactor")
## Node whose position is sent to the shader as the flock target.
@export var interactor_attractor_node : Node3D
## Area whose CollisionShape3D children (world boundaries and spheres)
## are sent to the shader as obstacles.
@export var interactor_collider_node : Area3D

# GPU Version variables
# (plain members: none of them has a node-dependent initializer, so
# @onready is not needed)
var rd : RenderingDevice
var shader_rid : RID
var flock_buffer : RID
var flock_behavior_buffer : RID
var flock_structure_buffer : RID
var flock_runtime_buffer : RID
var flock_result_buffer : RID
var flock_obstacle_buffer : RID
var uniform_set : RID
var pipeline : RID

# CPU-side copy of the CurrentState uniform block:
# [target.x, target.y, target.z, delta, swap, pad, pad, pad]
var flock_runtime := PackedFloat32Array()

# Update data
var need_behavior_update := false


func _ready() -> void:
	assert(output_mesh_instance != null
			and output_mesh_instance.multimesh.transform_format == MultiMesh.TransformFormat.TRANSFORM_3D)

	output_mesh_instance.multimesh.set_instance_count(structure_boid_count)
	output_mesh_instance.multimesh.set_visible_instance_count(structure_boid_count)

	_init_compute_shader()


## Releases every GPU resource created by this node when it is deleted.
func _notification(what: int) -> void:
	if what != NOTIFICATION_PREDELETE:
		return
	# _ready() may never have run (node deleted before entering the tree).
	if rd == null:
		return

	# Free dependents first: to my understanding RenderingDevice releases a
	# pipeline / uniform set automatically when something it depends on is
	# freed, so freeing them afterwards would target already-freed RIDs.
	# The *_is_valid checks also cover the case where the uniform set was
	# already invalidated (e.g. the MultiMesh buffer went away first).
	if pipeline.is_valid() and rd.compute_pipeline_is_valid(pipeline):
		rd.free_rid(pipeline)
	if uniform_set.is_valid() and rd.uniform_set_is_valid(uniform_set):
		rd.free_rid(uniform_set)

	var owned_buffers : Array[RID] = [
		flock_buffer,
		flock_behavior_buffer,
		flock_structure_buffer,
		flock_runtime_buffer,
		flock_result_buffer,
		flock_obstacle_buffer,
	]
	for buffer : RID in owned_buffers:
		# Buffers that were never created are still the null RID.
		if buffer.is_valid():
			rd.free_rid(buffer)

	if shader_rid.is_valid():
		rd.free_rid(shader_rid)


## Compiles the compute shader and creates every buffer, the uniform set and
## the pipeline. On failure the pipeline stays invalid and _process does
## nothing.
func _init_compute_shader() -> void:
	#rd = RenderingServer.create_local_rendering_device()
	rd = RenderingServer.get_rendering_device()
	if rd == null:
		push_error("No RenderingDevice available.")
		return
	if structure_boid_count <= 0:
		push_error("structure_boid_count must be greater than 0.")
		return

	var shader_source := RDShaderSource.new()
	shader_source.language = RenderingDevice.SHADER_LANGUAGE_GLSL
	shader_source.source_compute = BOID_SHADER_CODE
	var shader_spirv := rd.shader_compile_spirv_from_source(shader_source)
	if shader_spirv.compile_error_compute != "":
		push_error(shader_spirv.compile_error_compute)
		push_error("In: " + BOID_SHADER_CODE)
		assert(false)
		# assert() is stripped from release builds: stop here explicitly.
		return

	shader_rid = rd.shader_create_from_spirv(shader_spirv)
	if not shader_rid.is_valid():
		print("Invalid Shader")
		return

	var uniforms : Array[RDUniform] = []

	## Flock info ##
	# Each boid: position[0], position[1], speed[0], speed[1], every vec3
	# padded to 4 floats. Both halves start identical so that whichever one
	# the shader reads first is valid.
	var flock_info := PackedFloat32Array()
	flock_info.resize(structure_boid_count * FLOATS_PER_BOID)
	var sz : float = structure_starting_cube_size / 2.
	for i in range(structure_boid_count):
		var v : Vector3 = Vector3(randf_range(-sz, sz), randf_range(-sz, sz), randf_range(-sz, sz))
		var k := i * FLOATS_PER_BOID
		for half in range(2):
			var po := k + half * 4
			flock_info[po + 0] = v.x
			flock_info[po + 1] = v.y
			flock_info[po + 2] = v.z
			flock_info[po + 3] = 1.
			var so := k + 8 + half * 4
			flock_info[so + 0] = 0.01
			flock_info[so + 1] = 0.0
			flock_info[so + 2] = 0.0
			flock_info[so + 3] = 1.

		output_mesh_instance.multimesh.set_instance_transform(i, Transform3D.IDENTITY.translated(v))

	var flock_info_bytes := flock_info.to_byte_array()
	flock_buffer = _register_uniform(rd, 0, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, uniforms, flock_info_bytes.size(), flock_info_bytes)

	## Flock behavior ##
	# Content is uploaded by _update_behavior_values on the first _process.
	flock_behavior_buffer = _register_uniform(rd, 1, RenderingDevice.UNIFORM_TYPE_UNIFORM_BUFFER, uniforms, 8 * 4)
	need_behavior_update = true

	## Flock Runtime Info ##
	flock_runtime.resize(8)
	flock_runtime.fill(0.)  # also zeroes the 3 padding floats
	flock_runtime[3] = 0.016
	flock_runtime[4] = 1.
	var flock_runtime_bytes := flock_runtime.to_byte_array()
	flock_runtime_buffer = _register_uniform(rd, 2, RenderingDevice.UNIFORM_TYPE_UNIFORM_BUFFER, uniforms, flock_runtime_bytes.size(), flock_runtime_bytes)

	## Obstacle buffer ##
	var collision_info := PackedVector4Array()
	var objects_count := _get_collision_plane_count(collision_info)
	var collision_bytes := collision_info.to_byte_array()
	# A storage buffer cannot be empty: allocate a small dummy one when there
	# is no obstacle (the shader never reads it since both counts are 0).
	flock_obstacle_buffer = _register_uniform(rd, 5, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, uniforms, 8 if collision_bytes.size() == 0 else collision_bytes.size(), collision_bytes)

	## Flock Structure ##
	var flock_structure := PackedInt32Array()
	flock_structure.resize(8) # Needed for uniform buffer structure alignment
	flock_structure.fill(0)
	flock_structure[0] = structure_boid_count
	flock_structure[1] = objects_count["plane"]
	flock_structure[2] = objects_count["sphere"]
	flock_structure[3] = 1 if output_mesh_instance.multimesh.use_colors else 0
	flock_structure[4] = 1 if output_mesh_instance.multimesh.use_custom_data else 0
	var flock_structure_bytes := flock_structure.to_byte_array()
	flock_structure_buffer = _register_uniform(rd, 3, RenderingDevice.UNIFORM_TYPE_UNIFORM_BUFFER, uniforms, flock_structure_bytes.size(), flock_structure_bytes)

	## Flock result (goes directly into the multimesh ! Thx Jason Knight for the precious tip ! ##
	var multimesh_rid := output_mesh_instance.multimesh.get_rid()
	var multimesh_buffer_rid := RenderingServer.multimesh_get_buffer_rd_rid(multimesh_rid)
	assert(multimesh_buffer_rid.is_valid())
	if not multimesh_buffer_rid.is_valid():
		push_error("The MultiMesh has no RenderingDevice buffer.")
		return
	var multimesh_out_uniform := RDUniform.new()
	multimesh_out_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER
	multimesh_out_uniform.binding = 4
	multimesh_out_uniform.add_id(multimesh_buffer_rid)
	uniforms.append(multimesh_out_uniform)

	## And finally, initialize the uniform set ##
	uniform_set = rd.uniform_set_create(uniforms, shader_rid, 0)
	pipeline = rd.compute_pipeline_create(shader_rid)


## Creates a storage or uniform buffer of [param size] bytes (optionally
## initialized with [param bytes]), appends an RDUniform bound to
## [param binding] to [param uniform_list], and returns the buffer RID.
func _register_uniform(rendering_device : RenderingDevice, binding : int, type : RenderingDevice.UniformType, uniform_list : Array[RDUniform], size : int, bytes : PackedByteArray = PackedByteArray()) -> RID:
	var buffer_rid : RID
	if type == RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER:
		buffer_rid = rendering_device.storage_buffer_create(size, bytes)
	else:
		buffer_rid = rendering_device.uniform_buffer_create(size, bytes)

	var uniform := RDUniform.new()
	uniform.uniform_type = type
	uniform.binding = binding
	uniform.add_id(buffer_rid)
	uniform_list.append(uniform)

	return buffer_rid


## Collects the obstacles found under [member interactor_collider_node].
## Appends to [param info] one Vector4 per obstacle, all planes first and
## then all spheres (the order the shader expects), and returns how many of
## each were found under the keys "plane" and "sphere".
func _get_collision_plane_count(info : PackedVector4Array) -> Dictionary[String, int]:
	if interactor_collider_node == null:
		return { "plane" : 0, "sphere" : 0 }

	var planes := PackedVector4Array()
	var spheres := PackedVector4Array()

	for c in interactor_collider_node.get_children():
		if not c is CollisionShape3D:
			continue
		var collision_shape := c as CollisionShape3D
		# `shape` may be unset in the editor; such nodes are skipped.
		var shape := collision_shape.shape
		if shape is WorldBoundaryShape3D:
			planes.append(_get_plane_info(collision_shape))
		elif shape is SphereShape3D:
			spheres.append(_get_sphere_info(collision_shape))
		# Any other shape type is unmanaged and ignored.

	info.append_array(planes)
	info.append_array(spheres)

	return { "plane" : planes.size(), "sphere" : spheres.size() }


## Returns the plane of a WorldBoundaryShape3D, transformed by the local
## transform of its CollisionShape3D, packed as (normal.xyz, d).
func _get_plane_info(container : CollisionShape3D) -> Vector4:
	var shape := container.shape as WorldBoundaryShape3D
	var tp := container.transform * shape.plane
	var n : Vector3 = tp.normal
	var d : float = tp.d
	return Vector4(n.x, n.y, n.z, d)


## Returns a SphereShape3D packed as (center.xyz, radius), the center being
## the local position of its CollisionShape3D.
func _get_sphere_info(container : CollisionShape3D) -> Vector4:
	var shape := container.shape as SphereShape3D
	var p := container.position
	return Vector4(p.x, p.y, p.z, shape.radius)


## Uploads the Behavior uniform block if any exported parameter changed.
func _update_behavior_values() -> void:
	if not need_behavior_update:
		return

	# Order must match the `Behavior` block of the shader.
	var flock_behavior := PackedFloat32Array([
		behavior_attraction_distance,
		behavior_repulsion_distance,
		behavior_separation,
		behavior_alignment,
		behavior_cohesion,
		behavior_purpose,
		capacity_acceleration,
		capacity_max_velocity,
	])
	var flock_behavior_bytes := flock_behavior.to_byte_array()
	rd.buffer_update(flock_behavior_buffer, 0, flock_behavior_bytes.size(), flock_behavior_bytes)
	need_behavior_update = false


func _process(delta : float) -> void:
	# Nothing to run if _init_compute_shader failed part-way.
	if rd == null or not pipeline.is_valid() or not uniform_set.is_valid():
		return

	_update_behavior_values()
	_update_shader_entry(delta)
	RenderingServer.call_on_render_thread(_run_shader)


## Uploads the per-frame CurrentState block: target, delta and the
## ping-pong selector.
func _update_shader_entry(delta : float) -> void:
	# Without an attractor the previous target (initially the origin) is kept.
	if interactor_attractor_node != null:
		var t := interactor_attractor_node.position
		flock_runtime[0] = t.x
		flock_runtime[1] = t.y
		flock_runtime[2] = t.z
	flock_runtime[3] = delta
	# The sign selects which half of each boid is read and which is written.
	flock_runtime[4] = -flock_runtime[4]
	var flock_runtime_bytes := flock_runtime.to_byte_array()
	rd.buffer_update(flock_runtime_buffer, 0, flock_runtime_bytes.size(), flock_runtime_bytes)


## Records and submits one simulation step. Called on the render thread.
func _run_shader() -> void:
	# The uniform set is invalidated if one of its buffers (e.g. the
	# MultiMesh buffer) was freed or recreated in the meantime.
	if not rd.compute_pipeline_is_valid(pipeline) or not rd.uniform_set_is_valid(uniform_set):
		return

	# Round up so that a boid count that is not a multiple of the work group
	# size is still fully covered; the shader discards the extra invocations.
	var group_count : int = ceili(structure_boid_count / float(WORKGROUP_SIZE))
	if group_count <= 0:
		return

	var list := rd.compute_list_begin()
	rd.compute_list_bind_compute_pipeline(list, pipeline)
	rd.compute_list_bind_uniform_set(list, uniform_set, 0)
	rd.compute_list_dispatch(list, group_count, 1, 1)
	rd.compute_list_end()
or share this direct link: