Changes - SailfishOS Open Build Service

Changes of Revision 32

We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.

[-] [+]	Changed	_service:tar_git:mesa.spec
@@ -16,7 +16,7 @@ Name: mesa Summary: Mesa graphics libraries -Version: 23.3.1+git6 +Version: 23.3.5+git1 Release: 0 Group: System/Libraries License: MIT
[-] [+]	Changed	_service ^
@@ -2,7 +2,7 @@ <service name="tar_git"> <param name="url">https://github.com/sailfish-on-dontbeevil/mesa</param> <param name="branch">master</param> - <param name="revision">23.3.1+git6</param> + <param name="revision">23.3.5+git1</param> <param name="token"/> <param name="debian">N</param> <param name="dumb">N</param>
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/.gitlab-ci/test/gitlab-ci.yml ^
@@ -380,7 +380,6 @@ echo "export SCRIPTS_DIR=./install" >> ${JOB_FOLDER}/set-job-env-vars.sh echo "Variables passed through:" cat ${JOB_FOLDER}/set-job-env-vars.sh - echo "export CI_JOB_JWT=${CI_JOB_JWT}" >> ${JOB_FOLDER}/set-job-env-vars.sh set -x # Copy the mesa install tarball to the job folder, for later extraction
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/.pick_status.json ^
@@ -1,5 +1,19815 @@ [ { + "sha": "4cd5b2b5426e8d670fc3657eee040a79e3f9df1e", + "description": "intel/hasvk: assume() we don't get ISL_NUM_FORMATS", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "3d4ef6f983fa78c3d6f361ab2b5a3409e6c9d09d", + "description": "intel/vulkan: assume() that we don't use \"ISL_NUM_FORMATS\"", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "529e7ab9755c33c5c59438f3b58b5cfdc2eeffc5", + "description": "lavapipe: RM2024 extension promotions", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "1c01fd028616c755fdac59016b17b07933a416ed", + "description": "util/disk_cache: Use secure_getenv to determine cache directories", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "e8b0e5cac9062f9f290a46460279eaa3eb0c60a8", + "description": "radv: Use secure_getenv for RADV_THREAD_TRACE_TRIGGER", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "c01a07f2e47bc778ff6faf665b98be5556c77e77", + "description": "radv: Use secure_getenv in radv_builtin_cache_path", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "72f95a8364c018ed833aab171f8d5fa65145cb10", + "description": "mesa/main: Use secure_getenv for shader dumping", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "321e2cee5315e94c050f8659a8cd55e0e7cd9076", + "description": "vtn: Use secure_getenv for shader dumping", + "nominated": true, + "nomination_type": 0, + 
"resolution": 2, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "f3b892b74ab7db998dd24d8443803ba9dc20f8a6", + "description": "aux/trace: Guard triggers behind __normal_user", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "7ea96ff75b771ea8eb48d2b9fec05e5edc958b21", + "description": "vulkan: Use secure_getenv for trigger files", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "8b209a62006ad6fd4593bb4f528ce8aee23fc038", + "description": "util: Provide a secure_getenv fallback for platforms without it", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "0fa4ea98ca70646f445552fce3e1912655b34274", + "description": "ci: always skip dEQP-VK.info.device_extensions", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "4824238dd901b57e2f804b38fdc88e2d1a533c4f", + "description": "zink: Fix return type and values of create_buffer and create_images", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "main_sha": null, + "because_sha": "f6383e03f9c8d56ffc76f014175fc9697bd66945", + "notes": null + }, + { + "sha": "c309d2017230e657fd042b9b9dd7acd1c621d2c5", + "description": "aco/insert_exec_mask: Fix unconditional demote at top-level control flow.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "4097df29f6e716155fae17f0ec5ce38fa3ec2a96", + "description": "nvk: allow 3d compressed textures", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "9ddf647eabebd2e346c2bcb5e68e198ecca650ac", + 
"description": "nvk: Fix whitespace in nvk_image.c", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "e8bec58de0138ba1e6118b45b1c0240e25cdf11b", + "description": "nil: Set the level offset to 0 in nil_image_for_level", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null + }, + { + "sha": "445aacb4217cbf5fb7be604c5484eb84c3c06497", + "description": "clc: retrieve libclang path at runtime.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "main_sha": null, + "because_sha": "e22491c83265200f518e9fb4deff54e3c2823b68", + "notes": null + }, + { + "sha": "8efd11fce99710757b14cb575f33778f730ec904", + "description": "clc: force fPIC for every user when using shared LLVM", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "main_sha": null, + "because_sha": "e22491c83265200f518e9fb4deff54e3c2823b68", + "notes": null + }, + { + "sha": "37a13463478703e42e590b8d23a221604653b384", + "description": "meson: remove opencl-external-clang-headers option and rely on shared-llvm", + "nominated": false, + "nomination_type": 3, + "resolution": 4, + "main_sha": null, + "because_sha": null,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/VERSION ^
@@ -1 +1 @@ -23.3.1 +23.3.5
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/bin/symbols-check.py ^
@@ -7,6 +7,7 @@ # This list contains symbols that _might_ be exported for some platforms PLATFORM_SYMBOLS = [ + '_GLOBAL_OFFSET_TABLE_', '__bss_end__', '__bss_start__', '__bss_start',
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes.rst ^
@@ -3,6 +3,10 @@ The release notes summarize what's new or changed in each Mesa release. +- :doc:`23.3.5 release notes <relnotes/23.3.5>` +- :doc:`23.3.4 release notes <relnotes/23.3.4>` +- :doc:`23.3.3 release notes <relnotes/23.3.3>` +- :doc:`23.3.2 release notes <relnotes/23.3.2>` - :doc:`23.3.1 release notes <relnotes/23.3.1>` - :doc:`23.3.0 release notes <relnotes/23.3.0>` - :doc:`23.1.9 release notes <relnotes/23.1.9>` @@ -404,6 +408,10 @@ :maxdepth: 1 :hidden: + 23.3.5 <relnotes/23.3.5> + 23.3.4 <relnotes/23.3.4> + 23.3.3 <relnotes/23.3.3> + 23.3.2 <relnotes/23.3.2> 23.3.1 <relnotes/23.3.1> 23.3.0 <relnotes/23.3.0> 23.1.9 <relnotes/23.1.9>
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.1.rst ^
@@ -19,7 +19,7 @@ :: - TBD. + 6e48126d70fdb3f20ffeb246ca0c2e41ffdc835f0663a03d4526b8bf5db41de6 mesa-23.3.1.tar.xz New features
[-] [+]	Added	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.2.rst ^
@@ -0,0 +1,177 @@ +Mesa 23.3.2 Release Notes / 2023-12-27 +====================================== + +Mesa 23.3.2 is a bug fix release which fixes bugs found since the 23.3.1 release. + +Mesa 23.3.2 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 23.3.2 implements the Vulkan 1.3 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + 3cfcb81fa16f89c56abe3855d2637d396ee4e03849b659000a6b8e5f57e69adc mesa-23.3.2.tar.xz + + +New features +------------ + +- None + + +Bug fixes +--------- + +- anv: glcts regression on zink +- nir: Trivial loop not unrolling +- Possible regression with AMD GPU with flatpak apps +- Compiling Mesa with X in custom prefix fails in Intel Vulkan driver +- radv/aco: Crysis 2 Remastered RT reflections are blocky around the edges with ACO, renders normally with LLVM + + +Changes +------- + +Bas Nieuwenhuizen (1): + +- radv: Use correct writemask for cooperative matrix ordering. + +Boris Brezillon (3): + +- util/hash_table: Use FREE() to be consistent with the CALLOC_STRUCT() call +- util/hash_table: Don't leak hash_u64_key objects when the entry exists +- util/hash_table: Don't leak hash_key_u64 objects when the u64 hash table is destroyed + +Christian Gmeiner (2): + +- etnaviv: Update headers from rnndb +- etnaviv: Add static_assert(..) 
to catch memory corruption + +Dave Airlie (1): + +- intel/compiler: move gen5 final pass to actually be final pass + +David Heidelberg (2): + +- ci/freedreno: timestamp-get no longer fails on Adreno +- ci/freedreno: fail introduced by ARB_post_depth_coverage + +Eric Engestrom (10): + +- docs: add sha256sum for 23.3.1 +- .pick_status.json: Update to d761871761e5fe7d498b0cc818ed627698ed1225 +- .pick_status.json: Update to 377c6b2d45ee73da3e5431846a3b4bfdd7ae2b83 +- ci/b2c: drop passthrough of unset CI_JOB_JWT +- .pick_status.json: Updates notes for 6a92af158dc132eee449c175bdee66d92c68d191 +- vulkan/wsi: fix build when platform headers are installed in non-standard locations +- .pick_status.json: Update to 670a799ebff9a98daafccf49324c2a01311b0c41 +- .pick_status.json: Update to e61fae6eb8ae1ae1228d6f89329324310db808ae +- .pick_status.json: Update to 1e6fcd6a611574241b1cde306afcc416a03ac76b +- .pick_status.json: Update to 55c262898ae7188311c89a60e4ec0fbb67b7a95b + +Faith Ekstrand (1): + +- nir: Scalarize bounds checked loads and stores + +Friedrich Vock (2): + +- radv,vtn,driconf: Add and use radv_rt_ssbo_non_uniform workaround for Crysis 2/3 Remastered +- radv/rt: Initialize unused children in PLOC early-exit + +George Ouzounoudis (1): + +- vulkan: Fix dynamic graphics state enum usage + +Gert Wollny (1): + +- r600/sfn: keep workgroup and invocation ID registers for whole shader + +Jesse Natalie (1): + +- d3d12: Only destroy the winsys during screen destruction, not reset + +Jonathan Gray (1): + +- intel/common: add directory prefix to intel_gem.h include + +José Expósito (1): + +- egl/glx: fallback to software when Zink is forced and fails + +Karol Herbst (4): + +- rusticl/kernel: explicitly set rounding modes +- rusticl: do not warn on empty RUSTICL_DEBUG or RUSTICL_FEATURES +- rusticl: silence clippy::arc-with-non-send-sync for now +- rusticl: check rustc version for flags requiring newer rustc/clippy + +Kenneth Graunke (3): + +- iris: Initialize bo->index to -1 
when importing buffers +- iris: Don't search the exec list if BOs have never been added to one +- iris: Skip mi_builder init for indirect draws + +Lionel Landwerlin (4): + +- nir/clone: fix missing printf_info clone +- nir/divergence: handle printf intrinsic +- anv: fix incorrect queue_family access on command buffer +- anv: wait for CS write completion before executing secondary + +Michel Dänzer (2): + +- gallium/dri: Return __DRI_ATTRIB_SWAP_UNDEFINED for _SWAP_METHOD +- glx: Handle IGNORE_GLX_SWAP_METHOD_OML regardless of GLX_USE_APPLEGL + +Pierre-Eric Pelloux-Prayer (4): + +- radeonsi/sqtt: fix RGP pm4 state emit function +- radeonsi/sqtt: clear record_counts variable +- radeonsi/sqtt: rework pm4.reg_va_low_idx +- radeonsi/sqtt: use calloc instead of malloc + +Robert Foss (1): + +- egl/surfaceless: Fix EGL_DEVICE_EXT implementation + +Sil Vilerino (1): + +- d3d12: Fix AV1 video encode 32 bits build + +Sviatoslav Peleshko (2): + +- nir/loop_analyze: Don't test non-positive iterations count +- intel/fs: Don't optimize DW*1 MUL if it stores value to the accumulator + +Tapani Pälli (5): + +- anv/hasvk/drirc: change anv_assume_full_subgroups to have subgroup size +- drirc: setup anv_assume_full_subgroups=16 for UnrealEngine5.1 +- iris: use intel_needs_workaround with 14015055625 +- mesa: fix enum support for EXT_clip_cull_distance +- drirc/anv: disable FCV optimization for Baldur's Gate 3 + +Timothy Arceri (1): + +- radeonsi: fix divide by zero in si_get_small_prim_cull_info() + +Vinson Lee (1): + +- etnaviv: Remove duplicate initializers + +Yiwei Zhang (1): + +- vulkan/wsi/wayland: ensure drm modifiers stored in chain are immutable + +Yonggang Luo (1): + +- dzn: Fixes -Werror=incompatible-pointer-type
[-] [+]	Added	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.3.rst ^
@@ -0,0 +1,155 @@ +Mesa 23.3.3 Release Notes / 2024-01-10 +====================================== + +Mesa 23.3.3 is a bug fix release which fixes bugs found since the 23.3.2 release. + +Mesa 23.3.3 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 23.3.3 implements the Vulkan 1.3 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + 518307c0057fa3cee8b58df78be431d4df5aafa7edc60d09278b2d7a0a80f3b4 mesa-23.3.3.tar.xz + + +New features +------------ + +- None + + +Bug fixes +--------- + +- Error during SPIR-V parsing of OpCopyLogical +- radv: Atlas Fallen corrupted rendering +- intel: Require 64KB alignment when using CCS and multiple engines +- 23.3.2 regression: kms_swrast_dri.so segfaults +- Mesa is not compatible with Python 3.12 due to use of distutils +- anv: importing memory for a compressed image using modifier is hitting an assert + + +Changes +------- + +Connor Abbott (1): + +- ir3/legalize: Fix helper propagation with b.any/b.all/getone + +Daniel Schürmann (1): + +- nir/opt_move_discards_to_top: don't schedule discard/demote across subgroup operations + +Dave Airlie (5): + +- gallivm: handle llvm 16 atexit ordering problems. +- intel/compiler: fix release build unused variable. +- llvmpipe: fix caching for texture shaders. +- intel/compiler: reemit boolean resolve for inverted if on gen5 +- radv: don't emit cp dma packets on video rings. 
+ +Eric Engestrom (13): + +- docs: add sha256sum for 23.3.2 +- .pick_status.json: Mark eb5bb5c784e97c533e30b348e82e446ac0da59c8 as denominated +- .pick_status.json: Update to ebee672ef87794f3f4201270623a92f34e62b8ff +- .pick_status.json: Mark 060439bdf0e74f0f2e255d0a81b5356f9a2f5457 as denominated +- .pick_status.json: Mark 8d0e70f628b745ad81124e0c3fe5e46ea84f6b46 as denominated +- .pick_status.json: Update to 39c8cca34fb72db055df18abf1d473e099f4b05b +- .pick_status.json: Update to 2c078bfd18cae0ed1a0a3916020e49fb74668504 +- .pick_status.json: Update to e2a7c877ad1fd6bda4032f707eea7646e5229969 +- .pick_status.json: Update to 031978933151e95690e93919e7bfd9f1753f2794 +- .pick_status.json: Mark fbe4e16db2d369c3e54067d17f81bdce8661a461 as denominated +- .pick_status.json: Mark b38c776690c9c39b04c57d74f9b036de56995aff as denominated +- .pick_status.json: Update to f6d2df5a7542025022e69b81dbe3af3e51ea5cd3 +- .pick_status.json: Update to 67ad1142cf6afe61de834cefeddb4be06382899f + +Erik Faye-Lund (2): + +- zink: update profile schema +- zink: use KHR version of maint5 features + +Friedrich Vock (1): + +- radv/rt: Free traversal NIR after compilation + +Georg Lehmann (1): + +- aco: fix applying input modifiers to DPP8 + +Jonathan Gray (1): + +- zink: put sysmacros.h include under #ifdef MAJOR_IN_SYSMACROS + +José Roberto de Souza (2): + +- anv: Assume that imported bos already have flat CCS requirements satisfied +- anv: Increase ANV_MAX_QUEUE_FAMILIES + +Karol Herbst (2): + +- zink: lock screen queue on context_destroy and CreateSwapchain +- zink: fix heap-use-after-free on batch_state with sub-allocated pipe_resources + +Konstantin Seurer (2): + +- vtn: Remove transpose(m0)*m1 fast path +- vtn: Allow for OpCopyLogical with different but compatible types + +Leo Liu (1): + +- gallium/vl: match YUYV/UYVY swizzle with change of color channels + +Lionel Landwerlin (2): + +- isl: implement Wa_22015614752 +- intel/fs: fix depth compute state for unchanged depth layout + +Marek 
Olšák (1): + +- glthread: don't unroll draws using user VBOs with GLES + +Mary Guillemard (2): + +- zink: Initialize pQueueFamilyIndices for image query / create +- zink: Always fill external_only in zink_query_dmabuf_modifiers + +Mike Blumenkrantz (1): + +- zink: enforce maxTexelBufferElements for texel buffer sizing + +Rhys Perry (1): + +- aco/tests: use more raw strings + +Samuel Pitoiset (2): + +- radv: fix binding partial depth/stencil views with dynamic rendering +- radv: disable stencil test without a stencil attachment + +Sil Vilerino (2): + +- Revert "d3d12: Only destroy the winsys during screen destruction, not reset" +- Revert "d3d12: Fix screen->winsys leak in d3d12_screen" + +Vinson Lee (1): + +- ac/rgp: Fix single-bit-bitfield-constant-conversion warning + +Yonggang Luo (1): + +- meson: Support for both packaging and distutils + +antonino (1): + +- egl: only check dri3 on X11
[-] [+]	Added	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.4.rst ^
@@ -0,0 +1,199 @@ +Mesa 23.3.4 Release Notes / 2024-01-24 +====================================== + +Mesa 23.3.4 is a bug fix release which fixes bugs found since the 23.3.3 release. + +Mesa 23.3.4 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 23.3.4 implements the Vulkan 1.3 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + df12d765be4650fe532860b18aa18e6da1d0b07d1a21dfdfe04660e6b7bac39a mesa-23.3.4.tar.xz + + +New features +------------ + +- None + + +Bug fixes +--------- + +- [AMDGPU RDNA3] Antialiasing is broken in Blender +- Assassin's Creed Odyssey wrong colors on Arc A770 +- The Finals fails to launch with DX12 on Intel Arc unless "force_vk_vendor" is set to -1. +- zink crashes on nvidia +- radv: games render with garbage output on RX5600M through PRIME with DCC +- radv: RGP reports for mesh shaders are confusing +- d3d10umd: Build failure regression with MSVC during 23.3 development cycle +- VA-API CI tests freeze +- Radeon: YUYV DMA BUF eglCreateImageKHR fails + + +Changes +------- + +Alessandro Astone (1): + +- zink: Fix resizable BAR detection logic + +Boris Brezillon (3): + +- panvk: Fix tracing +- panvk: Fix access to unitialized panvk_pipeline_layout::num_sets field +- panfrost: Clamp the render area to the damage region + +Daniel Schürmann (1): + +- aco: give spiller more room to assign spilled SGPRs to VGPRs + +Dave Airlie (2): + +- radv/video: refactor sq start/end code to avoid decode hangs. 
+- radv: don't submit empty command buffers on encoder ring. + +David Rosca (1): + +- radeonsi/vcn: Fix H264 slice header when encoding I frames + +Eric Engestrom (7): + +- docs: add sha256sum for 23.3.3 +- .pick_status.json: Update to 68f5277887aae1cdc202f45ecd44df2c3c59ba7d +- .pick_status.json: Update to 4fe5f06d400a7310ffc280761c27b036aec86646 +- .pick_status.json: Update to ff84aef116f9d0d13440fd13edf2ac0b69a8c132 +- .pick_status.json: Update to 6e4bb8253ed36f911a0a45dfecf89c237a8cd362 +- .pick_status.json: Update to d0a3bac163ca803eda03feb3afea80e516568caf +- .pick_status.json: Update to eca4f0f632b1e3e6e24bd12ee5f00522eb7d0fdb + +Friedrich Vock (4): + +- radv/rt: Add workaround to make leaves always active +- radv: Fix shader replay allocation condition +- nir: Make is_trivial_deref_cast public +- nir: Handle casts in nir_opt_copy_prop_vars + +Georg Lehmann (1): + +- aco: stop scheduling at p_logical_end + +Hans-Kristian Arntzen (1): + +- wsi/x11: Add workaround for Detroit Become Human. 
+ +Ian Romanick (1): + +- intel/compiler: Track mue_compaction and mue_header_packing flags in brw_get_compiler_config_value + +Jesse Natalie (1): + +- mesa: Consider mesa format in addition to internal format for mip/cube completeness + +Karol Herbst (3): + +- rusticl/kernel: run opt/lower_memcpy later to fix a crash +- nir: rework and fix rotate lowering +- rusticl/kernel: check that local size on dispatch doesn't exceed limits + +Konstantin Seurer (4): + +- ac/llvm: Enable helper invocations for quad OPs +- lavapipe: Fix DGC vertex buffer handling +- lavapipe: Mark vertex elements dirty if the stride changed +- lavapipe: Report the correct preprocess buffer size + +Lionel Landwerlin (4): + +- anv: fix disabled Wa_14017076903/18022508906 +- anv: hide vendor ID for The Finals +- anv: fix pipeline executable properties with graphics libraries +- anv: implement undocumented tile cache flush requirements + +Lucas Stach (1): + +- etnaviv: disable 64bpp render/sampler formats + +Matt Turner (4): + +- symbols-check: Add _GLOBAL_OFFSET_TABLE_ +- nir: Fix cast +- util: Add DETECT_ARCH_HPPA macro +- util/tests: Disable half-float NaN test on hppa/old-mips + +Max R (1): + +- d3d10umd: Fix compilation + +Mike Blumenkrantz (5): + +- lavapipe: fix devenv icd filename +- zink: always force flushes when originating from api frontend +- zink: ignore tc buffer replacement info +- zink: fix buffer rebind early-out check +- zink: fix separate shader patch variable location adjustment + +Patrick Lerda (1): + +- glsl/nir: fix gl_nir_cross_validate_outputs_to_inputs() memory leak + +Pavel Ondračka (1): + +- r300: fix reusing of color varying slots for generic ones + +Pierre-Eric Pelloux-Prayer (2): + +- ac/surface: don't oversize surf_size +- radeonsi: compute epitch when modifying surf_pitch + +Rhys Perry (3): + +- radv: do nir_shader_gather_info after radv_nir_lower_rt_abi +- nir/lower_non_uniform: set non_uniform=false when lowering is not needed +- nir/lower_shader_calls: remove CF 
before nir_opt_if + +Samuel Pitoiset (2): + +- radv: do not issue SQTT marker with DISPATCH_MESH_INDIRECT_MULTI +- radv: fix indirect dispatches on the compute queue on GFX7 + +Sviatoslav Peleshko (1): + +- nir: Use alu source components count in nir_alu_srcs_negative_equal + +Tapani Pälli (4): + +- anv: check for wa 16013994831 in emit_so_memcpy_end +- iris: expand pre-hiz data cache flush to gfx >= 125 +- anv: expand pre-hiz data cache flush to gfx >= 125 +- iris: replace constant cache invalidate with hdc flush + +Tatsuyuki Ishi (1): + +- radv: never set DISABLE_WR_CONFIRM for CP DMA clears and copies + +Timur Kristóf (1): + +- radv: Correctly select SDMA support for PRIME blit. + +Yiwei Zhang (4): + +- vulkan/wsi/wayland: fix returns and avoid leaks for failed swapchain +- venus: fix pipeline layout lifetime +- venus: fix pipeline derivatives +- venus: fix to respect the final pipeline layout + +Yonggang Luo (1): + +- compiler/spirv: The spirv shader is binary, should write in binary mode
[-] [+]	Added	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.5.rst ^
@@ -0,0 +1,154 @@ +Mesa 23.3.5 Release Notes / 2024-02-01 +====================================== + +Mesa 23.3.5 is a bug fix release which fixes bugs found since the 23.3.4 release. + +Mesa 23.3.5 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 23.3.5 implements the Vulkan 1.3 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. + + +New features +------------ + +- None + + +Bug fixes +--------- + +- [radeonsi] Regression: graphical artifacting on water texture in OpenGOAL +- VAAPI: EFC on VCN2 produces broken H264 video and crashes the HEVC encoder + + +Changes +------- + +Antoine Coutant (1): + +- clc: retrieve libclang path at runtime. + +Daniel Schürmann (1): + +- aco/insert_exec_mask: Fix unconditional demote at top-level control flow. 
+ +David Heidelberg (1): + +- ci/freedreno: timestamp-get no longer fails on Adreno + +Dmitry Baryshkov (1): + +- freedreno/drm: don't crash for unsupported devices + +Eric Engestrom (8): + +- docs: add sha256sum for 23.3.4 +- .pick_status.json: Update to b75ee1a0670a3207dfd99917e4f47d064a44197f +- .pick_status.json: Update to 4cd5b2b5426e8d670fc3657eee040a79e3f9df1e +- util: rename __check_suid() to __normal_user() +- tree-wide: use __normal_user() everywhere instead of writing the check manually +- util: simplify logic in __normal_user() +- util: check for setgid() as well in __normal_user() +- .pick_status.json: Mark 321e2cee5315e94c050f8659a8cd55e0e7cd9076 as denominated + +Faith Ekstrand (1): + +- nvk: Don't exnore ExternalImageFormatInfo + +Friedrich Vock (7): + +- util: Provide a secure_getenv fallback for platforms without it +- aux/trace: Guard triggers behind __normal_user +- mesa/main: Use secure_getenv for shader dumping +- radv: Use secure_getenv in radv_builtin_cache_path +- radv: Use secure_getenv for RADV_THREAD_TRACE_TRIGGER +- util/disk_cache: Use secure_getenv to determine cache directories +- vulkan: Use secure_getenv for trigger files + +Gert Wollny (5): + +- r600: lower dround_even also on hardware that supports fp64 +- virgl: Use better reporting for mirror_clamp features +- radv: Fix compilation with gcc-13 and tsan enabled +- nir/lower_int64: Fix compilation with gcc-13 and tsan enabled +- nir/builder: Fix compilation with gcc-13 when tsan is enabled + +Haihao Xiang (1): + +- anv: Fix typo in transition_color_buffer + +Hyunjun Ko (1): + +- anv/video: fix out-of-bounds read + +Iago Toral Quiroga (3): + +- broadcom/compiler: fix incorrect flags setup in non-uniform if path +- broadcom/compiler: fix incorrect flags update for subgroup elect +- broadcom/compiler: be more careful with unifa in non-uniform control flow + +Karol Herbst (1): + +- clc: force fPIC for every user when using shared LLVM + +Lionel Landwerlin (2): + +- anv: don't prevent 
L1 untyped cache flush in 3D mode +- anv: fix transfer barriers flushes with compute queue + +Louis-Francis Ratté-Boulianne (4): + +- panfrost: factor out method to check whether we can discard resource +- panfrost: add copy_resource flag to pan_resource_modifier_convert +- panfrost: add can_discard flag to pan_legalize_afbc_format +- panfrost: Legalize before updating part of a AFBC-packed texture + +Mike Blumenkrantz (3): + +- zink: set more dynamic states when using shader objects +- zink: always map descriptor buffers as COHERENT +- zink: fix descriptor buffer unmaps on screen destroy + +Pierre-Eric Pelloux-Prayer (1): + +- radeonsi: emit cache flushes before draw registers + +Rhys Perry (1): + +- aco: fix labelling of s_not with constant + +Rob Clark (3): + +- freedreno: De-duplicate 19.2MHz RBBM tick conversion +- freedreno: Fix timestamp conversion +- freedreno: Implement PIPE_CAP_TIMER_RESOLUTION + +Rohan Garg (1): + +- anv: untyped data port flush required when a pipeline sets the VK_ACCESS_2_SHADER_STORAGE_READ_BIT + +Sebastian Wick (1): + +- radeonsi: Destroy queues before the aux contexts + +Tapani Pälli (1): + +- anv: move \*bits_for_access_flags to genX_cmd_buffer + +Thong Thai (1): + +- radeonsi/vcn: remove EFC support for renoir
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/meson.build ^
@@ -796,6 +796,7 @@ endif add_languages('rust', required: true) + rustc = meson.get_compiler('rust') with_clc = true endif @@ -885,9 +886,12 @@ has_mako = run_command( prog_python, '-c', ''' -from distutils.version import StrictVersion +try: + from packaging.version import Version +except: + from distutils.version import StrictVersion as Version import mako -assert StrictVersion(mako.__version__) >= StrictVersion("0.8.0") +assert Version(mako.__version__) >= Version("0.8.0") ''', check: false) if has_mako.returncode() != 0 error('Python (3.x) mako module >= 0.8.0 required to build mesa.') @@ -1333,6 +1337,7 @@ 'getrandom': '', 'qsort_s': '', 'posix_fallocate': '', + 'secure_getenv': '', } foreach f, prefix: functions_to_detect
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-navi10-aco-fails.txt ^
@@ -1,3 +1,2 @@ # New CTS failures in 1.3.7.0 dEQP-VK.api.version_check.unavailable_entry_points,Fail -dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.partial_binding_depth_stencil,Fail
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-navi21-llvm-fails.txt ^
@@ -3,8 +3,6 @@ dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_frag,Fail dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_vert,Fail -dEQP-VK.draw.renderpass.shader_invocation.helper_invocation,Fail - dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r32g32b32a32_sfloat.samples_8,Fail dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r8g8b8a8_unorm.samples_8,Fail dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_4.r32g32b32a32_sfloat.samples_8,Fail
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-polaris10-aco-fails.txt ^
@@ -20,4 +20,3 @@ # New CTS failures in 1.3.7.0. dEQP-VK.api.version_check.unavailable_entry_points,Fail -dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.partial_binding_depth_stencil,Fail
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-renoir-aco-fails.txt ^
@@ -1,3 +1,2 @@ # New CTS failures in 1.3.7.0. dEQP-VK.api.version_check.unavailable_entry_points,Fail -dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.partial_binding_depth_stencil,Fail
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.c ^
@@ -1253,6 +1253,11 @@ info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND || info->family == CHIP_TONGA; + /* GFX7 CP requires 32 bytes alignment for the indirect buffer arguments on + * the compute queue. + */ + info->has_async_compute_align32_bug = info->gfx_level == GFX7; + /* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the * feature version wasn't bumped. */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.h ^
@@ -102,6 +102,7 @@ bool has_vgt_flush_ngg_legacy_bug; bool has_cs_regalloc_hang_bug; bool has_async_compute_threadgroup_bug; + bool has_async_compute_align32_bug; bool has_32bit_predication; bool has_3d_cube_border_color_mipmap; bool has_image_opcodes;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_rgp.c ^
@@ -79,9 +79,9 @@ struct sqtt_file_header_flags { union { struct { - int32_t is_semaphore_queue_timing_etw : 1; - int32_t no_queue_semaphore_timestamps : 1; - int32_t reserved : 30; + uint32_t is_semaphore_queue_timing_etw : 1; + uint32_t no_queue_semaphore_timestamps : 1; + uint32_t reserved : 30; }; uint32_t value;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_surface.c ^
@@ -1869,20 +1869,18 @@ util_next_power_of_two(LINEAR_PITCH_ALIGNMENT / surf->bpe); if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch && - surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) { - /* Adjust surf_pitch to be in elements units not in pixels / + surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR && + in->numMipLevels == 1) { + / Divide surf_pitch (= pitch in pixels) by blk_w to get a + * pitch in elements instead because that's what the hardware needs + * in resource descriptors. + * See the comment in si_descriptors.c. + / surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, linear_alignment); - surf->u.gfx9.epitch = - MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch surf->blk_w - 1); - /* The surface is really a surf->bpe bytes per pixel surface even if we - * use it as a surf->bpe bytes per element one. - * Adjust surf_slice_size and surf_size to reflect the change - * made to surf_pitch. - / - surf->u.gfx9.surf_slice_size = - MAX2(surf->u.gfx9.surf_slice_size, - (uint64_t)surf->u.gfx9.surf_pitch out.height * surf->bpe * surf->blk_w); + surf->u.gfx9.epitch = surf->u.gfx9.surf_pitch - 1; + /* Adjust surf_slice_size and surf_size to reflect the change made to surf_pitch. / + surf->u.gfx9.surf_slice_size = (uint64_t)surf->u.gfx9.surf_pitch out.height * surf->bpe; surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices; for (unsigned i = 0; i < in->numMipLevels; i++) {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_insert_exec_mask.cpp ^
@@ -555,33 +555,32 @@ (ctx.info[block->index].exec[0].second & mask_type_global)); int num; - Temp cond, exit_cond; - if (instr->operands[0].isConstant()) { + Operand src; + Temp exit_cond; + if (instr->operands[0].isConstant() && !(block->kind & block_kind_top_level)) { assert(instr->operands[0].constantValue() == -1u); /* transition to exact and set exec to zero / exit_cond = bld.tmp(s1); - cond = - bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)), - Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm)); + src = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)), + Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm)); num = ctx.info[block->index].exec.size() - 2; if (!(ctx.info[block->index].exec.back().second & mask_type_exact)) { - ctx.info[block->index].exec.back().first = Operand(cond); + ctx.info[block->index].exec.back().first = src; ctx.info[block->index].exec.emplace_back(Operand(bld.lm), mask_type_exact); } } else { / demote_if: transition to exact / if (block->kind & block_kind_top_level && ctx.info[block->index].exec.size() == 2 && ctx.info[block->index].exec.back().second & mask_type_global) { - / We don't need to actually copy anything into exact, since the s_andn2 + /* We don't need to actually copy anything into exec, since the s_andn2 * instructions later will do that. 
/ ctx.info[block->index].exec.pop_back(); } else { transition_to_Exact(ctx, bld, block->index); } - assert(instr->operands[0].isTemp()); - cond = instr->operands[0].getTemp(); + src = instr->operands[0]; num = ctx.info[block->index].exec.size() - 1; } @@ -589,7 +588,7 @@ if (ctx.info[block->index].exec[i].second & mask_type_exact) { Instruction andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), - get_exec_op(ctx.info[block->index].exec[i].first), cond); + get_exec_op(ctx.info[block->index].exec[i].first), src); if (i == (int)ctx.info[block->index].exec.size() - 1) andn2->definitions[0] = Definition(exec, bld.lm);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_optimizer.cpp ^
@@ -1440,7 +1440,7 @@ instr->operands[i].setTemp(info.temp); } else if (info.is_neg() && can_use_mod && mod_bitsize_compat && can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) { - if (!instr->isDPP() && !instr->isSDWA()) + if (!instr->isDPP16() && can_use_VOP3(ctx, instr)) instr->format = asVOP3(instr->format); instr->operands[i].setTemp(info.temp); if (!instr->valu().abs[i]) @@ -1448,7 +1448,7 @@ } if (info.is_abs() && can_use_mod && mod_bitsize_compat && can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) { - if (!instr->isDPP() && !instr->isSDWA()) + if (!instr->isDPP16() && can_use_VOP3(ctx, instr)) instr->format = asVOP3(instr->format); instr->operands[i] = Operand(info.temp); instr->valu().abs[i] = true; @@ -2003,7 +2003,8 @@ break; case aco_opcode::s_not_b32: case aco_opcode::s_not_b64: - if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) { + if (!instr->operands[0].isTemp()) { + } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) { ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise(); ctx.info[instr->definitions[1].tempId()].set_scc_invert( ctx.info[instr->operands[0].tempId()].temp);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_scheduler.cpp ^
@@ -1087,6 +1087,9 @@ for (unsigned idx = 0; idx < block->instructions.size(); idx++) { Instruction* current = block->instructions[idx].get(); + if (current->opcode == aco_opcode::p_logical_end) + break; + if (block->kind & block_kind_export_end && current->isEXP() && ctx.schedule_pos_exports) { unsigned target = current->exp().dest; if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_spill.cpp ^
@@ -1938,7 +1938,7 @@ /* calculate extra VGPRs required for spilling SGPRs / if (demand.sgpr > sgpr_limit) { unsigned sgpr_spills = demand.sgpr - sgpr_limit; - extra_vgprs = DIV_ROUND_UP(sgpr_spills, program->wave_size) + 1; + extra_vgprs = DIV_ROUND_UP(sgpr_spills 2, program->wave_size) + 1; } /* add extra SGPRs required for spilling VGPRs / if (demand.vgpr + extra_vgprs > vgpr_limit) { @@ -1949,7 +1949,7 @@ if (demand.sgpr + extra_sgprs > sgpr_limit) { / re-calculate in case something has changed / unsigned sgpr_spills = demand.sgpr + extra_sgprs - sgpr_limit; - extra_vgprs = DIV_ROUND_UP(sgpr_spills, program->wave_size) + 1; + extra_vgprs = DIV_ROUND_UP(sgpr_spills 2, program->wave_size) + 1; } } /* the spiller has to target the following register demand */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/tests/glsl_scraper.py ^
@@ -28,16 +28,16 @@ } base_layout_qualifier_id_re = r'({0}\s=\s(?P<{0}>\d+))' -id_re = '(?P<name_%d>[^(gl_)]\w+)' -type_re = '(?P<dtype_%d>\w+)' +id_re = r'(?P<name_%d>[^(gl_)]\w+)' +type_re = r'(?P<dtype_%d>\w+)' location_re = base_layout_qualifier_id_re.format('location') component_re = base_layout_qualifier_id_re.format('component') binding_re = base_layout_qualifier_id_re.format('binding') set_re = base_layout_qualifier_id_re.format('set') unk_re = r'\w+(=\d+)?' layout_qualifier_re = r'layout\W$(%s)+$' % '\|'.join([location_re, binding_re, set_re, unk_re, '[, ]+']) -ubo_decl_re = 'uniform\W+%s(\W{)?(?P<type_ubo>)' % (id_re%0) -ssbo_decl_re = 'buffer\W+%s(\W{)?(?P<type_ssbo>)' % (id_re%1) +ubo_decl_re = r'uniform\W+%s(\W{)?(?P<type_ubo>)' % (id_re%0) +ssbo_decl_re = r'buffer\W+%s(\W*{)?(?P<type_ssbo>)' % (id_re%1) image_buffer_decl_re = r'uniform\W+imageBuffer\w+%s;(?P<type_img_buf>)' % (id_re%2) image_decl_re = r'uniform\W+image\w+\W+%s;(?P<type_img>)' % (id_re%3) texture_buffer_decl_re = r'uniform\W+textureBuffer\w+%s;(?P<type_tex_buf>)' % (id_re%4)
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/llvm/ac_llvm_build.c ^
@@ -2989,7 +2989,7 @@ return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } -static LLVMValueRef ac_build_wwm(struct ac_llvm_context ctx, LLVMValueRef src) +static LLVMValueRef ac_build_mode(struct ac_llvm_context ctx, LLVMValueRef src, const char mode) { LLVMTypeRef src_type = LLVMTypeOf(src); unsigned bitsize = ac_get_elem_bits(ctx, src_type); @@ -3002,7 +3002,7 @@ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); - snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type); + snprintf(name, sizeof(name), "llvm.amdgcn.%s.%s", mode, type); ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, 0); if (bitsize < 32) @@ -3011,6 +3011,16 @@ return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } +static LLVMValueRef ac_build_wwm(struct ac_llvm_context ctx, LLVMValueRef src) +{ + return ac_build_mode(ctx, src, "wwm"); +} + +LLVMValueRef ac_build_wqm(struct ac_llvm_context ctx, LLVMValueRef src) +{ + return ac_build_mode(ctx, src, "wqm"); +} + static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context ctx, LLVMValueRef src, LLVMValueRef inactive) {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/llvm/ac_llvm_build.h ^
@@ -469,6 +469,8 @@ LLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context ctx, LLVMValueRef mask, LLVMValueRef add_src); LLVMValueRef ac_build_mbcnt(struct ac_llvm_context ctx, LLVMValueRef mask); +LLVMValueRef ac_build_wqm(struct ac_llvm_context ctx, LLVMValueRef src); + LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context ctx, LLVMValueRef src, nir_op op); LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/llvm/ac_nir_to_llvm.c ^
@@ -3415,21 +3415,26 @@ case nir_intrinsic_quad_broadcast: { unsigned lane = nir_src_as_uint(instr->src[1]); result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane); + result = ac_build_wqm(&ctx->ac, result); break; } case nir_intrinsic_quad_swap_horizontal: result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2); + result = ac_build_wqm(&ctx->ac, result); break; case nir_intrinsic_quad_swap_vertical: result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1); + result = ac_build_wqm(&ctx->ac, result); break; case nir_intrinsic_quad_swap_diagonal: result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0); + result = ac_build_wqm(&ctx->ac, result); break; case nir_intrinsic_quad_swizzle_amd: { uint32_t mask = nir_intrinsic_swizzle_mask(instr); result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3, (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3); + result = ac_build_wqm(&ctx->ac, result); break; } case nir_intrinsic_masked_swizzle_amd: {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/build_helpers.h ^
@@ -156,6 +156,7 @@ #define VK_GEOMETRY_TYPE_TRIANGLES_KHR 0 #define VK_GEOMETRY_TYPE_AABBS_KHR 1 +#define VK_GEOMETRY_TYPE_INSTANCES_KHR 2 #define VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR 1 #define VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR 2
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/leaf.comp ^
@@ -333,6 +333,14 @@ is_active = build_instance(bounds, src_ptr, dst_ptr, global_id); } +#if ALWAYS_ACTIVE + if (!is_active && args.geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { + bounds.min = vec3(0.0); + bounds.max = vec3(0.0); + is_active = true; + } +#endif + DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : RADV_BVH_INVALID_NODE; uvec4 ballot = subgroupBallot(is_active);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/meson.build ^
@@ -53,7 +53,12 @@ [ 'leaf.comp', 'leaf', - [], + ['ALWAYS_ACTIVE=0'], + ], + [ + 'leaf.comp', + 'leaf_always_active', + ['ALWAYS_ACTIVE=1'], ], [ 'morton.comp',
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/ploc_internal.comp ^
@@ -249,7 +249,8 @@ total_bounds.min = vec3(INFINITY); total_bounds.max = vec3(-INFINITY); - for (uint32_t i = 0; i < DEREF(args.header).active_leaf_count; i++) { + uint32_t i = 0; + for (; i < DEREF(args.header).active_leaf_count; i++) { uint32_t child_id = DEREF(INDEX(key_id_pair, src_ids, i)).id; if (child_id != RADV_BVH_INVALID_NODE) { @@ -263,6 +264,8 @@ DEREF(dst_node).children[i] = child_id; } + for (; i < 2; i++) + DEREF(dst_node).children[i] = RADV_BVH_INVALID_NODE; DEREF(dst_node).base.aabb = total_bounds; DEREF(dst_node).bvh_offset = RADV_UNKNOWN_BVH_OFFSET;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/meta/radv_meta.c ^
@@ -296,7 +296,7 @@ static bool radv_builtin_cache_path(char path) { - char xdg_cache_home = getenv("XDG_CACHE_HOME"); + char xdg_cache_home = secure_getenv("XDG_CACHE_HOME"); const char suffix = "/radv_builtin_shaders"; const char suffix2 = "/.cache/radv_builtin_shaders"; struct passwd pwd, result;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/nir/radv_nir_lower_cooperative_matrix.c ^
@@ -181,7 +181,7 @@ nir_def elem = intr->src[1].ssa; nir_def r = nir_vector_insert(&b, src1, elem, index); - nir_store_deref(&b, dst_deref, r, 0xffff); + nir_store_deref(&b, dst_deref, r, nir_component_mask(r->num_components)); nir_instr_remove(instr); progress = true; break; @@ -193,7 +193,7 @@ nir_def r = nir_replicate(&b, elem, radv_nir_cmat_length(desc, wave_size)); - nir_store_deref(&b, dst_deref, r, 0xffff); + nir_store_deref(&b, dst_deref, r, nir_component_mask(r->num_components)); nir_instr_remove(instr); progress = true; break; @@ -253,7 +253,7 @@ } nir_def mat = nir_vec(&b, vars, length); - nir_store_deref(&b, dst_deref, mat, 0xffff); + nir_store_deref(&b, dst_deref, mat, nir_component_mask(mat->num_components)); nir_instr_remove(instr); progress = true; break; @@ -332,7 +332,8 @@ ret = nir_cmat_muladd_amd(&b, A, B, C, .saturate = nir_intrinsic_saturate(intr), .cmat_signed_mask = nir_intrinsic_cmat_signed_mask(intr)); - nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, 0xffff); + nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, + nir_component_mask(ret->num_components)); nir_instr_remove(instr); progress = true; break; @@ -366,7 +367,7 @@ ret = nir_vec(&b, components, ret->num_components * 2); } - nir_store_deref(&b, dst_deref, ret, 0xffff); + nir_store_deref(&b, dst_deref, ret, nir_component_mask(ret->num_components)); nir_instr_remove(instr); progress = true; break; @@ -375,7 +376,8 @@ nir_def src1 = radv_nir_load_cmat(&b, wave_size, intr->src[1].ssa); nir_op op = nir_intrinsic_alu_op(intr); nir_def ret = nir_build_alu2(&b, op, src1, intr->src[2].ssa); - nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, 0xffff); + nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, + nir_component_mask(ret->num_components)); nir_instr_remove(instr); progress = true; break; @@ -385,14 +387,16 @@ nir_def src2 = radv_nir_load_cmat(&b, wave_size, intr->src[2].ssa); 
nir_op op = nir_intrinsic_alu_op(intr); nir_def ret = nir_build_alu2(&b, op, src1, src2); - nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, 0xffff); + nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, + nir_component_mask(ret->num_components)); nir_instr_remove(instr); progress = true; break; } case nir_intrinsic_cmat_bitcast: { nir_def *src1 = radv_nir_load_cmat(&b, wave_size, intr->src[1].ssa); - nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), src1, 0xffff); + nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), src1, + nir_component_mask(src1->num_components)); nir_instr_remove(instr); progress = true; break;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_acceleration_structure.c ^
@@ -41,6 +41,10 @@ #include "bvh/leaf.spv.h" }; +static const uint32_t leaf_always_active_spv[] = { +#include "bvh/leaf_always_active.spv.h" +}; + static const uint32_t morton_spv[] = { #include "bvh/morton.spv.h" }; @@ -513,9 +517,14 @@ if (device->meta_state.accel_struct_build.radix_sort) goto exit; - result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args), - &device->meta_state.accel_struct_build.leaf_pipeline, - &device->meta_state.accel_struct_build.leaf_p_layout); + if (device->instance->force_active_accel_struct_leaves) + result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv), + sizeof(struct leaf_args), &device->meta_state.accel_struct_build.leaf_pipeline, + &device->meta_state.accel_struct_build.leaf_p_layout); + else + result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args), + &device->meta_state.accel_struct_build.leaf_pipeline, + &device->meta_state.accel_struct_build.leaf_p_layout); if (result != VK_SUCCESS) goto exit;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_cmd_buffer.c ^
@@ -2271,17 +2271,19 @@ static void radv_emit_depth_control(struct radv_cmd_buffer cmd_buffer) { + const struct radv_rendering_state render = &cmd_buffer->state.render; struct radv_dynamic_state d = &cmd_buffer->state.dynamic; + const bool stencil_test_enable = + d->vk.ds.stencil.test_enable && (render->ds_att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT); - radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, - S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) \| - S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) \| - S_028800_ZFUNC(d->vk.ds.depth.compare_op) \| - S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 1 : 0) \| - S_028800_STENCIL_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) \| - S_028800_BACKFACE_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) \| - S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) \| - S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare)); + radeon_set_context_reg( + cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, + S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) \| + S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) \| S_028800_ZFUNC(d->vk.ds.depth.compare_op) \| + S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 
1 : 0) \| + S_028800_STENCIL_ENABLE(stencil_test_enable) \| S_028800_BACKFACE_ENABLE(stencil_test_enable) \| + S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) \| + S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare)); } static void @@ -5861,6 +5863,11 @@ render->ds_att.format = inheritance_info->depthAttachmentFormat; if (inheritance_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) render->ds_att.format = inheritance_info->stencilAttachmentFormat; + + if (vk_format_has_depth(render->ds_att.format)) + render->ds_att_aspects \|= VK_IMAGE_ASPECT_DEPTH_BIT; + if (vk_format_has_stencil(render->ds_att.format)) + render->ds_att_aspects \|= VK_IMAGE_ASPECT_STENCIL_BIT; } cmd_buffer->state.inherited_pipeline_statistics = pBeginInfo->pInheritanceInfo->pipelineStatistics; @@ -7716,6 +7723,7 @@ } struct radv_attachment ds_att = {.iview = NULL}; + VkImageAspectFlags ds_att_aspects = 0; const VkRenderingAttachmentInfo d_att_info = pRenderingInfo->pDepthAttachment; const VkRenderingAttachmentInfo s_att_info = pRenderingInfo->pStencilAttachment; if ((d_att_info != NULL && d_att_info->imageView != VK_NULL_HANDLE) \|\| @@ -7751,7 +7759,16 @@ assert(d_iview == NULL \|\| s_iview == NULL \|\| d_iview == s_iview); ds_att.iview = d_iview ? d_iview : s_iview, ds_att.format = ds_att.iview->vk.format; - radv_initialise_ds_surface(cmd_buffer->device, &ds_att.ds, ds_att.iview); + + if (d_iview && s_iview) { + ds_att_aspects = VK_IMAGE_ASPECT_DEPTH_BIT \| VK_IMAGE_ASPECT_STENCIL_BIT; + } else if (d_iview) { + ds_att_aspects = VK_IMAGE_ASPECT_DEPTH_BIT; + } else { + ds_att_aspects = VK_IMAGE_ASPECT_STENCIL_BIT; + } + + radv_initialise_ds_surface(cmd_buffer->device, &ds_att.ds, ds_att.iview, ds_att_aspects); assert(d_res_iview == NULL \|\| s_res_iview == NULL \|\| d_res_iview == s_res_iview); ds_att.resolve_iview = d_res_iview ? 
d_res_iview : s_res_iview; @@ -7800,6 +7817,7 @@ render->color_att_count = pRenderingInfo->colorAttachmentCount; typed_memcpy(render->color_att, color_att, render->color_att_count); render->ds_att = ds_att; + render->ds_att_aspects = ds_att_aspects; render->vrs_att = vrs_att; render->vrs_texel_size = vrs_texel_size; cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_FRAMEBUFFER; @@ -7807,7 +7825,7 @@ if (cmd_buffer->device->physical_device->rad_info.rbplus_allowed) cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_RBPLUS; - cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; + cmd_buffer->state.dirty \|= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS \| RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE; if (render->vrs_att.iview && cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3) { if (render->ds_att.iview) { @@ -8068,18 +8086,15 @@ uint32_t draw_id_enable = !!cmd_buffer->state.uses_drawid; uint32_t mode1_enable = !cmd_buffer->device->mesh_fast_launch_2; - const bool sqtt_en = !!cmd_buffer->device->sqtt.bo; radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) \| PKT3_RESET_FILTER_CAM_S(1)); radeon_emit(cs, 0); / data_offset / radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) \| S_4C1_DRAW_INDEX_REG(draw_id_reg)); if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) \| S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) \| - S_4C2_XYZ_DIM_ENABLE(xyz_dim_enable) \| S_4C2_MODE1_ENABLE(mode1_enable) \| - S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en)); + S_4C2_XYZ_DIM_ENABLE(xyz_dim_enable) \| S_4C2_MODE1_ENABLE(mode1_enable)); else - radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) \| S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) \| - S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en)); + radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) \| S_4C2_COUNT_INDIRECT_ENABLE(!!count_va)); radeon_emit(cs, draw_count); radeon_emit(cs, count_va & 0xFFFFFFFF); radeon_emit(cs, count_va >> 32); @@ -9693,11 +9708,39 
@@ } if (radv_cmd_buffer_uses_mec(cmd_buffer)) { + uint64_t indirect_va = info->va; + radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted, 4 / DISPATCH_INDIRECT size /); + + if (cmd_buffer->device->physical_device->rad_info.has_async_compute_align32_bug && + cmd_buffer->qf == RADV_QUEUE_COMPUTE && !radv_is_aligned(indirect_va, 32)) { + const uint64_t unaligned_va = indirect_va; + UNUSED void ptr; + uint32_t offset; + + if (!radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, sizeof(VkDispatchIndirectCommand), 32, &offset, &ptr)) + return; + + indirect_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; + + for (uint32_t i = 0; i < 3; i++) { + const uint64_t src_va = unaligned_va + i * 4; + const uint64_t dst_va = indirect_va + i * 4; + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) \| COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) \| + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, src_va); + radeon_emit(cs, src_va >> 32); + radeon_emit(cs, dst_va); + radeon_emit(cs, dst_va >> 32); + } + } + radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) \| PKT3_SHADER_TYPE_S(1)); - radeon_emit(cs, info->va); - radeon_emit(cs, info->va >> 32); + radeon_emit(cs, indirect_va); + radeon_emit(cs, indirect_va >> 32); radeon_emit(cs, dispatch_initiator); } else { radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) \| PKT3_SHADER_TYPE_S(1)); @@ -10592,7 +10635,10 @@ } radv_gang_barrier(cmd_buffer, 0, dst_stage_mask); - radv_cp_dma_wait_for_stages(cmd_buffer, src_stage_mask); + + const bool is_gfx_or_ace = cmd_buffer->qf == RADV_QUEUE_GENERAL \|\| cmd_buffer->qf == RADV_QUEUE_COMPUTE; + if (is_gfx_or_ace) + radv_cp_dma_wait_for_stages(cmd_buffer, src_stage_mask); cmd_buffer->state.flush_bits \|= dst_flush_bits;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_device.c ^
@@ -1842,7 +1842,7 @@ void radv_initialise_ds_surface(const struct radv_device device, struct radv_ds_buffer_info ds, - struct radv_image_view iview) + struct radv_image_view iview, VkImageAspectFlags ds_aspects) { unsigned level = iview->vk.base_mip_level; unsigned format, stencil_format; @@ -1859,7 +1859,9 @@ stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; uint32_t max_slice = radv_surface_max_layer_count(iview) - 1; - ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) \| S_028008_SLICE_MAX(max_slice); + ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) \| S_028008_SLICE_MAX(max_slice) \| + S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) \| + S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)); if (device->physical_device->rad_info.gfx_level >= GFX10) { ds->db_depth_view \|= S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) \| S_028008_SLICE_MAX_HI(max_slice >> 11);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_image.c ^
@@ -530,7 +530,7 @@ image_info->surf_index = NULL; } - if (create_info->prime_blit_src && device->physical_device->rad_info.gfx_level == GFX9) { + if (create_info->prime_blit_src && !device->physical_device->rad_info.sdma_supports_compression) { /* Older SDMA hw can't handle DCC */ image->planes[plane].surface.flags \|= RADEON_SURF_DISABLE_DCC; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_instance.c ^
@@ -153,6 +153,8 @@ DRI_CONF_RADV_FLUSH_BEFORE_TIMESTAMP_WRITE(false) DRI_CONF_RADV_RT_WAVE64(false) DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false) + DRI_CONF_RADV_SSBO_NON_UNIFORM(false) + DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(false) DRI_CONF_RADV_APP_LAYER() DRI_CONF_SECTION_END }; @@ -203,6 +205,8 @@ instance->tex_non_uniform = driQueryOptionb(&instance->dri_options, "radv_tex_non_uniform"); + instance->ssbo_non_uniform = driQueryOptionb(&instance->dri_options, "radv_ssbo_non_uniform"); + instance->app_layer = driQueryOptionstr(&instance->dri_options, "radv_app_layer"); instance->flush_before_timestamp_write = @@ -211,6 +215,9 @@ instance->force_rt_wave64 = driQueryOptionb(&instance->dri_options, "radv_rt_wave64"); instance->dual_color_blend_by_location = driQueryOptionb(&instance->dri_options, "dual_color_blend_by_location"); + + instance->force_active_accel_struct_leaves = + driQueryOptionb(&instance->dri_options, "radv_force_active_accel_struct_leaves"); } static const struct vk_instance_extension_table radv_instance_extensions_supported = { @@ -253,7 +260,7 @@ static void radv_handle_legacy_sqtt_trigger(struct vk_instance instance) { - char trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER"); + char *trigger_file = secure_getenv("RADV_THREAD_TRACE_TRIGGER"); if (trigger_file) { instance->trace_trigger_file = trigger_file; instance->trace_mode \|= RADV_TRACE_MODE_RGP;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_pipeline.c ^
@@ -162,6 +162,7 @@ key.image_2d_view_of_3d = device->image_2d_view_of_3d && device->physical_device->rad_info.gfx_level == GFX9; key.tex_non_uniform = device->instance->tex_non_uniform; + key.ssbo_non_uniform = device->instance->ssbo_non_uniform; for (unsigned i = 0; i < num_stages; ++i) { const VkPipelineShaderStageCreateInfo *const stage = &stages[i];
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_pipeline_rt.c ^
@@ -414,6 +414,10 @@ temp_stage.nir = shaders[i]; radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device, pipeline, monolithic); + + /* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). */ + nir_shader_gather_info(temp_stage.nir, nir_shader_get_entrypoint(temp_stage.nir)); + radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled); radv_postprocess_nir(device, pipeline_key, &temp_stage); @@ -609,6 +613,7 @@ radv_shader_layout_init(pipeline_layout, MESA_SHADER_INTERSECTION, &traversal_stage.layout); result = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, false, &traversal_stage, NULL, NULL, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]); + ralloc_free(traversal_module.nir); cleanup: for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_private.h ^
@@ -416,9 +416,11 @@ bool flush_before_query_copy; bool enable_unified_heap_on_apu; bool tex_non_uniform; + bool ssbo_non_uniform; bool flush_before_timestamp_write; bool force_rt_wave64; bool dual_color_blend_by_location; + bool force_active_accel_struct_leaves; char app_layer; }; @@ -1526,7 +1528,7 @@ void radv_initialise_color_surface(struct radv_device device, struct radv_color_buffer_info cb, struct radv_image_view iview); void radv_initialise_ds_surface(const struct radv_device device, struct radv_ds_buffer_info ds, - struct radv_image_view iview); + struct radv_image_view iview, VkImageAspectFlags ds_aspects); void radv_initialise_vrs_surface(struct radv_image image, struct radv_buffer htile_buffer, struct radv_ds_buffer_info *ds); @@ -1568,6 +1570,7 @@ uint32_t color_att_count; struct radv_attachment color_att[MAX_RTS]; struct radv_attachment ds_att; + VkImageAspectFlags ds_att_aspects; struct radv_attachment vrs_att; VkExtent2D vrs_texel_size; };
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_queue.c ^
@@ -1641,8 +1641,11 @@ } queue->device->ws->cs_unchain(cmd_buffer->cs); - if (!chainable \|\| !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) - cs_array[num_submitted_cs++] = cmd_buffer->cs; + if (!chainable \|\| !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) { + /* don't submit empty command buffers to the kernel. */ + if (radv_queue_ring(queue) != AMD_IP_VCN_ENC \|\| cmd_buffer->cs->cdw != 0) + cs_array[num_submitted_cs++] = cmd_buffer->cs; + } chainable = can_chain_next ? cmd_buffer->cs : NULL; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_sampler.c ^
@@ -47,6 +47,7 @@ unreachable("illegal tex wrap mode"); break; } + return 0; } static unsigned @@ -73,6 +74,7 @@ unreachable("illegal compare mode"); break; } + return 0; } static unsigned
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.c ^
@@ -93,7 +93,6 @@ .lower_ffma64 = split_fma, .lower_fpow = true, .lower_mul_2x32_64 = true, - .lower_rotate = true, .lower_iadd_sat = device->rad_info.gfx_level <= GFX8, .lower_hadd = true, .lower_mul_32x16 = true, @@ -461,6 +460,7 @@ .private_data = &spirv_debug_data, }, .force_tex_non_uniform = key->tex_non_uniform, + .force_ssbo_non_uniform = key->ssbo_non_uniform, }; nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint, &spirv_options, &device->physical_device->nir_options[stage->stage]); @@ -1328,14 +1328,14 @@ if (!hole->freelist.prev) continue; - if (hole->offset + hole->size < src->offset) - continue; - uint32_t hole_begin = hole->offset; uint32_t hole_end = hole->offset + hole->size; + if (hole_end < block_end) + continue; + /* If another allocated block overlaps the current replay block, allocation is impossible / - if (block_begin > hole_begin \|\| (hole_end < block_end && hole_end >= block_begin)) + if (hole_begin > block_begin) return NULL; union radv_shader_arena_block block = insert_block(device, hole, block_begin - hole_begin, src->size, NULL);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.h ^
@@ -95,6 +95,7 @@ uint32_t dynamic_provoking_vtx_mode : 1; uint32_t dynamic_line_rast_mode : 1; uint32_t tex_non_uniform : 1; + uint32_t ssbo_non_uniform : 1; uint32_t enable_remove_point_size : 1; uint32_t unknown_rast_prim : 1; uint32_t mesh_shader_queries : 1;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_video.c ^
@@ -112,6 +112,21 @@ sq->ib_checksum = checksum; } +static void +radv_vcn_sq_start(struct radv_cmd_buffer cmd_buffer) +{ + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256); + radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false); + rvcn_decode_ib_package_t ib_header = (rvcn_decode_ib_package_t )&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s); + cmd_buffer->cs->cdw++; + ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER); + cmd_buffer->cs->cdw++; + cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t )&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4; + memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); +} + / generate an stream handle / static unsigned si_vid_alloc_stream_handle(struct radv_physical_device pdevice) @@ -1668,19 +1683,6 @@ cmd_buffer->video.vid = vid; cmd_buffer->video.params = params; - - if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) { - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256); - radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false); - rvcn_decode_ib_package_t ib_header = (rvcn_decode_ib_package_t )&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); - ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s); - cmd_buffer->cs->cdw++; - ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER); - cmd_buffer->cs->cdw++; - cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t )&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); - cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4; - memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); - } } static void @@ -1693,6 +1695,9 @@ uint32_t out_offset; radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); + if 
(cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) + radv_vcn_sq_start(cmd_buffer); + rvcn_dec_message_create(vid, ptr, size); send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset); send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset); @@ -1702,7 +1707,8 @@ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8); for (unsigned i = 0; i < 8; i++) radeon_emit(cmd_buffer->cs, 0x81ff); - } + } else + radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); } static void @@ -1739,12 +1745,6 @@ VKAPI_ATTR void VKAPI_CALL radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR pEndCodingInfo) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - - if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) - return; - - radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); } static void @@ -1840,6 +1840,9 @@ radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); msg_bo = cmd_buffer->upload.upload_bo; + if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) + radv_vcn_sq_start(cmd_buffer); + uint32_t slice_offset; rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_ptr, &slice_offset, frame_info); rvcn_dec_message_feedback(fb_ptr); @@ -1869,7 +1872,8 @@ if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2); set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1); - } + } else + radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); } VKAPI_ATTR void VKAPI_CALL
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/si_cmd_buffer.c ^
@@ -1614,12 +1614,6 @@ /* Sync flags. */ if (flags & CP_DMA_SYNC) header \|= S_411_CP_SYNC(1); - else { - if (device->physical_device->rad_info.gfx_level >= GFX9) - command \|= S_415_DISABLE_WR_CONFIRM_GFX9(1); - else - command \|= S_415_DISABLE_WR_CONFIRM_GFX6(1); - } if (flags & CP_DMA_RAW_WAIT) command \|= S_415_RAW_WAIT(1);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/asahi/compiler/agx_compile.h ^
@@ -275,7 +275,6 @@ .lower_hadd = true, .vectorize_io = true, .use_interpolated_input_intrinsics = true, - .lower_rotate = true, .has_isub = true, .support_16bit_alu = true, .max_unroll_iterations = 32,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/broadcom/compiler/nir_to_vir.c ^
@@ -3047,6 +3047,46 @@ c->current_unifa_offset += 4; } +/* Checks if the value of a nir src is derived from a nir register / +static bool +nir_src_derived_from_reg(nir_src src) +{ + nir_def def = src.ssa; + if (nir_load_reg_for_def(def)) + return true; + + nir_instr parent = def->parent_instr; + switch (parent->type) { + case nir_instr_type_alu: { + nir_alu_instr alu = nir_instr_as_alu(parent); + int num_srcs = nir_op_infos[alu->op].num_inputs; + for (int i = 0; i < num_srcs; i++) { + if (nir_src_derived_from_reg(alu->src[i].src)) + return true; + } + return false; + } + case nir_instr_type_intrinsic: { + nir_intrinsic_instr intr = nir_instr_as_intrinsic(parent); + int num_srcs = nir_intrinsic_infos[intr->intrinsic].num_srcs; + for (int i = 0; i < num_srcs; i++) { + if (nir_src_derived_from_reg(intr->src[i])) + return true; + } + return false; + } + case nir_instr_type_load_const: + case nir_instr_type_undef: + return false; + default: + / By default we assume it may come from a register, the above + * cases should be able to handle the majority of situations + * though. + / + return true; + }; +} + static bool ntq_emit_load_unifa(struct v3d_compile c, nir_intrinsic_instr instr) { @@ -3069,6 +3109,24 @@ if (nir_src_is_divergent(offset)) return false; + / Emitting loads from unifa may not be safe under non-uniform control + * flow. It seems the address that is used to write to the unifa + * register is taken from the first lane and if that lane is disabled + * by control flow then the value we read may be bogus and lead to + * invalid memory accesses on follow-up ldunifa instructions. However, + * ntq_store_def only emits conditional writes for nir registersas long + * we can be certain that the offset isn't derived from a load_reg we + * should be fine. 
+ * + * The following CTS test can be used to trigger the problem, which + * causes a GMP violations in the sim without this check: + * dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int + / + if (vir_in_nonuniform_control_flow(c) && + nir_src_derived_from_reg(offset)) { + return false; + } + / We can only use unifa with SSBOs if they are read-only. Otherwise * ldunifa won't see the shader writes to that address (possibly * because ldunifa doesn't read from the L2T cache). @@ -3243,34 +3301,6 @@ vir_uniform_ui(c, 32 - c->local_invocation_index_bits)); } -/* Various subgroup operations rely on the A flags, so this helper ensures that - * A flags represents currently active lanes in the subgroup. - / -static void -set_a_flags_for_subgroup(struct v3d_compile c) -{ - /* MSF returns 0 for disabled lanes in compute shaders so - * PUSHZ will set A=1 for disabled lanes. We want the inverse - * of this but we don't have any means to negate the A flags - * directly, but we can do it by repeating the same operation - * with NORZ (A = ~A & ~Z). - / - assert(c->s->info.stage == MESA_SHADER_COMPUTE); - vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ); - vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ); - - / If we are under non-uniform control flow we also need to - * AND the A flags with the current execute mask. 
- / - if (vir_in_nonuniform_control_flow(c)) { - const uint32_t bidx = c->cur_block->index; - vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(), - c->execute, - vir_uniform_ui(c, bidx)), - V3D_QPU_UF_ANDZ); - } -} - static void ntq_emit_intrinsic(struct v3d_compile c, nir_intrinsic_instr instr) { @@ -3752,10 +3782,23 @@ break; case nir_intrinsic_elect: { - set_a_flags_for_subgroup(c); - struct qreg first = vir_FLAFIRST(c); + struct qreg first; + if (vir_in_nonuniform_control_flow(c)) { + / Sets A=1 for lanes enabled in the execution mask / + vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); + / Updates A ANDing with lanes enabled in MSF / + vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), + V3D_QPU_UF_ANDNZ); + first = vir_FLAFIRST(c); + } else { + / Sets A=1 for inactive lanes / + vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), + V3D_QPU_PF_PUSHZ); + first = vir_FLNAFIRST(c); + } - / Produce a boolean result from Flafirst / + / Produce a boolean result / vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(), first, vir_uniform_ui(c, 1)), V3D_QPU_PF_PUSHZ); @@ -3972,19 +4015,27 @@ c->execute, vir_uniform_ui(c, else_block->index)); + / Set the flags for taking the THEN block / + vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); + / Jump to ELSE if nothing is active for THEN (unless THEN block is * so small it won't pay off), otherwise fall through. / bool is_cheap = exec_list_is_singular(&if_stmt->then_list) && is_cheap_block(nir_if_first_then_block(if_stmt)); if (!is_cheap) { - vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ); vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLNA); vir_link_blocks(c->cur_block, else_block); } vir_link_blocks(c->cur_block, then_block); - / Process the THEN block. / + / Process the THEN block. + * + * Notice we don't call ntq_activate_execute_for_block here on purpose: + * c->execute is already set up to be 0 for lanes that must take the + * THEN block. 
+ */ vir_set_emit_block(c, then_block); ntq_emit_cf_list(c, &if_stmt->then_list);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/broadcom/vulkan/v3dv_pipeline.c ^
@@ -228,7 +228,6 @@ .lower_ldexp = true, .lower_mul_high = true, .lower_wpos_pntc = false, - .lower_rotate = true, .lower_to_scalar = true, .lower_device_index_to_zero = true, .lower_fquantize2f16 = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/clc/clc_helpers.cpp ^
@@ -23,6 +23,7 @@ // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. +#include <cstdlib> #include <filesystem> #include <sstream> #include <mutex> @@ -57,6 +58,10 @@ #include "spirv.h" +#if DETECT_OS_UNIX +#include <dlfcn.h> +#endif + #ifdef USE_STATIC_OPENCL_C_H #if LLVM_VERSION_MAJOR < 15 #include "opencl-c.h.h" @@ -876,12 +881,24 @@ #endif } #else + + Dl_info info; + if (dladdr((void )clang::CompilerInvocation::CreateFromArgs, &info) == 0) { + clc_error(logger, "Couldn't find libclang path.\n"); + return {}; + } + + char clang_path = realpath(info.dli_fname, NULL); + if (clang_path == nullptr) { + clc_error(logger, "Couldn't find libclang path.\n"); + return {}; + } + // GetResourcePath is a way to retrive the actual libclang resource dir based on a given binary - // or library. The path doesn't even need to exist, we just have to put something in there, - // because we might have linked clang statically. - auto libclang_path = fs::path(LLVM_LIB_DIR) / "libclang.so"; + // or library. auto clang_res_path = - fs::path(Driver::GetResourcesPath(libclang_path.string(), CLANG_RESOURCE_DIR)) / "include"; + fs::path(Driver::GetResourcesPath(std::string(clang_path), CLANG_RESOURCE_DIR)) / "include"; + free(clang_path); c->getHeaderSearchOpts().UseBuiltinIncludes = true; c->getHeaderSearchOpts().UseStandardSystemIncludes = true;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/clc/meson.build ^
@@ -125,7 +125,13 @@ idep_mesautil, dep_spirv_tools] ) +_idep_mesaclc_link_args = [] +if _shared_llvm + _idep_mesaclc_link_args += cc.get_supported_link_arguments('-fPIC') +endif + idep_mesaclc = declare_dependency( link_with : _libmesaclc, include_directories : include_directories('.'), + link_args : _idep_mesaclc_link_args, )
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/glsl/gl_nir_link_varyings.c ^
@@ -746,7 +746,7 @@ if (!validate_explicit_variable_location(consts, output_explicit_locations, var, prog, producer)) { - return; + goto out; } } } @@ -800,7 +800,7 @@ if (!validate_explicit_variable_location(consts, input_explicit_locations, input, prog, consumer)) { - return; + goto out; } while (idx < slot_limit) { @@ -808,7 +808,7 @@ linker_error(prog, "Invalid location %u in %s shader\n", idx, _mesa_shader_stage_to_string(consumer->Stage)); - return; + goto out; } output = output_explicit_locations[idx][input->data.location_frac].var; @@ -871,6 +871,7 @@ } } + out: _mesa_symbol_table_dtor(table); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir.h ^
@@ -1655,6 +1655,12 @@ nir_def def; } nir_deref_instr; +/** + * Returns true if the cast is trivial, i.e. the source and destination type is + * the same. + / +bool nir_deref_cast_is_trivial(nir_deref_instr cast); + /** Returns true if deref might have one of the given modes * * For multi-mode derefs, this returns true if any of the possible modes for @@ -3789,8 +3795,10 @@ /* Lowers when 32x32->64 bit multiplication is not supported / bool lower_mul_2x32_64; - / Lowers when rotate instruction is not supported / - bool lower_rotate; + / Indicates that urol and uror are supported / + bool has_rotate8; + bool has_rotate16; + bool has_rotate32; /* Backend supports ternary addition */ bool has_iadd3;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_builder.h ^
@@ -1594,6 +1594,7 @@ default: unreachable("Invalid deref instruction type"); } + return NULL; } static inline nir_def *
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_clone.c ^
@@ -687,6 +687,32 @@ return nfxn; } +static u_printf_info * +clone_printf_info(void mem_ctx, const nir_shader s) +{ + u_printf_info infos = ralloc_array(mem_ctx, u_printf_info, s->printf_info_count); + + for (unsigned i = 0; i < s->printf_info_count; i++) { + const u_printf_info src_info = &s->printf_info[i]; + + infos[i].num_args = src_info->num_args; + infos[i].arg_sizes = ralloc_size(mem_ctx, + sizeof(infos[i].arg_sizes[0]) * + src_info->num_args); + memcpy(infos[i].arg_sizes, src_info->arg_sizes, + sizeof(infos[i].arg_sizes[0]) * src_info->num_args); + + + infos[i].string_size = src_info->string_size; + infos[i].strings = ralloc_size(mem_ctx, + src_info->string_size); + memcpy(infos[i].strings, src_info->strings, + src_info->string_size); + } + + return infos; +} + nir_shader * nir_shader_clone(void mem_ctx, const nir_shader s) { @@ -734,6 +760,11 @@ memcpy(ns->xfb_info, s->xfb_info, size); } + if (s->printf_info_count > 0) { + ns->printf_info = clone_printf_info(ns, s); + ns->printf_info_count = s->printf_info_count; + } + free_clone_state(&state); return ns;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_deref.c ^
@@ -26,9 +26,11 @@ #include "nir.h" #include "nir_builder.h" -static bool -is_trivial_deref_cast(nir_deref_instr cast) +bool +nir_deref_cast_is_trivial(nir_deref_instr cast) { + assert(cast->deref_type == nir_deref_type_cast); + nir_deref_instr parent = nir_src_as_deref(cast->parent); if (!parent) return false; @@ -57,7 +59,7 @@ tail = NULL; for (nir_deref_instr d = deref; d; d = nir_deref_instr_parent(d)) { - if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d)) + if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d)) continue; count++; if (count <= max_short_path_len) @@ -80,7 +82,7 @@ head = tail = path->path + count; tail = NULL; for (nir_deref_instr d = deref; d; d = nir_deref_instr_parent(d)) { - if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d)) + if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d)) continue; (--head) = d; } @@ -943,7 +945,7 @@ static bool is_trivial_array_deref_cast(nir_deref_instr cast) { - assert(is_trivial_deref_cast(cast)); + assert(nir_deref_cast_is_trivial(cast)); nir_deref_instr parent = nir_src_as_deref(cast->parent); @@ -1187,7 +1189,7 @@ return true; progress \|= opt_remove_cast_cast(cast); - if (!is_trivial_deref_cast(cast)) + if (!nir_deref_cast_is_trivial(cast)) return progress; /* If this deref still contains useful alignment information, we don't want @@ -1239,7 +1241,7 @@ */ if (parent->deref_type == nir_deref_type_cast && parent->cast.align_mul == 0 && - is_trivial_deref_cast(parent)) + nir_deref_cast_is_trivial(parent)) parent = nir_deref_instr_parent(parent); nir_def_rewrite_uses(&deref->def, &parent->def);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_divergence_analysis.c ^
@@ -616,6 +616,7 @@ case nir_intrinsic_isberd_nv: case nir_intrinsic_al2p_nv: case nir_intrinsic_ald_nv: + case nir_intrinsic_printf: is_divergent = true; break;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_instr_set.c ^
@@ -441,7 +441,7 @@ } else { alu1_actual_src = alu1->src[src1].src; - for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) + for (unsigned i = 0; i < nir_src_num_components(alu1_actual_src); i++) alu1_swizzle[i] = i; } @@ -458,7 +458,7 @@ } else { alu2_actual_src = alu2->src[src2].src; - for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu2, src2); i++) + for (unsigned i = 0; i < nir_src_num_components(alu2_actual_src); i++) alu2_swizzle[i] = i; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_loop_analyze.c ^
@@ -858,6 +858,7 @@ unsigned execution_mode) { nir_const_value span, iter; + unsigned iter_bit_size = bit_size; switch (invert_comparison_if_needed(cond_op, invert_cond)) { case nir_op_ine: @@ -911,13 +912,14 @@ iter = eval_const_binop(nir_op_fdiv, bit_size, span, step, execution_mode); iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode); + iter_bit_size = 64; break; default: return -1; } - uint64_t iter_u64 = nir_const_value_as_uint(iter, bit_size); + uint64_t iter_u64 = nir_const_value_as_uint(iter, iter_bit_size); return iter_u64 > INT_MAX ? -1 : (int)iter_u64; } @@ -1138,11 +1140,13 @@ */ for (int bias = -1; bias <= 1; bias++) { const int iter_bias = iter_int + bias; + if (iter_bias < 1) + continue; if (test_iterations(iter_bias, step, limit, alu_op, bit_size, induction_base_type, initial, limit_rhs, invert_cond, execution_mode)) { - return iter_bias > 0 ? iter_bias - trip_offset : iter_bias; + return iter_bias - trip_offset; } }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_int64.c ^
@@ -1344,6 +1344,7 @@ default: unreachable("Unsupported intrinsic"); } + return NULL; } static bool
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_io.c ^
@@ -1570,7 +1570,8 @@ nir_def zero = nir_imm_zero(b, load->num_components, bit_size); / TODO: Better handle block_intel. / - const unsigned load_size = (bit_size / 8) load->num_components; + assert(load->num_components == 1); + const unsigned load_size = bit_size / 8; nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size)); nir_builder_instr_insert(b, &load->instr); @@ -1755,7 +1756,8 @@ if (addr_format_needs_bounds_check(addr_format)) { /* TODO: Better handle block_intel. / - const unsigned store_size = (value->bit_size / 8) store->num_components; + assert(store->num_components == 1); + const unsigned store_size = value->bit_size / 8; nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size)); nir_builder_instr_insert(b, &store->instr); @@ -1948,8 +1950,12 @@ nir_deref_instr deref = nir_src_as_deref(intrin->src[0]); unsigned vec_stride = glsl_get_explicit_stride(deref->type); unsigned scalar_size = type_scalar_size_bytes(deref->type); - assert(vec_stride == 0 \|\| glsl_type_is_vector(deref->type)); - assert(vec_stride == 0 \|\| vec_stride >= scalar_size); + if (vec_stride == 0) { + vec_stride = scalar_size; + } else { + assert(glsl_type_is_vector(deref->type)); + assert(vec_stride >= scalar_size); + } uint32_t align_mul, align_offset; if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) { @@ -1958,10 +1964,27 @@ align_offset = 0; } + / In order for bounds checking to be correct as per the Vulkan spec, + * we need to check at the individual component granularity. Prior to + * robustness2, we're technically allowed to be sloppy by 16B. Even with + * robustness2, UBO loads are allowed to have a granularity as high as 256B + * depending on hardware limits. However, we have none of that information + * here. Short of adding new address formats, the easiest way to do that + * is to just split any loads and stores into individual components here. 
+ * + * TODO: At some point in the future we may want to add more ops similar to + * nir_intrinsic_load_global_constant_bounded and make bouds checking the + * back-end's problem. Another option would be to somehow plumb more of + * that information through to nir_lower_explicit_io. For now, however, + * scalarizing is at least correct. + / + bool scalarize = vec_stride > scalar_size \|\| + addr_format_needs_bounds_check(addr_format); + switch (intrin->intrinsic) { case nir_intrinsic_load_deref: { nir_def value; - if (vec_stride > scalar_size) { + if (scalarize) { nir_def comps[NIR_MAX_VEC_COMPONENTS] = { NULL, }; @@ -1990,7 +2013,7 @@ case nir_intrinsic_store_deref: { nir_def value = intrin->src[1].ssa; nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); - if (vec_stride > scalar_size) { + if (scalarize) { for (unsigned i = 0; i < intrin->num_components; i++) { if (!(write_mask & (1 << i))) continue;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_non_uniform_access.c ^
@@ -136,8 +136,12 @@ num_handles++; } - if (num_handles == 0) + if (num_handles == 0) { + /* nu_handle_init() returned false because the handles are uniform. */ + tex->texture_non_uniform = false; + tex->sampler_non_uniform = false; return false; + } b->cursor = nir_instr_remove(&tex->instr); @@ -177,8 +181,10 @@ return false; struct nu_handle handle; - if (!nu_handle_init(&handle, &intrin->src[handle_src])) + if (!nu_handle_init(&handle, &intrin->src[handle_src])) { + nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM); return false; + } b->cursor = nir_instr_remove(&intrin->instr);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_shader_calls.c ^
@@ -2050,6 +2050,8 @@ for (unsigned i = 0; i < num_calls; i++) { nir_instr resume_instr = lower_resume(resume_shaders[i], i); replace_resume_with_halt(resume_shaders[i], resume_instr); + / Remove CF after halt before nir_opt_if(). / + nir_opt_dead_cf(resume_shaders[i]); / Remove the dummy blocks added by flatten_resume_if_ladder() */ nir_opt_if(resume_shaders[i], nir_opt_if_optimize_phi_true_false); nir_opt_dce(resume_shaders[i]);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_algebraic.py ^
@@ -1380,22 +1380,22 @@ (('ishr', a, 0), a), (('ushr', 0, a), 0), (('ushr', a, 0), a), - (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), '!options->lower_rotate'), - (('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'), - (('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), '!options->lower_rotate'), - (('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'), - (('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), '!options->lower_rotate'), - (('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), 'options->lower_rotate'), - (('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), 'options->lower_rotate'), - (('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), 'options->lower_rotate'), - (('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b))), 'options->lower_rotate'), - (('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), 'options->lower_rotate'), - (('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), 'options->lower_rotate'), - (('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), 'options->lower_rotate'), - (('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b))), 'options->lower_rotate'), + (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), 'options->has_rotate16'), + (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), 
'options->has_rotate16'), + (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), 'options->has_rotate32'), + (('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), 'options->has_rotate32'), + (('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), 'options->has_rotate16'), + (('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), 'options->has_rotate16'), + (('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), 'options->has_rotate32'), + (('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), 'options->has_rotate32'), + (('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), '!options->has_rotate8'), + (('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), '!options->has_rotate16'), + (('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), '!options->has_rotate32'), + (('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b)))), + (('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), '!options->has_rotate8'), + (('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), '!options->has_rotate16'), + (('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), '!options->has_rotate32'), + (('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b)))), # bfi(X, a, b) = (b & ~X) \| (a & X) # If X = ~0: (b & 0) \| (a & 0xffffffff) = a
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_copy_prop_vars.c ^
@@ -1065,6 +1065,12 @@ if (nir_deref_mode_must_be(src.instr, ignore)) break; + /* Ignore trivial casts. If trivial casts are applied to array derefs of vectors, + * not doing this causes is_array_deref_of_vector to (wrongly) return false. / + while (src.instr->deref_type == nir_deref_type_cast && + nir_deref_instr_parent(src.instr) && nir_deref_cast_is_trivial(src.instr)) + src.instr = nir_deref_instr_parent(src.instr); + / Direct array_derefs of vectors operate on the vectors (the parent * deref). Indirects will be handled like other derefs. / @@ -1157,6 +1163,12 @@ nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL }; assert(glsl_type_is_vector_or_scalar(dst.instr->type)); + / Ignore trivial casts. If trivial casts are applied to array derefs of vectors, + * not doing this causes is_array_deref_of_vector to (wrongly) return false. / + while (dst.instr->deref_type == nir_deref_type_cast && + nir_deref_instr_parent(dst.instr) && nir_deref_cast_is_trivial(dst.instr)) + dst.instr = nir_deref_instr_parent(dst.instr); + / Direct array_derefs of vectors operate on the vectors (the parent * deref). Indirects will be handled like other derefs. */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_move_discards_to_top.c ^
@@ -165,10 +165,47 @@ instr->pass_flags = STOP_PROCESSING_INSTR_FLAG; goto break_all; } - - if ((intrin->intrinsic == nir_intrinsic_discard_if && consider_discards) \|\| - intrin->intrinsic == nir_intrinsic_demote_if) + switch (intrin->intrinsic) { + case nir_intrinsic_quad_broadcast: + case nir_intrinsic_quad_swap_horizontal: + case nir_intrinsic_quad_swap_vertical: + case nir_intrinsic_quad_swap_diagonal: + case nir_intrinsic_quad_swizzle_amd: + consider_discards = false; + break; + case nir_intrinsic_vote_any: + case nir_intrinsic_vote_all: + case nir_intrinsic_vote_feq: + case nir_intrinsic_vote_ieq: + case nir_intrinsic_ballot: + case nir_intrinsic_first_invocation: + case nir_intrinsic_read_invocation: + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_elect: + case nir_intrinsic_reduce: + case nir_intrinsic_inclusive_scan: + case nir_intrinsic_exclusive_scan: + case nir_intrinsic_shuffle: + case nir_intrinsic_shuffle_xor: + case nir_intrinsic_shuffle_up: + case nir_intrinsic_shuffle_down: + case nir_intrinsic_rotate: + case nir_intrinsic_masked_swizzle_amd: + instr->pass_flags = STOP_PROCESSING_INSTR_FLAG; + goto break_all; + case nir_intrinsic_discard_if: + if (!consider_discards) { + /* assume that a shader either uses discard or demote, but not both */ + instr->pass_flags = STOP_PROCESSING_INSTR_FLAG; + goto break_all; + } + FALLTHROUGH; + case nir_intrinsic_demote_if: moved = moved \|\| try_move_discard(intrin); + break; + default: + break; + } continue; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/tests/comparison_pre_tests.cpp ^
@@ -579,3 +579,95 @@ EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl)); } + +TEST_F(comparison_pre_test, multi_comps_load) +{ + /* Before: + * + * vec1 32 ssa_0 = load_ubo (...) + * vec4 32 ssa_1 = load_ubo (...) + * vec1 1 ssa_2 = flt ssa_0, ssa_1.w + * + * if ssa_2 { + * vec1 32 ssa_3 = fneg ssa_1.x + * vec1 32 ssa_4 = fadd ssa_0, ssa_3 + * } else { + * } + */ + nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32, + nir_imm_int(&bld, 0), + nir_imm_int(&bld, 0)); + nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32, + nir_imm_int(&bld, 1), + nir_imm_int(&bld, 0)); + + nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt); + flt->src[0].src = nir_src_for_ssa(ssa_0); + flt->src[1].src = nir_src_for_ssa(ssa_1); + memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx)); + memcpy(&flt->src[1].swizzle, wwww, sizeof(wwww)); + nir_builder_alu_instr_finish_and_insert(&bld, flt); + flt->def.num_components = 1; + nir_def *ssa_2 = &flt->def; + + nir_if *nif = nir_push_if(&bld, ssa_2); + { + nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg); + fneg->src[0].src = nir_src_for_ssa(ssa_1); + memcpy(&fneg->src[0].swizzle, xxxx, sizeof(xxxx)); + nir_builder_alu_instr_finish_and_insert(&bld, fneg); + fneg->def.num_components = 1; + nir_def *ssa_3 = &fneg->def; + + nir_fadd(&bld, ssa_0, ssa_3); + } + nir_pop_if(&bld, nif); + + EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, multi_comps_load2) +{ + /* Before: + * + * vec1 32 ssa_0 = load_ubo (...) + * vec4 32 ssa_1 = load_ubo (...) 
+ * vec1 1 ssa_2 = flt ssa_0, ssa_1.x + * + * if ssa_2 { + * vec1 32 ssa_3 = fneg ssa_1.w + * vec1 32 ssa_4 = fadd ssa_0, ssa_3 + * } else { + * } + */ + nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32, + nir_imm_int(&bld, 0), + nir_imm_int(&bld, 0)); + nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32, + nir_imm_int(&bld, 1), + nir_imm_int(&bld, 0)); + + nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt); + flt->src[0].src = nir_src_for_ssa(ssa_0); + flt->src[1].src = nir_src_for_ssa(ssa_1); + memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx)); + memcpy(&flt->src[1].swizzle, xxxx, sizeof(xxxx)); + nir_builder_alu_instr_finish_and_insert(&bld, flt); + flt->def.num_components = 1; + nir_def *ssa_2 = &flt->def; + + nir_if *nif = nir_push_if(&bld, ssa_2); + { + nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg); + fneg->src[0].src = nir_src_for_ssa(ssa_1); + memcpy(&fneg->src[0].swizzle, wwww, sizeof(wwww)); + nir_builder_alu_instr_finish_and_insert(&bld, fneg); + fneg->def.num_components = 1; + nir_def *ssa_3 = &fneg->def; + + nir_fadd(&bld, ssa_0, ssa_3); + } + nir_pop_if(&bld, nif); + + EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl)); +}
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/tests/loop_analyze_tests.cpp ^
@@ -285,6 +285,7 @@ INOT_COMPARE(ilt_rev) INOT_COMPARE(ine) +INOT_COMPARE(uge_rev) #define KNOWN_COUNT_TEST(_init_value, _cond_value, _incr_value, cond, incr, count) \ TEST_F(nir_loop_analyze_test, incr ## _ ## cond ## _known_count_ ## count) \ @@ -569,6 +570,16 @@ /* uint i = 0; * while (true) { + * if (!(0 >= i)) + * break; + * + * i += 1; + * } + */ +KNOWN_COUNT_TEST(0x00000000, 0x00000000, 0x00000001, inot_uge_rev, iadd, 1) + +/* uint i = 0; + * while (true) { * if (i != 0) * break; *
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/nir_spirv.h ^
@@ -116,6 +116,8 @@ /* Force texture sampling to be non-uniform. */ bool force_tex_non_uniform; + /* Force SSBO accesses to be non-uniform. */ + bool force_ssbo_non_uniform; /* In Debug Builds, instead of emitting an OS break on failure, just return NULL from * spirv_to_nir(). This is useful for the unit tests that want to report a test failed
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/spirv_to_nir.c ^
@@ -156,7 +156,7 @@ if (len < 0 || len >= sizeof(filename)) return; - FILE *f = fopen(filename, "w"); + FILE *f = fopen(filename, "wb"); if (f == NULL) return; @@ -4375,9 +4375,13 @@ w + 5, count - 5); break; - case SpvOpCopyLogical: + case SpvOpCopyLogical: { ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + struct vtn_type *dst_type = vtn_get_value_type(b, w[2]); + vtn_assert(vtn_types_compatible(b, type, dst_type)); + ssa->type = glsl_get_bare_type(dst_type->type); break; + } case SpvOpCopyObject: vtn_copy_value(b, w[3], w[2]); return;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/vtn_alu.c ^
@@ -94,38 +94,16 @@ transpose_result = true; } - if (src0_transpose && !src1_transpose && - glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { - /* We already have the rows of src0 and the columns of src1 available, - * so we can just take the dot product of each row with each column to - * get the result. - */ - - for (unsigned i = 0; i < src1_columns; i++) { - nir_def *vec_src[4]; - for (unsigned j = 0; j < src0_rows; j++) { - vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, - src1->elems[i]->def); - } - dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); - } - } else { - /* We don't handle the case where src1 is transposed but not src0, since - * the general case only uses individual components of src1 so the - * optimizer should chew through the transpose we emitted for src1. - */ - - for (unsigned i = 0; i < src1_columns; i++) { - /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def, + nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1)); + for (int j = src0_columns - 2; j >= 0; j--) { dest->elems[i]->def = - nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def, - nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1)); - for (int j = src0_columns - 2; j >= 0; j--) { - dest->elems[i]->def = - nir_ffma(&b->nb, src0->elems[j]->def, - nir_channel(&b->nb, src1->elems[i]->def, j), - dest->elems[i]->def); - } + nir_ffma(&b->nb, src0->elems[j]->def, + nir_channel(&b->nb, src1->elems[i]->def, j), + dest->elems[i]->def); } }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/vtn_variables.c ^
@@ -2632,6 +2632,9 @@ /* Workaround for https://gitlab.freedesktop.org/mesa/mesa/-/issues/3406 */ access |= base->access & ACCESS_NON_UNIFORM; + if (base->mode == vtn_variable_mode_ssbo && b->options->force_ssbo_non_uniform) + access |= ACCESS_NON_UNIFORM; + struct vtn_pointer *ptr = vtn_pointer_dereference(b, base, chain); ptr->ptr_type = ptr_type; ptr->access |= access;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/egl/drivers/dri2/egl_dri2.c ^
@@ -1067,6 +1067,8 @@ dri2_dpy->dri3_major_version != -1 && !dri2_dpy->multibuffers_available && #endif + (disp->Platform == EGL_PLATFORM_X11_KHR || + disp->Platform == EGL_PLATFORM_XCB_EXT) && !debug_get_bool_option("LIBGL_KOPPER_DRI2", false)) return EGL_FALSE;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/egl/main/eglapi.c ^
@@ -695,7 +695,7 @@ if (disp->Options.ForceSoftware) RETURN_EGL_ERROR(disp, EGL_NOT_INITIALIZED, EGL_FALSE); else { - bool success = disp->Options.Zink; + bool success = false; if (!disp->Options.Zink && !getenv("GALLIUM_DRIVER")) { disp->Options.Zink = EGL_TRUE; success = _eglDriver.Initialize(disp);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/egl/main/egldisplay.c ^
@@ -642,6 +642,7 @@ _eglGetSurfacelessDisplay(void *native_display, const EGLAttrib *attrib_list) { _EGLDisplay *dpy; + _EGLDevice *dev = NULL; /* Any native display must be an EGLDeviceEXT we know about */ if (native_display != NULL) { @@ -657,8 +658,8 @@ switch (attrib) { case EGL_DEVICE_EXT: - if ((native_display && native_display != (void *)value) || - (native_display != _eglLookupDevice(native_display))) { + dev = _eglLookupDevice((void *)value); + if (!dev) { _eglError(EGL_BAD_DEVICE_EXT, "eglGetPlatformDisplay"); return NULL; } @@ -671,10 +672,9 @@ } } - dpy = - _eglFindDisplay(_EGL_PLATFORM_SURFACELESS, native_display, attrib_list); + dpy = _eglFindDisplay(_EGL_PLATFORM_SURFACELESS, NULL, attrib_list); if (dpy) { - dpy->Device = native_display; + dpy->Device = dev; } return dpy;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a530-fails.txt ^
@@ -208,7 +208,6 @@ spec@arb_separate_shader_objects@400 combinations by name,Fail spec@arb_texture_rectangle@1-1-linear-texture,Fail spec@arb_timer_query@query gl_timestamp,Fail -spec@arb_timer_query@timestamp-get,Fail spec@arb_transform_feedback3@gl_skipcomponents1-1,Fail spec@arb_transform_feedback3@gl_skipcomponents1-2,Fail spec@arb_transform_feedback3@gl_skipcomponents1-3,Fail
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a618-fails.txt ^
@@ -91,6 +91,7 @@ spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail # Same results w/ zink-on-tu as with freedreno: +spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail spec@arb_sample_shading@samplemask 2 all@noms partition,Fail spec@arb_sample_shading@samplemask 2@noms partition,Fail spec@arb_sample_shading@samplemask 4 all@noms partition,Fail @@ -110,8 +111,6 @@ spec@arb_texture_rectangle@1-1-linear-texture,Fail -spec@arb_timer_query@timestamp-get,Fail - spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail # fails unrelated to GL_ARB_enhanced_layouts
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a630-fails.txt ^
@@ -114,8 +114,6 @@ spec@arb_texture_rectangle@1-1-linear-texture,Fail -spec@arb_timer_query@timestamp-get,Fail - spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail # fails unrelated to GL_ARB_enhanced_layouts
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a660-fails.txt ^
@@ -11,24 +11,6 @@ dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail gmem-dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bvec2_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bvec3_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bvec4_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_float_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec2_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec3_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec4_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec2_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec3_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec4_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec2_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec3_fragment,Fail -dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec4_fragment,Fail -gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec4_fragment,Fail -gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec2_fragment,Fail -gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec2_fragment,Fail -gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec4_fragment,Fail - # New CTS fails in 1.3.6.3 gmem-dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage:struct_mixed_types.uniform_buffer_block_geom,Fail gmem-dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage.struct_mixed_types.uniform_buffer_block_geom,Fail
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/drm/freedreno_device.c ^
@@ -104,6 +104,9 @@ if (!use_heap) { struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D); + if (!pipe) + goto fail; + /* Userspace fences don't appear to be reliable enough (missing some * cache flushes?) on older gens, so limit sub-alloc heaps to a6xx+ * for now: @@ -119,6 +122,10 @@ } return dev; + +fail: + fd_device_del(dev); + return NULL; } /* like fd_device_new() but creates it's own private dup() of the fd
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_compiler.c ^
@@ -109,7 +109,6 @@ .lower_unpack_unorm_2x16 = true, .lower_pack_split = true, .use_interpolated_input_intrinsics = true, - .lower_rotate = true, .lower_to_scalar = true, .has_imul24 = true, .has_fsub = true, @@ -133,7 +132,7 @@ ir3_shader_debug = debug_get_option_ir3_shader_debug(); ir3_shader_override_path = - !__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL; + __normal_user() ? debug_get_option_ir3_shader_override_path() : NULL; if (ir3_shader_override_path) { ir3_shader_debug |= IR3_DBG_NOCACHE;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_legalize.c ^
@@ -998,6 +998,7 @@ if (block->brtype == IR3_BRANCH_ALL \|\| block->brtype == IR3_BRANCH_ANY \|\| block->brtype == IR3_BRANCH_GETONE) { + bd->uses_helpers_beginning = true; bd->uses_helpers_end = true; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/driver_trace/tr_dump.c ^
@@ -284,7 +284,7 @@ atexit(trace_dump_trace_close); const char *trigger = debug_get_option("GALLIUM_TRACE_TRIGGER", NULL); - if (trigger) { + if (trigger && __normal_user()) { trigger_filename = strdup(trigger); trigger_active = false; } else
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp ^
@@ -62,6 +62,7 @@ #include <llvm/Support/PrettyStackTrace.h> #include <llvm/ExecutionEngine/ObjectCache.h> #include <llvm/Support/TargetSelect.h> +#include <llvm/CodeGen/SelectionDAGNodes.h> #if LLVM_VERSION_MAJOR >= 15 #include <llvm/Support/MemoryBuffer.h> #endif @@ -100,6 +101,8 @@ #include "lp_bld_misc.h" #include "lp_bld_debug.h" +static void lp_run_atexit_for_destructors(void); + namespace { class LLVMEnsureMultithreaded { @@ -147,6 +150,7 @@ } } #endif + lp_run_atexit_for_destructors(); } extern "C" void @@ -623,3 +627,33 @@ M->setOverrideStackAlignment(align); #endif } + +using namespace llvm; + +class GallivmRunAtExitForStaticDestructors : public SDNode +{ +public: + /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */ + GallivmRunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other)) + { + } +}; + +static void +lp_run_atexit_for_destructors(void) +{ + /* LLVM >= 16 registers static variable destructors on the first compile, which gcc + * implements by calling atexit there. Before that, u_queue registers its atexit + * handler to kill all threads. Since exit() runs atexit handlers in the reverse order, + * the LLVM destructors are called first while shader compiler threads may still be + * running, which crashes in LLVM in SelectionDAG.cpp. + * + * The solution is to run the code that declares the LLVM static variables first, + * so that atexit for LLVM is registered first and u_queue is registered after that, + * which ensures that all u_queue threads are terminated before LLVM destructors are + * called. + * + * This just executes the code that declares static variables. + */ + GallivmRunAtExitForStaticDestructors(); +}
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c ^
@@ -3689,13 +3689,15 @@ !options->lower_fdph || !options->lower_flrp64 || !options->lower_fmod || - !options->lower_rotate || !options->lower_uadd_carry || !options->lower_usub_borrow || !options->lower_uadd_sat || !options->lower_usub_sat || !options->lower_uniforms_to_ubo || !options->lower_vector_cmp || + options->has_rotate8 || + options->has_rotate16 || + options->has_rotate32 || options->lower_fsqrt != lower_fsqrt || options->force_indirect_unrolling != no_indirects_mask || force_indirect_unrolling_sampler) { @@ -3709,7 +3711,6 @@ new_options->lower_fdph = true; new_options->lower_flrp64 = true; new_options->lower_fmod = true; - new_options->lower_rotate = true; new_options->lower_uadd_carry = true; new_options->lower_usub_borrow = true; new_options->lower_uadd_sat = true; @@ -3717,6 +3718,9 @@ new_options->lower_uniforms_to_ubo = true; new_options->lower_vector_cmp = true; new_options->lower_fsqrt = lower_fsqrt; + new_options->has_rotate8 = false; + new_options->has_rotate16 = false; + new_options->has_rotate32 = false; new_options->force_indirect_unrolling = no_indirects_mask; new_options->force_indirect_unrolling_sampler = force_indirect_unrolling_sampler; @@ -4062,7 +4066,6 @@ .lower_fdph = true, .lower_flrp64 = true, .lower_fmod = true, - .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_uadd_carry = true, .lower_usub_borrow = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/vl/vl_video_buffer.c ^
@@ -296,13 +296,19 @@ nr_components = 3; for (j = 0; j < nr_components && component < VL_NUM_COMPONENTS; ++j, ++component) { + unsigned pipe_swizzle; + if (buf->sampler_view_components[component]) continue; memset(&sv_templ, 0, sizeof(sv_templ)); u_sampler_view_default_template(&sv_templ, res, sampler_format[plane_order[i]]); - sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = PIPE_SWIZZLE_X + j; + pipe_swizzle = (buf->base.buffer_format == PIPE_FORMAT_YUYV || buf->base.buffer_format == PIPE_FORMAT_UYVY) ? + (PIPE_SWIZZLE_X + j + 1) % 3 : + (PIPE_SWIZZLE_X + j); + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = pipe_swizzle; sv_templ.swizzle_a = PIPE_SWIZZLE_1; + buf->sampler_view_components[component] = pipe->create_sampler_view(pipe, res, &sv_templ); if (!buf->sampler_view_components[component]) goto error;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_screen.cpp ^
@@ -735,10 +735,6 @@ screen->dev->Release(); screen->dev = nullptr; } - if (screen->winsys) { - screen->winsys->destroy(screen->winsys); - screen->winsys = nullptr; - } } void
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_video_enc_av1.cpp ^
@@ -2189,7 +2189,7 @@ writtenTemporalDelimBytes // Bytes Written AFTER placingPositionStart arg above ); assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == writtenTemporalDelimBytes); - debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", writtenTemporalDelimBytes); + debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenTemporalDelimBytes)); } size_t writtenSequenceBytes = 0; @@ -2208,7 +2208,7 @@ writtenSequenceBytes // Bytes Written AFTER placingPositionStart arg above ); assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == (writtenSequenceBytes + writtenTemporalDelimBytes)); - debug_printf("Written OBU_SEQUENCE_HEADER bytes: %" PRIu64 "\n", writtenSequenceBytes); + debug_printf("Written OBU_SEQUENCE_HEADER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenSequenceBytes)); } // Only supported bitstream format is with obu_size for now. @@ -2254,14 +2254,14 @@ writtenFrameBytes // Bytes Written AFTER placingPositionStart arg above ); - debug_printf("Written OBU_FRAME bytes: %" PRIu64 "\n", writtenFrameBytes); + debug_printf("Written OBU_FRAME bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenFrameBytes)); assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == (writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes)); debug_printf("Uploading %" PRIu64 " bytes from OBU sequence and/or picture headers to comp_bit_destination %p at offset 0\n", - pD3D12Enc->m_BitstreamHeadersBuffer.size(), + static_cast<uint64_t>(pD3D12Enc->m_BitstreamHeadersBuffer.size()), associatedMetadata.comp_bit_destination); // Upload headers to the finalized compressed bitstream buffer @@ -2330,13 +2330,13 @@ writtenFrameBytes // Bytes Written AFTER placingPositionStart arg above ); - debug_printf("Written OBU_FRAME_HEADER bytes: %" PRIu64 "\n", writtenFrameBytes); + debug_printf("Written OBU_FRAME_HEADER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenFrameBytes)); assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() 
== (writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes)); debug_printf("Uploading %" PRIu64 " bytes from OBU headers to comp_bit_destination %p at offset 0\n", - pD3D12Enc->m_BitstreamHeadersBuffer.size(), + static_cast<uint64_t>(pD3D12Enc->m_BitstreamHeadersBuffer.size()), associatedMetadata.comp_bit_destination); // Upload headers to the finalized compressed bitstream buffer @@ -2361,7 +2361,7 @@ debug_printf("Uploading tile group %d to comp_bit_destination %p at offset %" PRIu64 "\n", tg_idx, associatedMetadata.comp_bit_destination, - comp_bitstream_offset); + static_cast<uint64_t>(comp_bitstream_offset)); size_t tile_group_obu_size = 0; size_t decode_tile_elements_size = 0; @@ -2387,9 +2387,9 @@ debug_printf("Written %" PRIu64 " bytes for OBU_TILE_GROUP open_bitstream_unit() prefix with obu_header() and " "obu_size to staging_bitstream_buffer %p at offset %" PRIu64 "\n", - writtenTileObuPrefixBytes, + static_cast<uint64_t>(writtenTileObuPrefixBytes), associatedMetadata.m_StagingBitstreamConstruction.data(), - staging_bitstream_buffer_offset); + static_cast<uint64_t>(staging_bitstream_buffer_offset)); writtenTileBytes += writtenTileObuPrefixBytes; @@ -2404,10 +2404,10 @@ debug_printf("Uploading %" PRIu64 " bytes for OBU_TILE_GROUP open_bitstream_unit() prefix with obu_header() " "and obu_size: %" PRIu64 " to comp_bit_destination %p at offset %" PRIu64 "\n", - writtenTileObuPrefixBytes, - tile_group_obu_size, + static_cast<uint64_t>(writtenTileObuPrefixBytes), + static_cast<uint64_t>(tile_group_obu_size), associatedMetadata.comp_bit_destination, - comp_bitstream_offset); + static_cast<uint64_t>(comp_bitstream_offset)); staging_bitstream_buffer_offset += writtenTileObuPrefixBytes; @@ -2517,7 +2517,7 @@ // Add current pending frame being processed in the loop extra_show_existing_frame_payload_bytes += writtenTemporalDelimBytes; - debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", writtenTemporalDelimBytes); + debug_printf("Written 
OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenTemporalDelimBytes)); size_t writtenShowExistingFrameBytes = 0; av1_pic_header_t showExistingPicHdr = {}; @@ -2561,7 +2561,7 @@ "in current frame ref_frame_idx[%" PRIu32 "]) bytes: %" PRIu64 "\n", pendingFrameIt /PictureIndex*/, showExistingPicHdr.frame_to_show_map_idx, - writtenShowExistingFrameBytes); + static_cast<uint64_t>(writtenShowExistingFrameBytes)); // Remove it from the list of pending frames pendingFrameIt = @@ -2628,7 +2628,7 @@ tileGroup.tg_start, tileGroup.tg_end, comp_bit_destination, - comp_bit_destination_offset); + static_cast<uint64_t>(comp_bit_destination_offset)); debug_printf("[Tile group start %d to end %d] Using staging_bitstream_buffer %p at offset %" PRIu64 " to write the tile_obu_group() prefix syntax: tile_start_and_end_present_flag, tg_start, tg_end and " @@ -2636,7 +2636,7 @@ tileGroup.tg_start, tileGroup.tg_end, staging_bitstream_buffer.data(), - staging_bitstream_buffer_offset); + static_cast<uint64_t>(staging_bitstream_buffer_offset)); // Reserve space upfront in the scratch storage // Do not modify anything before staging_bitstream_buffer_offset @@ -2673,9 +2673,9 @@ " for tile_obu_group() prefix syntax: tile_start_and_end_present_flag, tg_start, tg_end\n", tileGroup.tg_start, tileGroup.tg_end, - bitstream_tile_group_obu_bytes, + static_cast<uint64_t>(bitstream_tile_group_obu_bytes), staging_bitstream_buffer.data(), - staging_bitstream_buffer_offset); + static_cast<uint64_t>(staging_bitstream_buffer_offset)); // Save this to compare the final written destination byte size against the expected tile_group_obu_size @@ -2699,11 +2699,11 @@ " to comp_bit_destination %p at offset %" PRIu64 "\n", tileGroup.tg_start, tileGroup.tg_end, - bitstream_tile_group_obu_bytes, + static_cast<uint64_t>(bitstream_tile_group_obu_bytes), staging_bitstream_buffer.data(), - staging_bitstream_buffer_offset, + static_cast<uint64_t>(staging_bitstream_buffer_offset), 
comp_bit_destination, - comp_bit_destination_offset); + static_cast<uint64_t>(comp_bit_destination_offset)); comp_bit_destination_offset += bitstream_tile_group_obu_bytes; written_bytes_to_staging_bitstream_buffer += bitstream_tile_group_obu_bytes; @@ -2729,9 +2729,9 @@ tileGroup.tg_start, tileGroup.tg_end, TileIdx, - TileSizeBytes, + static_cast<uint64_t>(TileSizeBytes), staging_bitstream_buffer.data(), - (written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset)); + static_cast<uint64_t>(written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset)); // Upload current tile_size_minus_1 // Note: The buffer_subdata is queued in pD3D12Enc->base.context but doesn't execute immediately @@ -2751,11 +2751,11 @@ tileGroup.tg_start, tileGroup.tg_end, TileIdx, - TileSizeBytes, + static_cast<uint64_t>(TileSizeBytes), staging_bitstream_buffer.data(), - (written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset), + static_cast<uint64_t>(written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset), comp_bit_destination, - comp_bit_destination_offset); + static_cast<uint64_t>(comp_bit_destination_offset)); comp_bit_destination_offset += TileSizeBytes; written_bytes_to_staging_bitstream_buffer += TileSizeBytes; @@ -2788,11 +2788,11 @@ tileGroup.tg_start, tileGroup.tg_end, TileIdx, - tile_size, + static_cast<uint64_t>(tile_size), src_driver_bitstream, - src_buf_tile_position, + static_cast<uint64_t>(src_buf_tile_position), comp_bit_destination, - comp_bit_destination_offset); + static_cast<uint64_t>(comp_bit_destination_offset)); comp_bit_destination_offset += tile_size; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_av1.cpp ^
@@ -153,7 +153,7 @@ write_obu_header(&bitstream_full_obu, OBU_TEMPORAL_DELIMITER, obu_extension_flag, temporal_id, spatial_id); // Write the data size - const size_t obu_size_in_bytes = 0; + const uint64_t obu_size_in_bytes = 0; debug_printf("obu_size: %" PRIu64 " (temporal_delimiter_obu() has empty payload as per AV1 codec spec)\n", obu_size_in_bytes); pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes); @@ -197,7 +197,7 @@ write_obu_header(&bitstream_full_obu, OBU_SEQUENCE_HEADER, obu_extension_flag, temporal_id, spatial_id); // Write the data size - const size_t obu_size_in_bytes = static_cast<size_t>(bitstream_seq.get_byte_count()); + const uint64_t obu_size_in_bytes = bitstream_seq.get_byte_count(); debug_printf("obu_size: %" PRIu64 "\n", obu_size_in_bytes); pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes); @@ -802,7 +802,7 @@ debug_printf("frame_header_obu() bytes (without OBU_FRAME nor OBU_FRAME_HEADER alignment padding): %" PRId32 "\n", bitstream_pic.get_byte_count()); // May be bit unaligned at this point (see padding below) debug_printf("extra_obu_size_bytes (ie. tile_group_obu_size if writing OBU_FRAME ): %" PRIu64 "\n", - extra_obu_size_bytes); + static_cast<uint64_t>(extra_obu_size_bytes)); // Write the obu_header constexpr uint32_t obu_extension_flag = 0; @@ -825,7 +825,7 @@ bitstream_pic.flush(); // Write the obu_size element - const size_t obu_size_in_bytes = bitstream_pic.get_byte_count() + extra_obu_size_bytes; + const uint64_t obu_size_in_bytes = bitstream_pic.get_byte_count() + extra_obu_size_bytes; debug_printf("obu_size: %" PRIu64 "\n", obu_size_in_bytes); pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes); @@ -913,7 +913,7 @@ // Write the obu_size element pack_obu_header_size(&bitstream_full_obu, tile_group_obu_size); - debug_printf("obu_size: %" PRIu64 "\n", tile_group_obu_size); + debug_printf("obu_size: %" PRIu64 "\n", static_cast<uint64_t>(tile_group_obu_size)); bitstream_full_obu.flush();
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_av1.cpp ^
@@ -213,7 +213,7 @@ "Number of DPB virtual entries is %" PRIu64 " entries for frame with OrderHint " "%d (PictureIndex %d) are: \n%s \n", m_PhysicalAllocationsStorage.get_number_of_pics_in_dpb(), - m_CurrentFrameReferencesData.pVirtualDPBEntries.size(), + static_cast<uint64_t>(m_CurrentFrameReferencesData.pVirtualDPBEntries.size()), m_CurrentFramePicParams.OrderHint, m_CurrentFramePicParams.PictureIndex, dpbContents.c_str());
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler.c ^
@@ -54,8 +54,6 @@ .lower_fmod = true, .lower_vector_cmp = true, .lower_fdph = true, - .lower_extract_byte = true, - .lower_extract_word = true, .lower_insert_byte = true, .lower_insert_word = true, .lower_fdiv = true, /* !specs->has_new_transcendentals */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_screen.c ^
@@ -458,6 +458,11 @@ { bool supported = true; + /* Requires split sampler support, which the driver doesn't support, yet. */ + if (!util_format_is_compressed(format) && + util_format_get_blocksizebits(format) > 32) + return false; + if (fmt == TEXTURE_FORMAT_ETC1) supported = VIV_FEATURE(screen, chipFeatures, ETC1_TEXTURE_COMPRESSION); @@ -500,6 +505,10 @@ if (fmt == ETNA_NO_MATCH) return false; + /* Requires split target support, which the driver doesn't support, yet. */ + if (util_format_get_blocksizebits(format) > 32) + return false; + if (sample_count > 1) { /* Explicitly enabled. */ if (!DBG_ENABLED(ETNA_DBG_MSAA))
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_shader.c ^
@@ -147,6 +147,7 @@ COND(last_varying_2x, VIVS_RA_CONTROL_LAST_VARYING_2X); cs->PA_ATTRIBUTE_ELEMENT_COUNT = VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(link.num_varyings); + STATIC_ASSERT(VIVS_PA_SHADER_ATTRIBUTES__LEN >= ETNA_NUM_VARYINGS); for (int idx = 0; idx < link.num_varyings; ++idx) cs->PA_SHADER_ATTRIBUTES[idx] = link.varyings[idx].pa_attributes;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h ^
@@ -10,7 +10,7 @@ The rules-ng-ng source files this header was generated from are: - cmdstream.xml ( 16930 bytes, from 2019-01-04 11:37:39) - copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) +- common.xml ( 35465 bytes, from 2023-11-13 11:29:31) Copyright (C) 2012-2019 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com>
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/common.xml.h ^
@@ -10,10 +10,10 @@ The rules-ng-ng source files this header was generated from are: - texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) - copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) -- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53) +- common.xml ( 35465 bytes, from 2023-11-13 11:29:31) +- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31) -Copyright (C) 2012-2020 by the following authors: +Copyright (C) 2012-2023 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de>
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/common_3d.xml.h ^
@@ -10,10 +10,10 @@ The rules-ng-ng source files this header was generated from are: - texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) - copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) -- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53) +- common.xml ( 35465 bytes, from 2023-11-13 11:29:31) +- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31) -Copyright (C) 2012-2022 by the following authors: +Copyright (C) 2012-2023 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de>
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/isa.xml.h ^
@@ -8,10 +8,10 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- isa.xml ( 38205 bytes, from 2022-09-03 22:41:40) +- isa.xml ( 39261 bytes, from 2023-11-13 11:29:31) - copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2022 by the following authors: +Copyright (C) 2012-2023 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de> @@ -90,8 +90,8 @@ #define INST_OPCODE_CMP 0x00000031 #define INST_OPCODE_LOAD 0x00000032 #define INST_OPCODE_STORE 0x00000033 -#define INST_OPCODE_COPYSIGN 0x00000034 -#define INST_OPCODE_GETEXP 0x00000035 +#define INST_OPCODE_IMG_LOAD_3D 0x00000034 +#define INST_OPCODE_IMG_STORE_3D 0x00000035 #define INST_OPCODE_GETMANT 0x00000036 #define INST_OPCODE_NAN 0x00000037 #define INST_OPCODE_NEXTAFTER 0x00000038 @@ -159,8 +159,8 @@ #define INST_OPCODE_NORM_DP4 0x00000076 #define INST_OPCODE_NORM_MUL 0x00000077 #define INST_OPCODE_STORE_ATTR 0x00000078 -#define INST_OPCODE_LOAD_ATTR 0x00000079 -#define INST_OPCODE_EMIT 0x0000007a +#define INST_OPCODE_IMG_LOAD 0x00000079 +#define INST_OPCODE_IMG_STORE 0x0000007a #define INST_OPCODE_RESTART 0x0000007b #define INST_OPCODE_NOP7C 0x0000007c #define INST_OPCODE_NOP7D 0x0000007d
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/state.xml.h ^
@@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 27198 bytes, from 2022-08-16 16:28:18) -- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) -- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53) -- state_hi.xml ( 34803 bytes, from 2022-08-16 16:28:18) +- state.xml ( 28218 bytes, from 2023-11-13 11:29:31) +- common.xml ( 35465 bytes, from 2023-11-13 11:29:31) +- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31) +- state_hi.xml ( 34935 bytes, from 2023-11-13 11:29:31) - copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) -- state_3d.xml ( 84326 bytes, from 2022-10-07 06:11:53) -- state_blt.xml ( 14424 bytes, from 2022-10-07 06:11:53) +- state_2d.xml ( 52271 bytes, from 2023-09-13 13:37:23) +- state_3d.xml ( 86123 bytes, from 2023-11-13 12:42:26) +- state_blt.xml ( 14424 bytes, from 2023-09-13 13:37:23) - state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2022 by the following authors: +Copyright (C) 2012-2023 by the following authors: - Wladimir J. 
van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de> @@ -400,7 +400,16 @@ #define VIVS_GL_FENCE_OUT_DATA_LOW 0x0000386c -#define VIVS_GL_HALTI5_UNK03884 0x00003884 +#define VIVS_GL_USC_CONTROL 0x00003884 +#define VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__MASK 0x00000007 +#define VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__SHIFT 0 +#define VIVS_GL_USC_CONTROL_L1_CACHE_RATIO(x) (((x) << VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__SHIFT) & VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__MASK) +#define VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__MASK 0x00000f00 +#define VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__SHIFT 8 +#define VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO(x) (((x) << VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__SHIFT) & VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__MASK) +#define VIVS_GL_USC_CONTROL_UNK16__MASK 0x001f0000 +#define VIVS_GL_USC_CONTROL_UNK16__SHIFT 16 +#define VIVS_GL_USC_CONTROL_UNK16(x) (((x) << VIVS_GL_USC_CONTROL_UNK16__SHIFT) & VIVS_GL_USC_CONTROL_UNK16__MASK) #define VIVS_GL_HALTI5_SH_SPECIALS 0x00003888 #define VIVS_GL_HALTI5_SH_SPECIALS_VS_PSIZE_OUT__MASK 0x0000007f @@ -434,6 +443,30 @@ #define VIVS_GL_SECURITY_UNK3904 0x00003904 +#define VIVS_GL_NN_CONFIG 0x00003930 +#define VIVS_GL_NN_CONFIG_UNK0__MASK 0x00000003 +#define VIVS_GL_NN_CONFIG_UNK0__SHIFT 0 +#define VIVS_GL_NN_CONFIG_UNK0(x) (((x) << VIVS_GL_NN_CONFIG_UNK0__SHIFT) & VIVS_GL_NN_CONFIG_UNK0__MASK) +#define VIVS_GL_NN_CONFIG_DISABLE_ZDPN 0x00000004 +#define VIVS_GL_NN_CONFIG_DISABLE_SWTILING 0x00000008 +#define VIVS_GL_NN_CONFIG_SMALL_BATCH 0x00000010 +#define VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__MASK 0x00000060 +#define VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__SHIFT 5 +#define VIVS_GL_NN_CONFIG_DDR_BURST_SIZE(x) (((x) << VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__SHIFT) & VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__MASK) +#define VIVS_GL_NN_CONFIG_UNK7 0x00000080 +#define VIVS_GL_NN_CONFIG_NN_CORE_COUNT__MASK 0x00000f00 +#define VIVS_GL_NN_CONFIG_NN_CORE_COUNT__SHIFT 8 +#define 
VIVS_GL_NN_CONFIG_NN_CORE_COUNT(x) (((x) << VIVS_GL_NN_CONFIG_NN_CORE_COUNT__SHIFT) & VIVS_GL_NN_CONFIG_NN_CORE_COUNT__MASK) +#define VIVS_GL_NN_CONFIG_UNK12 0x00001000 + +#define VIVS_GL_SRAM_REMAP_ADDRESS 0x00003938 + +#define VIVS_GL_OCB_REMAP_START 0x0000393c + +#define VIVS_GL_OCB_REMAP_END 0x00003940 + +#define VIVS_GL_TP_CONFIG 0x0000394c + #define VIVS_GL_UNK03A00 0x00003a00 #define VIVS_GL_UNK03A04 0x00003a04
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/state_3d.xml.h ^
@@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 27198 bytes, from 2022-04-22 10:35:24) -- common.xml ( 35468 bytes, from 2020-10-28 12:56:03) -- common_3d.xml ( 15058 bytes, from 2020-10-28 12:56:03) -- state_hi.xml ( 34803 bytes, from 2020-10-28 12:56:03) -- copyright.xml ( 1597 bytes, from 2020-10-28 12:56:03) -- state_2d.xml ( 51552 bytes, from 2020-10-28 12:56:03) -- state_3d.xml ( 84445 bytes, from 2022-11-15 15:59:38) -- state_blt.xml ( 14424 bytes, from 2022-11-07 11:18:41) -- state_vg.xml ( 5975 bytes, from 2020-10-28 12:56:03) +- state.xml ( 28218 bytes, from 2023-11-13 11:29:31) +- common.xml ( 35465 bytes, from 2023-11-13 11:29:31) +- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31) +- state_hi.xml ( 34935 bytes, from 2023-11-13 11:29:31) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 52271 bytes, from 2023-09-13 13:37:23) +- state_3d.xml ( 86123 bytes, from 2023-11-13 12:42:26) +- state_blt.xml ( 14424 bytes, from 2023-09-13 13:37:23) +- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2022 by the following authors: +Copyright (C) 2012-2023 by the following authors: - Wladimir J. 
van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de> @@ -159,6 +159,15 @@ #define VIVS_VS_END_PC 0x00000800 #define VIVS_VS_OUTPUT_COUNT 0x00000804 +#define VIVS_VS_OUTPUT_COUNT_COUNT__MASK 0x000000ff +#define VIVS_VS_OUTPUT_COUNT_COUNT__SHIFT 0 +#define VIVS_VS_OUTPUT_COUNT_COUNT(x) (((x) << VIVS_VS_OUTPUT_COUNT_COUNT__SHIFT) & VIVS_VS_OUTPUT_COUNT_COUNT__MASK) +#define VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__MASK 0x0000ff00 +#define VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__SHIFT 8 +#define VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG(x) (((x) << VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__SHIFT) & VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__MASK) +#define VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__MASK 0x00ff0000 +#define VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__SHIFT 16 +#define VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG(x) (((x) << VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__SHIFT) & VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__MASK) #define VIVS_VS_INPUT_COUNT 0x00000808 #define VIVS_VS_INPUT_COUNT_COUNT__MASK 0x0000000f @@ -270,7 +279,7 @@ #define VIVS_VS_ICACHE_PREFETCH 0x0000088c -#define VIVS_VS_ICACHE_UNK00890 0x00000890 +#define VIVS_VS_ICACHE_PREFETCH_INSTRUCTIONS 0x00000890 #define VIVS_VS_HALTI5_UNK00898(i0) (0x00000898 + 0x4(i0)) #define VIVS_VS_HALTI5_UNK00898__ESIZE 0x00000004 @@ -421,17 +430,23 @@ #define VIVS_CL_UNK00924 0x00000924 -#define VIVS_CL_UNK00940 0x00000940 +#define VIVS_CL_GLOBAL_WORK_OFFSET_X 0x0000092c -#define VIVS_CL_UNK00944 0x00000944 +#define VIVS_CL_GLOBAL_WORK_OFFSET_Y 0x00000934 -#define VIVS_CL_UNK00948 0x00000948 +#define VIVS_CL_GLOBAL_WORK_OFFSET_Z 0x0000093c -#define VIVS_CL_UNK0094C 0x0000094c +#define VIVS_CL_WORKGROUP_COUNT_X 0x00000940 -#define VIVS_CL_UNK00950 0x00000950 +#define VIVS_CL_WORKGROUP_COUNT_Y 0x00000944 -#define VIVS_CL_UNK00954 0x00000954 +#define VIVS_CL_WORKGROUP_COUNT_Z 0x00000948 + +#define VIVS_CL_WORKGROUP_SIZE_X 0x0000094c + +#define VIVS_CL_WORKGROUP_SIZE_Y 0x00000950 + +#define VIVS_CL_WORKGROUP_SIZE_Z 
0x00000954 #define VIVS_CL_HALTI5_UNK00958 0x00000958 @@ -504,7 +519,7 @@ #define VIVS_PA_SHADER_ATTRIBUTES(i0) (0x00000a40 + 0x4(i0)) #define VIVS_PA_SHADER_ATTRIBUTES__ESIZE 0x00000004 -#define VIVS_PA_SHADER_ATTRIBUTES__LEN 0x0000000a +#define VIVS_PA_SHADER_ATTRIBUTES__LEN 0x00000010 #define VIVS_PA_SHADER_ATTRIBUTES_BYPASS_FLAT 0x00000001 #define VIVS_PA_SHADER_ATTRIBUTES_UNK4__MASK 0x000000f0 #define VIVS_PA_SHADER_ATTRIBUTES_UNK4__SHIFT 4 @@ -593,7 +608,7 @@ #define VIVS_PS_OUTPUT_REG 0x00001004 #define VIVS_PS_INPUT_COUNT 0x00001008 -#define VIVS_PS_INPUT_COUNT_COUNT__MASK 0x0000000f +#define VIVS_PS_INPUT_COUNT_COUNT__MASK 0x0000001f #define VIVS_PS_INPUT_COUNT_COUNT__SHIFT 0 #define VIVS_PS_INPUT_COUNT_COUNT(x) (((x) << VIVS_PS_INPUT_COUNT_COUNT__SHIFT) & VIVS_PS_INPUT_COUNT_COUNT__MASK) #define VIVS_PS_INPUT_COUNT_UNK8__MASK 0x00001f00 @@ -628,6 +643,8 @@ #define VIVS_PS_RANGE_HIGH__SHIFT 16 #define VIVS_PS_RANGE_HIGH(x) (((x) << VIVS_PS_RANGE_HIGH__SHIFT) & VIVS_PS_RANGE_HIGH__MASK) +#define VIVS_PS_REG_COUNT 0x0000101e + #define VIVS_PS_UNIFORM_BASE 0x00001024 #define VIVS_PS_INST_ADDR 0x00001028 @@ -676,7 +693,7 @@ #define VIVS_PS_ICACHE_PREFETCH 0x00001048 -#define VIVS_PS_ICACHE_UNK0104C 0x0000104c +#define VIVS_PS_ICACHE_PREFETCH_INSTRUCTIONS 0x0000104c #define VIVS_PS_MSAA_CONFIG 0x00001054 @@ -694,6 +711,12 @@ #define VIVS_PS_HALTI5_UNK01098 0x00001098 +#define VIVS_PS_PSCS_THROTTLE 0x0000109c + +#define VIVS_PS_NN_INST_ADDR 0x000010a0 + +#define VIVS_PS_TP_INST_ADDR 0x000010b8 + #define VIVS_PS_INST_MEM(i0) (0x00006000 + 0x4*(i0)) #define VIVS_PS_INST_MEM__ESIZE 0x00000004 #define VIVS_PS_INST_MEM__LEN 0x00000400
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/state_blt.xml.h ^
@@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 27198 bytes, from 2022-08-16 16:28:18) -- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) -- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53) -- state_hi.xml ( 34803 bytes, from 2022-08-16 16:28:18) +- state.xml ( 28218 bytes, from 2023-11-13 11:29:31) +- common.xml ( 35465 bytes, from 2023-11-13 11:29:31) +- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31) +- state_hi.xml ( 34935 bytes, from 2023-11-13 11:29:31) - copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) -- state_3d.xml ( 84326 bytes, from 2022-10-07 06:11:53) -- state_blt.xml ( 14424 bytes, from 2022-10-07 06:11:53) +- state_2d.xml ( 52271 bytes, from 2023-09-13 13:37:23) +- state_3d.xml ( 86123 bytes, from 2023-11-13 12:42:26) +- state_blt.xml ( 14424 bytes, from 2023-09-13 13:37:23) - state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2022 by the following authors: +Copyright (C) 2012-2023 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de>
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h ^
@@ -10,8 +10,8 @@ The rules-ng-ng source files this header was generated from are: - texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) - copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) -- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53) +- common.xml ( 35465 bytes, from 2023-11-13 11:29:31) +- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31) Copyright (C) 2012-2018 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com>
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a2xx/ir2_nir.c ^
@@ -43,7 +43,6 @@ .lower_all_io_to_temps = true, .vertex_id_zero_based = true, /* its not implemented anyway */ .lower_bitops = true, - .lower_rotate = true, .lower_vector_cmp = true, .lower_fdph = true, .has_fsub = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c ^
@@ -206,16 +206,6 @@ OUT_RELOC(ring, query_sample(aq, start)); /* srcC / } -static uint64_t -ticks_to_ns(uint32_t ts) -{ - / This is based on the 19.2MHz always-on rbbm timer. - * - * TODO we should probably query this value from kernel.. - / - return ts (1000000000 / 19200000); -} - static void time_elapsed_accumulate_result(struct fd_acc_query aq, struct fd_acc_query_sample s,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_query.cc ^
@@ -267,16 +267,6 @@ OUT_RING(ring, 0x00000000); } -static uint64_t -ticks_to_ns(uint64_t ts) -{ - /* This is based on the 19.2MHz always-on rbbm timer. - * - * TODO we should probably query this value from kernel.. - / - return ts (1000000000 / 19200000); -} - static void time_elapsed_accumulate_result(struct fd_acc_query aq, struct fd_acc_query_sample s,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_screen.c ^
@@ -141,8 +141,7 @@ if (screen->has_timestamp) { uint64_t n; fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n); - assert(screen->max_freq > 0); - return n * 1000000000 / screen->max_freq; + return ticks_to_ns(n); } else { int64_t cpu_time = os_time_get_nano(); return cpu_time + screen->cpu_gpu_time_delta; @@ -590,6 +589,8 @@ /* only a4xx, requires new enough kernel so we know max_freq: */ return (screen->max_freq > 0) && (is_a4xx(screen) \|\| is_a5xx(screen) \|\| is_a6xx(screen)); + case PIPE_CAP_TIMER_RESOLUTION: + return ticks_to_ns(1); case PIPE_CAP_QUERY_BUFFER_OBJECT: case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: @@ -1113,10 +1114,11 @@ screen->max_freq = 0; } else { screen->max_freq = val; - if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0) - screen->has_timestamp = true; } + if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0) + screen->has_timestamp = true; + screen->dev_id = fd_pipe_dev_id(screen->pipe); if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_util.h ^
@@ -507,6 +507,13 @@ return INDEX4_SIZE_32_BIT; } +/* Convert 19.2MHz RBBM always-on timer ticks to ns / +static inline uint64_t +ticks_to_ns(uint64_t ts) +{ + return ts (1000000000 / 19200000); +} + #ifdef __cplusplus } #endif
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/i915/i915_screen.c ^
@@ -117,7 +117,6 @@ .lower_fdph = true, .lower_flrp32 = true, .lower_fmod = true, - .lower_rotate = true, .lower_sincos = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, @@ -161,7 +160,6 @@ .lower_unpack_half_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_rotate = true, .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_mul_2x32_64 = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_batch.c ^
@@ -278,6 +278,9 @@ { unsigned index = READ_ONCE(bo->index); + if (index == -1) + return -1; + if (index < batch->exec_count && batch->exec_bos[index] == bo) return index;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_bufmgr.c ^
@@ -1413,6 +1413,7 @@ bo->bufmgr = bufmgr; bo->gem_handle = open_arg.handle; bo->name = name; + bo->index = -1; bo->real.global_name = handle; bo->real.prime_fd = -1; bo->real.reusable = false; @@ -1974,6 +1975,7 @@ bo->bufmgr = bufmgr; bo->name = "prime"; + bo->index = -1; bo->real.reusable = false; bo->real.imported = true; bo->real.mmap_mode = IRIS_MMAP_NONE;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_resolve.c ^
@@ -682,7 +682,7 @@ /* A data cache flush is not suggested by HW docs, but we found it to fix * a number of failures. */ - unsigned wa_flush = intel_device_info_is_dg2(batch->screen->devinfo) && + unsigned wa_flush = devinfo->verx10 >= 125 && res->aux.usage == ISL_AUX_USAGE_HIZ_CCS ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_state.c ^
@@ -6806,13 +6806,16 @@ bool program_needs_wa_14015055625 = false; +#if INTEL_WA_14015055625_GFX_VER /* Check if FS stage will use primitive ID overrides for Wa_14015055625. / const struct brw_vue_map last_vue_map = &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map; if ((wm_prog_data->inputs & VARYING_BIT_PRIMITIVE_ID) && - last_vue_map->varying_to_slot[VARYING_SLOT_PRIMITIVE_ID] == -1) { + last_vue_map->varying_to_slot[VARYING_SLOT_PRIMITIVE_ID] == -1 && + intel_needs_workaround(batch->screen->devinfo, 14015055625)) { program_needs_wa_14015055625 = true; } +#endif for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (!(stage_dirty & (IRIS_STAGE_DIRTY_VS << stage))) @@ -6828,8 +6831,10 @@ uint32_t scratch_addr = pin_scratch_space(ice, batch, prog_data, stage); +#if INTEL_WA_14015055625_GFX_VER shader_program_needs_wa_14015055625(ice, batch, prog_data, stage, &program_needs_wa_14015055625); +#endif if (stage == MESA_SHADER_FRAGMENT) { UNUSED struct iris_rasterizer_state cso = ice->state.cso_rast; @@ -7864,6 +7869,11 @@ #endif } + if (indirect) { + struct mi_builder b; + uint32_t mocs; + mi_builder_init(&b, batch->screen->devinfo, batch); + #define _3DPRIM_END_OFFSET 0x2420 #define _3DPRIM_START_VERTEX 0x2430 #define _3DPRIM_VERTEX_COUNT 0x2434 @@ -7871,103 +7881,100 @@ #define _3DPRIM_START_INSTANCE 0x243C #define _3DPRIM_BASE_VERTEX 0x2440 - struct mi_builder b; - uint32_t mocs; - mi_builder_init(&b, batch->screen->devinfo, batch); + if (!indirect->count_from_stream_output) { + if (indirect->indirect_draw_count) { + use_predicate = true; + + struct iris_bo draw_count_bo = + iris_resource_bo(indirect->indirect_draw_count); + unsigned draw_count_offset = + indirect->indirect_draw_count_offset; + mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0); + mi_builder_set_mocs(&b, mocs); + + if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) { + /* comparison = draw id < draw count / + struct mi_value comparison = + mi_ult(&b, 
mi_imm(drawid_offset), + mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); + + / predicate = comparison & conditional rendering predicate / + mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), + mi_iand(&b, comparison, mi_reg32(CS_GPR(15)))); + } else { + uint32_t mi_predicate; + + / Upload the id of the current primitive to MI_PREDICATE_SRC1. / + mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset)); + / Upload the current draw count from the draw parameters buffer + * to MI_PREDICATE_SRC0. Zero the top 32-bits of + * MI_PREDICATE_SRC0. + / + mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), + mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); - if (indirect && !indirect->count_from_stream_output) { - if (indirect->indirect_draw_count) { - use_predicate = true; - - struct iris_bo draw_count_bo = - iris_resource_bo(indirect->indirect_draw_count); - unsigned draw_count_offset = - indirect->indirect_draw_count_offset; - mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0); + if (drawid_offset == 0) { + mi_predicate = MI_PREDICATE \| MI_PREDICATE_LOADOP_LOADINV \| + MI_PREDICATE_COMBINEOP_SET \| + MI_PREDICATE_COMPAREOP_SRCS_EQUAL; + } else { + /* While draw_index < draw_count the predicate's result will be + * (draw_index == draw_count) ^ TRUE = TRUE + * When draw_index == draw_count the result is + * (TRUE) ^ TRUE = FALSE + * After this all results will be: + * (FALSE) ^ FALSE = FALSE + / + mi_predicate = MI_PREDICATE \| MI_PREDICATE_LOADOP_LOAD \| + MI_PREDICATE_COMBINEOP_XOR \| + MI_PREDICATE_COMPAREOP_SRCS_EQUAL; + } + iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); + } + } + struct iris_bo bo = iris_resource_bo(indirect->buffer); + assert(bo); + + mocs = iris_mocs(bo, &batch->screen->isl_dev, 0); mi_builder_set_mocs(&b, mocs); - if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) { - /* comparison = draw id < draw count / - struct mi_value comparison = - mi_ult(&b, mi_imm(drawid_offset), - mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); 
- - / predicate = comparison & conditional rendering predicate / - mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), - mi_iand(&b, comparison, mi_reg32(CS_GPR(15)))); + mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), + mi_mem32(ro_bo(bo, indirect->offset + 0))); + mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT), + mi_mem32(ro_bo(bo, indirect->offset + 4))); + mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), + mi_mem32(ro_bo(bo, indirect->offset + 8))); + if (draw->index_size) { + mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), + mi_mem32(ro_bo(bo, indirect->offset + 12))); + mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), + mi_mem32(ro_bo(bo, indirect->offset + 16))); } else { - uint32_t mi_predicate; - - / Upload the id of the current primitive to MI_PREDICATE_SRC1. / - mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset)); - / Upload the current draw count from the draw parameters buffer - * to MI_PREDICATE_SRC0. Zero the top 32-bits of - * MI_PREDICATE_SRC0. - / - mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), - mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); + mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), + mi_mem32(ro_bo(bo, indirect->offset + 12))); + mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0)); + } + } else if (indirect->count_from_stream_output) { + struct iris_stream_output_target so = + (void ) indirect->count_from_stream_output; + struct iris_bo so_bo = iris_resource_bo(so->offset.res); - if (drawid_offset == 0) { - mi_predicate = MI_PREDICATE \| MI_PREDICATE_LOADOP_LOADINV \| - MI_PREDICATE_COMBINEOP_SET \| - MI_PREDICATE_COMPAREOP_SRCS_EQUAL; - } else { - /* While draw_index < draw_count the predicate's result will be - * (draw_index == draw_count) ^ TRUE = TRUE - * When draw_index == draw_count the result is - * (TRUE) ^ TRUE = FALSE - * After this all results will be: - * (FALSE) ^ FALSE = FALSE - / - mi_predicate = MI_PREDICATE \| MI_PREDICATE_LOADOP_LOAD \| - MI_PREDICATE_COMBINEOP_XOR \| - MI_PREDICATE_COMPAREOP_SRCS_EQUAL; - } - 
iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); - } - } - struct iris_bo bo = iris_resource_bo(indirect->buffer); - assert(bo); + mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0); + mi_builder_set_mocs(&b, mocs); - mocs = iris_mocs(bo, &batch->screen->isl_dev, 0); - mi_builder_set_mocs(&b, mocs); + iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ); - mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), - mi_mem32(ro_bo(bo, indirect->offset + 0))); - mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT), - mi_mem32(ro_bo(bo, indirect->offset + 4))); - mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), - mi_mem32(ro_bo(bo, indirect->offset + 8))); - if (draw->index_size) { - mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), - mi_mem32(ro_bo(bo, indirect->offset + 12))); - mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), - mi_mem32(ro_bo(bo, indirect->offset + 16))); - } else { - mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), - mi_mem32(ro_bo(bo, indirect->offset + 12))); + struct iris_address addr = ro_bo(so_bo, so->offset.offset); + struct mi_value offset = + mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset); + mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/lima/lima_program.c ^
@@ -57,7 +57,6 @@ /* could be implemented by clamp */ .lower_fsat = true, .lower_bitops = true, - .lower_rotate = true, .lower_sincos = true, .lower_fceil = true, .lower_insert_byte = true, @@ -78,7 +77,6 @@ .lower_flrp32 = true, .lower_flrp64 = true, .lower_fsign = true, - .lower_rotate = true, .lower_fdot = true, .lower_fdph = true, .lower_insert_byte = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_screen.c ^
@@ -616,7 +616,6 @@ .lower_extract_word = true, .lower_insert_byte = true, .lower_insert_word = true, - .lower_rotate = true, .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_mul_2x32_64 = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_texture_handle.c ^
@@ -202,6 +202,7 @@ static void * compile_function(struct llvmpipe_context ctx, struct gallivm_state gallivm, LLVMValueRef function, + bool needs_caching, uint8_t cache_key[SHA1_DIGEST_LENGTH]) { gallivm_verify_function(gallivm, function); @@ -209,7 +210,7 @@ void function_ptr = func_to_pointer(gallivm_jit_function(gallivm, function)); - if (!gallivm->cache->data_size) + if (needs_caching) lp_disk_cache_insert_shader(llvmpipe_screen(ctx->pipe.screen), gallivm->cache, cache_key); gallivm_free_ir(gallivm); @@ -251,10 +252,12 @@ _mesa_sha1_update(&hash_ctx, image_function_base_hash, strlen(image_function_base_hash)); _mesa_sha1_update(&hash_ctx, texture, sizeof(texture)); _mesa_sha1_update(&hash_ctx, &op, sizeof(op)); + _mesa_sha1_update(&hash_ctx, &ms, sizeof(ms)); _mesa_sha1_final(&hash_ctx, cache_key); struct lp_cached_code cached = { 0 }; lp_disk_cache_find_shader(llvmpipe_screen(ctx->pipe.screen), &cached, cache_key); + bool needs_caching = !cached.data_size; struct gallivm_state gallivm = gallivm_create("sample_function", ctx->context, &cached); @@ -333,7 +336,7 @@ free(image_soa); - return compile_function(ctx, gallivm, function, cache_key); + return compile_function(ctx, gallivm, function, needs_caching, cache_key); } static void @@ -407,6 +410,7 @@ struct lp_cached_code cached = { 0 }; lp_disk_cache_find_shader(llvmpipe_screen(ctx->pipe.screen), &cached, cache_key); + bool needs_caching = !cached.data_size; struct gallivm_state gallivm = gallivm_create("sample_function", ctx->context, &cached); @@ -480,7 +484,7 @@ free(sampler_soa); - return compile_function(ctx, gallivm, function, cache_key); + return compile_function(ctx, gallivm, function, needs_caching, cache_key); } static void @@ -496,6 +500,7 @@ struct lp_cached_code cached = { 0 }; lp_disk_cache_find_shader(llvmpipe_screen(ctx->pipe.screen), &cached, cache_key); + bool needs_caching = !cached.data_size; struct gallivm_state *gallivm = gallivm_create("sample_function", ctx->context, &cached); @@ -560,7 
+565,7 @@ free(sampler_soa); - return compile_function(ctx, gallivm, function, cache_key); + return compile_function(ctx, gallivm, function, needs_caching, cache_key); } static void
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/nouveau/nv30/nv30_screen.c ^
@@ -477,7 +477,6 @@ .lower_flrp64 = true, .lower_fmod = true, .lower_fpow = true, /* In hardware as of nv40 FS */ - .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, .force_indirect_unrolling = nir_var_all,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_blit.c ^
@@ -92,7 +92,7 @@ /* Legalize here because it could trigger a recursive blit otherwise */ pan_legalize_afbc_format(ctx, pan_resource(info->dst.resource), - info->dst.format, true); + info->dst.format, true, false); panfrost_blitter_save(ctx, info->render_condition_enable ? PAN_RENDER_BLIT_COND
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_cmdstream.c ^
@@ -4224,7 +4224,7 @@ struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); - pan_legalize_afbc_format(ctx, pan_resource(texture), template->format, + pan_legalize_afbc_format(ctx, pan_resource(texture), template->format, false, false); pipe_reference(NULL, &texture->reference);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_context.c ^
@@ -283,7 +283,7 @@ */ if (drm_is_afbc(rsrc->image.layout.modifier)) { pan_resource_modifier_convert( - ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, + ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, true, "Shader image"); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_job.c ^
@@ -68,7 +68,7 @@ { if (surf) { struct panfrost_resource rsrc = pan_resource(surf->texture); - pan_legalize_afbc_format(batch->ctx, rsrc, surf->format, true); + pan_legalize_afbc_format(batch->ctx, rsrc, surf->format, true, false); panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT); } } @@ -493,6 +493,19 @@ fb->rts[i].discard = !reserve && !(batch->resolve & mask); + / Clamp the rendering area to the damage extent. The + * KHR_partial_update spec states that trying to render outside of + * the damage region is "undefined behavior", so we should be safe. + */ + if (!fb->rts[i].discard) { + fb->extent.minx = MAX2(fb->extent.minx, prsrc->damage.extent.minx); + fb->extent.miny = MAX2(fb->extent.miny, prsrc->damage.extent.miny); + fb->extent.maxx = MIN2(fb->extent.maxx, prsrc->damage.extent.maxx - 1); + fb->extent.maxy = MIN2(fb->extent.maxy, prsrc->damage.extent.maxy - 1); + assert(fb->extent.minx <= fb->extent.maxx); + assert(fb->extent.miny <= fb->extent.maxy); + } + rts[i].format = surf->format; rts[i].dim = MALI_TEXTURE_DIMENSION_2D; rts[i].last_level = rts[i].first_level = surf->u.tex.level;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_resource.c ^
@@ -1072,6 +1072,19 @@ box->width, box->height, box->depth); } +static bool +panfrost_can_discard(struct pipe_resource resource, const struct pipe_box box, + unsigned usage) +{ + struct panfrost_resource rsrc = pan_resource(resource); + + return ((usage & PIPE_MAP_DISCARD_RANGE) && + !(usage & PIPE_MAP_UNSYNCHRONIZED) && + !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && + panfrost_box_covers_resource(resource, box) && + !(rsrc->image.data.bo->flags & PAN_BO_SHARED)); +} + static void panfrost_ptr_map(struct pipe_context pctx, struct pipe_resource resource, unsigned level, @@ -1155,11 +1168,7 @@ /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is * being mapped. / - if ((usage & PIPE_MAP_DISCARD_RANGE) && !(usage & PIPE_MAP_UNSYNCHRONIZED) && - !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && - panfrost_box_covers_resource(resource, box) && - !(rsrc->image.data.bo->flags & PAN_BO_SHARED)) { - + if (panfrost_can_discard(resource, box, usage)) { usage \|= PIPE_MAP_DISCARD_WHOLE_RESOURCE; } @@ -1298,13 +1307,10 @@ void pan_resource_modifier_convert(struct panfrost_context ctx, struct panfrost_resource rsrc, uint64_t modifier, - const char reason) + bool copy_resource, const char reason) { assert(!rsrc->modifier_constant); - perf_debug_ctx(ctx, "%s AFBC with a blit. Reason: %s", - drm_is_afbc(modifier) ? 
"Unpacking" : "Disabling", reason); - struct pipe_resource tmp_prsrc = panfrost_resource_create_with_modifier( ctx->base.screen, &rsrc->base, modifier); struct panfrost_resource tmp_rsrc = pan_resource(tmp_prsrc); @@ -1316,31 +1322,33 @@ struct pipe_box box = {0, 0, 0, rsrc->base.width0, rsrc->base.height0, depth}; - struct pipe_blit_info blit = { - .dst.resource = &tmp_rsrc->base, - .dst.format = tmp_rsrc->base.format, - .dst.box = box, - .src.resource = &rsrc->base, - .src.format = rsrc->base.format, - .src.box = box, - .mask = util_format_get_mask(tmp_rsrc->base.format), - .filter = PIPE_TEX_FILTER_NEAREST, - }; - - for (int i = 0; i <= rsrc->base.last_level; i++) { - if (BITSET_TEST(rsrc->valid.data, i)) { - blit.dst.level = blit.src.level = i; - panfrost_blit(&ctx->base, &blit); + if (copy_resource) { + struct pipe_blit_info blit = { + .dst.resource = &tmp_rsrc->base, + .dst.format = tmp_rsrc->base.format, + .dst.box = box, + .src.resource = &rsrc->base, + .src.format = rsrc->base.format, + .src.box = box, + .mask = util_format_get_mask(tmp_rsrc->base.format), + .filter = PIPE_TEX_FILTER_NEAREST, + }; + + for (int i = 0; i <= rsrc->base.last_level; i++) { + if (BITSET_TEST(rsrc->valid.data, i)) { + blit.dst.level = blit.src.level = i; + panfrost_blit(&ctx->base, &blit); + } } - } - panfrost_bo_unreference(rsrc->image.data.bo); + panfrost_bo_unreference(rsrc->image.data.bo); + } rsrc->image.data.bo = tmp_rsrc->image.data.bo; panfrost_bo_reference(rsrc->image.data.bo); panfrost_resource_setup(pan_device(ctx->base.screen), rsrc, modifier, - blit.dst.format); + tmp_rsrc->base.format); / panfrost_resource_setup will force the modifier to stay constant when * called with a specific modifier. 
We don't want that here, we want to * be able to convert back to another modifier if needed / @@ -1355,7 +1363,7 @@ void pan_legalize_afbc_format(struct panfrost_context ctx, struct panfrost_resource rsrc, - enum pipe_format format, bool write) + enum pipe_format format, bool write, bool discard) { struct panfrost_device dev = pan_device(ctx->base.screen); @@ -1365,7 +1373,7 @@ if (panfrost_afbc_format(dev->arch, rsrc->base.format) != panfrost_afbc_format(dev->arch, format)) { pan_resource_modifier_convert( - ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, + ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, !discard, "Reinterpreting AFBC surface as incompatible format"); return; } @@ -1373,7 +1381,7 @@ if (write && (rsrc->image.layout.modifier & AFBC_FORMAT_MOD_SPARSE) == 0) pan_resource_modifier_convert( ctx, rsrc, rsrc->image.layout.modifier \| AFBC_FORMAT_MOD_SPARSE, - "Legalizing resource to allow writing"); + !discard, "Legalizing resource to allow writing"); } static bool @@ -1580,6 +1588,10 @@ pan_resource(trans->staging.rsrc)->image.data.bo; panfrost_bo_reference(prsrc->image.data.bo); } else { + bool discard = panfrost_can_discard(&prsrc->base, &transfer->box, + transfer->usage); + pan_legalize_afbc_format(ctx, prsrc, prsrc->image.layout.format, + true, discard); pan_blit_from_staging(pctx, trans); panfrost_flush_batches_accessing_rsrc( ctx, pan_resource(trans->staging.rsrc),
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_resource.h ^
@@ -189,11 +189,13 @@ void pan_resource_modifier_convert(struct panfrost_context ctx, struct panfrost_resource rsrc, - uint64_t modifier, const char reason); + uint64_t modifier, bool copy_resource, + const char reason); void pan_legalize_afbc_format(struct panfrost_context ctx, struct panfrost_resource rsrc, - enum pipe_format format, bool write); + enum pipe_format format, bool write, + bool discard); void pan_dump_resource(struct panfrost_context ctx, struct panfrost_resource rsc);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_fs.c ^
@@ -65,11 +65,13 @@ case TGSI_SEMANTIC_TEXCOORD: assert(index < ATTR_TEXCOORD_COUNT); fs_inputs->texcoord[index] = i; + fs_inputs->num_texcoord++; break; case TGSI_SEMANTIC_GENERIC: assert(index < ATTR_GENERIC_COUNT); fs_inputs->generic[index] = i; + fs_inputs->num_generic++; break; case TGSI_SEMANTIC_FOG:
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_screen.c ^
@@ -503,7 +503,6 @@ .lower_ftrunc = true, \ .lower_insert_byte = true, \ .lower_insert_word = true, \ - .lower_rotate = true, \ .lower_uniforms_to_ubo = true, \ .lower_vector_cmp = true, \ .no_integers = true, \
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_state_derived.c ^
@@ -541,6 +541,14 @@ } } + for (; i < ATTR_GENERIC_COUNT; i++) { + if (fs_inputs->generic[i] != ATTR_UNUSED) { + fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, " + "not enough hardware slots (it's not a bug, do not " + "report it).\n", i); + } + } + gen_offset = 0; /* Re-use color varyings for texcoords if possible. * @@ -645,6 +653,14 @@ } } + for (; i < ATTR_TEXCOORD_COUNT; i++) { + if (fs_inputs->texcoord[i] != ATTR_UNUSED) { + fprintf(stderr, "r300: ERROR: FS input texcoord %i unassigned, " + "not enough hardware slots (it's not a bug, do not " + "report it).\n", i); + } + } + /* Rasterize pointcoord. / if (fs_inputs->pcoord != ATTR_UNUSED && tex_count < 8) { @@ -666,14 +682,6 @@ tex_ptr += 2; } - for (; i < ATTR_GENERIC_COUNT; i++) { - if (fs_inputs->generic[i] != ATTR_UNUSED) { - fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, " - "not enough hardware slots (it's not a bug, do not " - "report it).\n", i); - } - } - / Rasterize fog coordinates. / if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) { / Set up the fog coordinates in VAP. */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r600/r600_pipe_common.c ^
@@ -1394,7 +1394,6 @@ .lower_insert_byte = true, .lower_insert_word = true, .lower_ldexp = true, - .lower_rotate = true, /* due to a bug in the shader compiler, some loops hang * if they are not unrolled, see: * https://bugs.freedesktop.org/show_bug.cgi?id=86720 @@ -1453,7 +1452,8 @@ nir_lower_dceil \| nir_lower_dmod \| nir_lower_dsub \| - nir_lower_dtrunc; + nir_lower_dtrunc \| + nir_lower_dround_even; } rscreen->nir_options_fs = rscreen->nir_options;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp ^
@@ -52,7 +52,9 @@ for (int i = 0; i < 3; ++i) { m_local_invocation_id[i] = vf.allocate_pinned_register(thread_id_sel, i); + m_local_invocation_id[i]->set_flag(Register::pin_end); m_workgroup_id[i] = vf.allocate_pinned_register(wg_id_sel, i); + m_workgroup_id[i]->set_flag(Register::pin_end); } return 2; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c ^
@@ -918,7 +918,8 @@ radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5); /* ref_pic_list_modification() / - if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) { + if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR && + enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) { radeon_enc_code_fixed_bits(enc, 0x0, 1); / long-term reference */ @@ -960,6 +961,7 @@ } if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) && + (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) && (enc->enc_pic.spec_misc.cabac_enable)) radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/radeon_vcn_enc_3_0.c ^
@@ -292,7 +292,8 @@ radeon_enc_code_fixed_bits(enc, 0x1, 1); /* direct_spatial_mv_pred_flag / / ref_pic_list_modification() / - if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) { + if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR && + enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) { radeon_enc_code_fixed_bits(enc, 0x0, 1); / long-term reference */ @@ -338,6 +339,7 @@ } if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) && + (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) && (enc->enc_pic.spec_misc.cabac_enable)) radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_descriptors.c ^
@@ -382,17 +382,33 @@ state[3] \|= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode); state[4] \|= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch); } else { + state[3] \|= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode); + + uint32_t hw_format = G_008F14_DATA_FORMAT(state[1]); uint16_t epitch = tex->surface.u.gfx9.epitch; - if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM && - block_width == 1) { - /* epitch is patched in ac_surface for sdma/vcn blocks to get - * a value expressed in elements unit. - * But here the texture is used with block_width == 1 so we - * need epitch in pixel units. - / - epitch = (epitch + 1) / tex->surface.blk_w - 1; + + / epitch is surf_pitch - 1 and are in elements unit. + * For some reason I don't understand, when a packed YUV format + * like UYUV is used, we have to double epitch (making it a pixel + * pitch instead of an element pitch). Note that it's only done + * when sampling the texture using its native format; we don't + * need to do this when sampling it as UINT32 (as done by + * SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT). + * This looks broken, so it's possible that surf_pitch / epitch + * are computed incorrectly, but that's the only way I found + * to get these use cases to work properly: + * - yuyv dmabuf import (#6131) + * - jpeg vaapi decode + * - yuyv texture sampling (!26947) + * - jpeg vaapi get image (#10375) + / + if ((tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM \|\| + tex->buffer.b.b.format == PIPE_FORMAT_G8R8_B8R8_UNORM) && + (hw_format == V_008F14_IMG_DATA_FORMAT_GB_GR \|\| + hw_format == V_008F14_IMG_DATA_FORMAT_BG_RG)) { + epitch = (epitch + 1) 2 - 1; } - state[3] \|= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode); + state[4] \|= S_008F20_PITCH(epitch); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_get.c ^
@@ -749,7 +749,7 @@ else return 0; case PIPE_VIDEO_CAP_EFC_SUPPORTED: - return ((sscreen->info.family >= CHIP_RENOIR) && + return ((sscreen->info.family > CHIP_RENOIR) && !(sscreen->debug_flags & DBG(NO_EFC))); case PIPE_VIDEO_CAP_ENC_MAX_REFERENCES_PER_FRAME: @@ -1323,7 +1323,6 @@ .lower_hadd = true, .lower_hadd64 = true, .lower_fisnormal = true, - .lower_rotate = true, .lower_to_scalar = true, .lower_to_scalar_filter = sscreen->info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pipe.c ^
@@ -967,6 +967,9 @@ si_resource_reference(&sscreen->attribute_ring, NULL); + util_queue_destroy(&sscreen->shader_compiler_queue); + util_queue_destroy(&sscreen->shader_compiler_queue_low_priority); + for (unsigned i = 0; i < ARRAY_SIZE(sscreen->aux_contexts); i++) { if (!sscreen->aux_contexts[i].ctx) continue; @@ -989,9 +992,6 @@ sscreen->async_compute_context->destroy(sscreen->async_compute_context); } - util_queue_destroy(&sscreen->shader_compiler_queue); - util_queue_destroy(&sscreen->shader_compiler_queue_low_priority); - /* Release the reference on glsl types of the compiler threads. */ glsl_type_singleton_decref();
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pm4.c ^
@@ -135,7 +135,7 @@ if (strstr(ac_get_register_name(state->screen->info.gfx_level, state->screen->info.family, reg_offset), "SPI_SHADER_PGM_LO_")) { - state->reg_va_low_idx = get_packed_reg_valueN_idx(state, i); + state->spi_shader_pgm_lo_reg = reg_offset; break; } } @@ -162,7 +162,8 @@ if (strstr(ac_get_register_name(state->screen->info.gfx_level, state->screen->info.family, reg_base_offset + i * 4), "SPI_SHADER_PGM_LO_")) { - state->reg_va_low_idx = state->last_pm4 + 2 + i; + state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4; + break; } }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pm4.h ^
@@ -45,7 +45,7 @@ uint16_t max_dw; /* Used by SQTT to override the shader address / - uint16_t reg_va_low_idx; + uint32_t spi_shader_pgm_lo_reg; / This must be the last field because the array can continue after the structure. */ uint32_t pm4[64];
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_sqtt.c ^
@@ -677,12 +677,14 @@ list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record, list) { list_del(&record->list); + pso_correlation->record_count--; free(record); } list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list) { list_del(&record->list); + loader_events->record_count--; free(record); } @@ -698,6 +700,7 @@ } list_del(&record->list); free(record); + code_object->record_count--; } ac_sqtt_finish(sctx->sqtt); @@ -1028,7 +1031,7 @@ struct rgp_code_object code_object = &sctx->sqtt->rgp_code_object; struct rgp_code_object_record record; - record = malloc(sizeof(struct rgp_code_object_record)); + record = calloc(1, sizeof(struct rgp_code_object_record)); if (!record) return false;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state.c ^
@@ -5441,6 +5441,7 @@ sctx->atoms.s.pm4_states[SI_STATE_IDX(rasterizer)].emit = si_pm4_emit_state; sctx->atoms.s.pm4_states[SI_STATE_IDX(dsa)].emit = si_pm4_emit_state; sctx->atoms.s.pm4_states[SI_STATE_IDX(poly_offset)].emit = si_pm4_emit_state; + sctx->atoms.s.pm4_states[SI_STATE_IDX(sqtt_pipeline)].emit = si_pm4_emit_state; sctx->atoms.s.pm4_states[SI_STATE_IDX(ls)].emit = si_pm4_emit_shader; sctx->atoms.s.pm4_states[SI_STATE_IDX(hs)].emit = si_pm4_emit_shader; sctx->atoms.s.pm4_states[SI_STATE_IDX(es)].emit = si_pm4_emit_shader;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_draw.cpp ^
@@ -370,9 +370,8 @@ struct si_pm4_state pm4 = &shader->pm4; - uint32_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8; - assert(PKT3_IT_OPCODE_G(pm4->pm4[pm4->reg_va_low_idx - 2]) == PKT3_SET_SH_REG); - uint32_t reg = (pm4->pm4[pm4->reg_va_low_idx - 1] << 2) + SI_SH_REG_OFFSET; + uint64_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8; + uint32_t reg = pm4->spi_shader_pgm_lo_reg; si_pm4_set_reg(&pipeline->pm4, reg, va_low); } } @@ -883,8 +882,10 @@ sctx->family == CHIP_HAWAII && G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) && num_instanced_prims_less_than<IS_DRAW_VERTEX_STATE>(indirect, prim, min_vertex_count, instance_count, 2, sctx->patch_vertices)) { - sctx->flags \|= SI_CONTEXT_VGT_FLUSH; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + / The cache flushes should have been emitted already. / + assert(sctx->flags == 0); + sctx->flags = SI_CONTEXT_VGT_FLUSH; + si_emit_cache_flush_direct(sctx); } } @@ -2238,12 +2239,13 @@ / Emit states. / si_emit_rasterizer_prim_state<GFX_VERSION, HAS_GS, NGG>(sctx); - / This must be done before si_emit_all_states because it can set cache flush flags. / + / This emits states and flushes caches. / + si_emit_all_states(sctx, masked_atoms); + / This can be done after si_emit_all_states because it doesn't set cache flush flags. / si_emit_draw_registers<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE> (sctx, indirect, prim, index_size, instance_count, primitive_restart, info->restart_index, min_direct_count); - / This emits states and flushes caches. / - si_emit_all_states(sctx, masked_atoms); + / <-- CUs are idle here if the cache_flush state waited. / / This must be done after si_emit_all_states, which can affect this. */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp ^
@@ -1983,7 +1983,7 @@ assert(0); } - assert(!(sscreen->debug_flags & DBG(SQTT)) \|\| shader->pm4.reg_va_low_idx != 0); + assert(!(sscreen->debug_flags & DBG(SQTT)) \|\| shader->pm4.spi_shader_pgm_lo_reg != 0); } static void si_clear_vs_key_inputs(struct si_context sctx, union si_shader_key key,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c ^
@@ -33,8 +33,14 @@ line_width = roundf(line_width); line_width = MAX2(line_width, 1); - info.clip_half_line_width[0] = line_width * 0.5 / fabs(info.scale[0]); - info.clip_half_line_width[1] = line_width * 0.5 / fabs(info.scale[1]); + float half_line_width = line_width * 0.5; + if (info.scale[0] == 0 \|\| info.scale[1] == 0) { + info.clip_half_line_width[0] = 0; + info.clip_half_line_width[1] = 0; + } else { + info.clip_half_line_width[0] = half_line_width / fabs(info.scale[0]); + info.clip_half_line_width[1] = half_line_width / fabs(info.scale[1]); + } /* If the Y axis is inverted (OpenGL default framebuffer), reverse it. * This is because the viewport transformation inverts the clip space
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/sp_screen.c ^
@@ -85,7 +85,6 @@ .lower_fdph = true, .lower_flrp64 = true, .lower_fmod = true, - .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, .lower_int64_options = nir_lower_imul_2x32_64,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/svga/svga_screen.c ^
@@ -737,7 +737,6 @@ .lower_fdph = true, \ .lower_flrp64 = true, \ .lower_ldexp = true, \ - .lower_rotate = true, \ .lower_uniforms_to_ubo = true, \ .lower_vector_cmp = true, \ .lower_cs_local_index_to_id = true, \
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/v3d/v3d_screen.c ^
@@ -729,7 +729,6 @@ .lower_ldexp = true, .lower_mul_high = true, .lower_wpos_pntc = true, - .lower_rotate = true, .lower_to_scalar = true, .lower_int64_options = nir_lower_imul_2x32_64, .lower_fquantize2f16 = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/vc4/vc4_program.c ^
@@ -2174,7 +2174,6 @@ .lower_ldexp = true, .lower_fneg = true, .lower_ineg = true, - .lower_rotate = true, .lower_to_scalar = true, .lower_umax = true, .lower_umin = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/virgl/virgl_screen.c ^
@@ -97,6 +97,8 @@ return vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_MIRROR_CLAMP_TO_EDGE; FALLTHROUGH; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + if (vscreen->caps.caps.v2.host_feature_check_version >= 22) + return vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_MIRROR_CLAMP; return vscreen->caps.caps.v1.bset.mirror_clamp && !(vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_HOST_IS_GLES); case PIPE_CAP_TEXTURE_SWIZZLE:
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json ^
@@ -1,5 +1,5 @@ { - "$schema": "https://schema.khronos.org/vulkan/profiles-0.8.1-251.json", + "$schema": "https://schema.khronos.org/vulkan/profiles-0.8.2-271.json", "capabilities": { "vulkan10requirements": { "features": { @@ -164,7 +164,7 @@ "VkPhysicalDeviceMaintenance4Features": { "maintenance4": true }, - "VkPhysicalDeviceMaintenance5Features": { + "VkPhysicalDeviceMaintenance5FeaturesKHR": { "maintenance5": true } },
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_batch.c ^
@@ -414,6 +414,18 @@ if (bs == ctx->last_free_batch_state) ctx->last_free_batch_state = NULL; } + /* try from the ones that are given back to the screen next / + if (!bs) { + simple_mtx_lock(&screen->free_batch_states_lock); + if (screen->free_batch_states) { + bs = screen->free_batch_states; + bs->ctx = ctx; + screen->free_batch_states = bs->next; + if (bs == screen->last_free_batch_state) + screen->last_free_batch_state = NULL; + } + simple_mtx_unlock(&screen->free_batch_states_lock); + } if (!bs && ctx->batch_states) { / states are stored sequentially, so if the first one doesn't work, none of them will */ if (zink_screen_check_last_finished(screen, ctx->batch_states->fence.batch_id) \|\|
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_compiler.c ^
@@ -1227,7 +1227,6 @@ .lower_ldexp = true, .lower_mul_high = true, - .lower_rotate = true, .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_uadd_sat = true, @@ -4912,7 +4911,7 @@ if (var->data.location == VARYING_SLOT_VAR0) var->data.driver_location = 0; else if (var->data.patch) - var->data.driver_location = var->data.location - VARYING_SLOT_VAR0; + var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0; else var->data.driver_location = var->data.location; } @@ -4939,7 +4938,7 @@ size += glsl_count_vec4_slots(var->type, false, false); } if (var->data.patch) - var->data.driver_location = var->data.location - VARYING_SLOT_VAR0; + var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0; else var->data.driver_location = slot; found = true;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_context.c ^
@@ -124,7 +124,9 @@ if (util_queue_is_initialized(&screen->flush_queue)) util_queue_finish(&screen->flush_queue); if (ctx->batch.state && !screen->device_lost) { + simple_mtx_lock(&screen->queue_lock); VkResult result = VKSCR(QueueWaitIdle)(screen->queue); + simple_mtx_unlock(&screen->queue_lock); if (result != VK_SUCCESS) mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(result)); @@ -159,16 +161,42 @@ while (bs) { struct zink_batch_state bs_next = bs->next; zink_clear_batch_state(ctx, bs); - zink_batch_state_destroy(screen, bs); + / restore link as we insert them into the screens free_batch_states + * list below + / + bs->next = bs_next; bs = bs_next; } bs = ctx->free_batch_states; while (bs) { struct zink_batch_state bs_next = bs->next; zink_clear_batch_state(ctx, bs); - zink_batch_state_destroy(screen, bs); + bs->ctx = NULL; + /* restore link as we insert them into the screens free_batch_states + * list below + / + bs->next = bs_next; bs = bs_next; } + simple_mtx_lock(&screen->free_batch_states_lock); + if (ctx->batch_states) { + if (screen->free_batch_states) + screen->last_free_batch_state->next = ctx->batch_states; + else { + screen->free_batch_states = ctx->batch_states; + screen->last_free_batch_state = screen->free_batch_states; + } + while (screen->last_free_batch_state->next) + screen->last_free_batch_state = screen->last_free_batch_state->next; + } + if (ctx->free_batch_states) { + if (screen->free_batch_states) + screen->last_free_batch_state->next = ctx->free_batch_states; + else + screen->free_batch_states = ctx->free_batch_states; + screen->last_free_batch_state = ctx->last_free_batch_state; + } + simple_mtx_unlock(&screen->free_batch_states_lock); if (ctx->batch.state) { zink_clear_batch_state(ctx, ctx->batch.state); zink_batch_state_destroy(screen, ctx->batch.state); @@ -707,7 +735,7 @@ if (res->obj->is_buffer) { if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { ctx->di.db.tbos[shader][slot].address = res->obj->bda + 
ctx->sampler_views[shader][slot]->u.buf.offset; - ctx->di.db.tbos[shader][slot].range = ctx->sampler_views[shader][slot]->u.buf.size; + ctx->di.db.tbos[shader][slot].range = zink_sampler_view(ctx->sampler_views[shader][slot])->tbo_size; ctx->di.db.tbos[shader][slot].format = zink_get_format(screen, ctx->sampler_views[shader][slot]->format); } else { struct zink_buffer_view bv = get_bufferview_for_binding(ctx, shader, type, slot); @@ -1197,8 +1225,12 @@ } err = !sampler_view->image_view; } else { - if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) + if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) { + /* always enforce limit clamping / + unsigned blocksize = util_format_get_blocksize(state->format); + sampler_view->tbo_size = MIN2(state->u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) blocksize; return &sampler_view->base; + } VkBufferViewCreateInfo bvci = create_bvci(ctx, res, state->format, state->u.buf.offset, state->u.buf.size); sampler_view->buffer_view = get_buffer_view(ctx, res, &bvci); err = !sampler_view->buffer_view; @@ -1236,9 +1268,10 @@ struct pipe_sampler_view pview) { struct zink_sampler_view view = zink_sampler_view(pview); - if (pview->texture->target == PIPE_BUFFER) - zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL); - else { + if (pview->texture->target == PIPE_BUFFER) { + if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB) + zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL); + } else { zink_surface_reference(zink_screen(pctx->screen), &view->image_view, NULL); zink_surface_reference(zink_screen(pctx->screen), &view->cube_array, NULL); zink_surface_reference(zink_screen(pctx->screen), &view->zs_view, NULL); @@ -1919,6 +1952,11 @@ zink_resource_access_is_write(access), false); } memcpy(&a->base, images + i, sizeof(struct pipe_image_view)); + if (b->resource->target == PIPE_BUFFER) { + /* always enforce limit clamping / + unsigned blocksize = 
util_format_get_blocksize(a->base.format); + a->base.u.buf.size = MIN2(a->base.u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) blocksize; + } update = true; res->image_binds[shader_type] \|= BITFIELD_BIT(start_slot + i); } else if (a->base.resource) { @@ -3812,7 +3850,8 @@ } } - if (!batch->has_work) { + /* TODO: if swapchains gain timeline semaphore semantics, `flags` can be eliminated and no-op fence can return timeline id / + if (!batch->has_work && flags) { if (pfence) { / reuse last fence / fence = ctx->last_fence; @@ -4322,7 +4361,7 @@ } rebind_mask &= ~BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER); } - if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask) + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; if ((rebind_mask & BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER)) \|\| (!rebind_mask && res->vbo_bind_mask)) { @@ -4337,7 +4376,7 @@ rebind_mask &= ~BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER); ctx->vertex_buffers_dirty = true; } - if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask) + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const uint32_t ubo_mask = rebind_mask ? @@ -4353,7 +4392,7 @@ } } rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_UBO_VS, MESA_SHADER_STAGES); - if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask) + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const unsigned ssbo_mask = rebind_mask ? @@ -4370,7 +4409,7 @@ } } rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES); - if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask) + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const unsigned sampler_mask = rebind_mask ? 
rebind_mask & BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES) : @@ -4385,7 +4424,7 @@ } } rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES); - if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask) + if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask) goto end; const unsigned image_mask = rebind_mask ? @@ -4894,6 +4933,11 @@ zink_resource_copies_reset(d); / force counter buffer reset / d->so_valid = false; + / FIXME: tc buffer sharedness tracking */ + if (!num_rebinds) { + num_rebinds = d->bind_count[0] + d->bind_count[1]; + rebind_mask = 0; + } if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) < num_rebinds) ctx->buffer_rebind_counter = p_atomic_inc_return(&screen->buffer_rebind_counter); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_descriptors.c ^
@@ -1501,7 +1501,7 @@ } if (bs->dd.db_xfer) - pipe_buffer_unmap(&bs->ctx->base, bs->dd.db_xfer); + zink_screen_buffer_unmap(&screen->base, bs->dd.db_xfer); bs->dd.db_xfer = NULL; if (bs->dd.db) screen->base.resource_destroy(&screen->base, &bs->dd.db->base.b); @@ -1593,7 +1593,7 @@ if (!pres) return false; bs->dd.db = zink_resource(pres); - bs->dd.db_map = pipe_buffer_map(&bs->ctx->base, pres, PIPE_MAP_READ \| PIPE_MAP_WRITE \| PIPE_MAP_PERSISTENT \| PIPE_MAP_THREAD_SAFE, &bs->dd.db_xfer); + bs->dd.db_map = pipe_buffer_map(&bs->ctx->base, pres, PIPE_MAP_READ \| PIPE_MAP_WRITE \| PIPE_MAP_PERSISTENT \| PIPE_MAP_COHERENT \| PIPE_MAP_THREAD_SAFE, &bs->dd.db_xfer); } return true; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_draw.cpp ^
@@ -435,6 +435,8 @@ /* always rebind all stages */ VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects); VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE); + VKCTX(CmdSetTessellationDomainOriginEXT)(bs->cmdbuf, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT); + VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled); } ctx->shobj_draw = true; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_kopper.c ^
@@ -318,7 +318,9 @@ if (error == VK_ERROR_NATIVE_WINDOW_IN_USE_KHR) { if (util_queue_is_initialized(&screen->flush_queue)) util_queue_finish(&screen->flush_queue); + simple_mtx_lock(&screen->queue_lock); VkResult result = VKSCR(QueueWaitIdle)(screen->queue); + simple_mtx_unlock(&screen->queue_lock); if (result != VK_SUCCESS) mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(result)); error = VKSCR(CreateSwapchainKHR)(screen->dev, &cswap->scci, NULL,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_resource.c ^
@@ -359,6 +359,7 @@ mod_info.drmFormatModifier = modifier; mod_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; mod_info.queueFamilyIndexCount = 0; + mod_info.pQueueFamilyIndices = NULL; info.pNext = &mod_info; } @@ -697,6 +698,7 @@ ici->flags \|= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; ici->usage = 0; ici->queueFamilyIndexCount = 0; + ici->pQueueFamilyIndices = NULL; /* assume we're going to be doing some CompressedTexSubImage / if (util_format_is_compressed(templ->format) && (ici->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) && @@ -2826,6 +2828,16 @@ unmap_resource(screen, res); } +void +zink_screen_buffer_unmap(struct pipe_screen pscreen, struct pipe_transfer ptrans) +{ + struct zink_screen screen = zink_screen(pscreen); + struct zink_transfer trans = (struct zink_transfer )ptrans; + if (trans->base.b.usage & PIPE_MAP_ONCE && !trans->staging_res) + do_transfer_unmap(screen, trans); + transfer_unmap(NULL, ptrans); +} + static void zink_buffer_unmap(struct pipe_context pctx, struct pipe_transfer ptrans) {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_resource.h ^
@@ -44,7 +44,8 @@ void zink_context_resource_init(struct pipe_context *pctx); - +void +zink_screen_buffer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptrans); void zink_get_depth_stencil_resources(struct pipe_resource *res, struct zink_resource **out_z,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_screen.c ^
@@ -55,8 +55,13 @@ #include <xf86drm.h> #include <fcntl.h> #include <sys/stat.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS #include <sys/sysmacros.h> #endif +#endif static int num_screens = 0; bool zink_tracing = false; @@ -1456,6 +1461,12 @@ zink_destroy_screen(struct pipe_screen pscreen) { struct zink_screen screen = zink_screen(pscreen); + struct zink_batch_state bs = screen->free_batch_states; + while (bs) { + struct zink_batch_state bs_next = bs->next; + zink_batch_state_destroy(screen, bs); + bs = bs_next; + } #ifdef HAVE_RENDERDOC_APP_H if (screen->renderdoc_capture_all && p_atomic_dec_zero(&num_screens)) @@ -2444,8 +2455,12 @@ { struct zink_screen screen = zink_screen(pscreen); count = screen->modifier_props[format].drmFormatModifierCount; - for (int i = 0; i < MIN2(max, count); i++) + for (int i = 0; i < MIN2(max, count); i++) { + if (external_only) + external_only[i] = 0; + modifiers[i] = screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier; + } } static bool @@ -3241,10 +3256,10 @@ { uint64_t biggest_vis_vram = 0; for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL_VISIBLE]; i++) - biggest_vis_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[i].heapIndex].size); + biggest_vis_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL_VISIBLE][i]].heapIndex].size); uint64_t biggest_vram = 0; for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL]; i++) - biggest_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[i].heapIndex].size); + biggest_vram = MAX2(biggest_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL][i]].heapIndex].size); /* determine if vis vram is roughly equal to total vram / if (biggest_vis_vram > 
biggest_vram * 0.9) screen->resizable_bar = true; @@ -3491,6 +3506,7 @@ screen->base_descriptor_size = MAX4(screen->db_size[0], screen->db_size[1], screen->db_size[2], screen->db_size[3]); } + simple_mtx_init(&screen->free_batch_states_lock, mtx_plain); simple_mtx_init(&screen->dt_lock, mtx_plain); util_idalloc_mt_init_tc(&screen->buffer_ids);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_types.h ^
@@ -1401,6 +1401,10 @@ simple_mtx_t copy_context_lock; struct zink_context copy_context; + struct zink_batch_state free_batch_states; //unused batch states + struct zink_batch_state last_free_batch_state; //for appending + simple_mtx_t free_batch_states_lock; + simple_mtx_t semaphores_lock; struct util_dynarray semaphores; struct util_dynarray fd_semaphores; @@ -1664,6 +1668,7 @@ union { struct zink_surface image_view; struct zink_buffer_view buffer_view; + unsigned tbo_size; }; struct zink_surface cube_array; /* Optional sampler view returning red (depth) in all channels, for shader rewrites. */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/d3d10umd/Draw.cpp ^
@@ -59,9 +59,12 @@ if (!pDevice->velems_changed) return; - for (unsigned i = 0; i < pDevice->velems.count; i++) - pDevice->element_layout->velems.velems[i].src_stride = pDevice->vertex_strides[pDevice->element_layout->velems.velems[i].vertex_buffer_index]; - cso_set_vertex_elements(pDevice->cso, &pDevice->element_layout->velems); + if(pDevice->element_layout) { + struct cso_velems_state *state = &pDevice->element_layout->state; + for (unsigned i = 0; i < state->count; i++) + state->velems[i].src_stride = pDevice->vertex_strides[state->velems[i].vertex_buffer_index]; + cso_set_vertex_elements(pDevice->cso, state); + } pDevice->velems_changed = false; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/d3d10umd/InputAssembly.cpp ^
@@ -126,7 +126,6 @@ LOG_ENTRYPOINT(); Device pDevice = CastDevice(hDevice); - struct pipe_context pipe = pDevice->pipe; unsigned i; for (i = 0; i < NumBuffers; i++) { @@ -169,7 +168,7 @@ /* XXX this is odd... / if (!vb->is_user_buffer && !vb->buffer.resource) { - pDevice->vertex_strides[i]->stride = 0; + pDevice->vertex_strides[i] = 0; vb->buffer_offset = 0; vb->is_user_buffer = true; vb->buffer.user = dummy; @@ -271,9 +270,7 @@ LOG_ENTRYPOINT(); ElementLayout pElementLayout = CastElementLayout(hElementLayout); - - struct cso_velems_state elements; - memset(elements, 0, sizeof elements); + memset(pElementLayout, 0, sizeof pElementLayout); unsigned num_elements = pCreateElementLayout->NumElements; unsigned max_elements = 0; @@ -281,7 +278,7 @@ const D3D10DDIARG_INPUT_ELEMENT_DESC pVertexElement = &pCreateElementLayout->pVertexElements[i]; struct pipe_vertex_element ve = - &elements.velems[pVertexElement->InputRegister]; + &pElementLayout->state.velems[pVertexElement->InputRegister]; ve->src_offset = pVertexElement->AlignedByteOffset; ve->vertex_buffer_index = pVertexElement->InputSlot; @@ -312,8 +309,7 @@ DebugPrintf("%s: gap\n", __func__); } - elements.count = max_elements; - pElementLayout->velems = mem_dup(elements, sizeof(elements)); + pElementLayout->state.count = max_elements; } @@ -335,10 +331,6 @@ { LOG_ENTRYPOINT(); - struct pipe_context pipe = CastPipeContext(hDevice); - ElementLayout *pElementLayout = CastElementLayout(hElementLayout); - - free(pElementLayout->velems); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/d3d10umd/State.h ^
@@ -62,6 +62,7 @@ }; struct Query; +struct ElementLayout; struct Device { @@ -105,6 +106,7 @@ Query pPredicate; BOOL PredicateValue; + ElementLayout element_layout; BOOL velems_changed; }; @@ -326,7 +328,7 @@ struct ElementLayout { - struct cso_velems_state velems; + struct cso_velems_state state; }; @@ -336,14 +338,6 @@ return static_cast<ElementLayout >(hElementLayout.pDrvPrivate); } -static inline void * -CastPipeInputLayout(D3D10DDI_HELEMENTLAYOUT hElementLayout) -{ - ElementLayout pElementLayout = CastElementLayout(hElementLayout); - return pElementLayout ? pElementLayout->handle : NULL; -} - - struct SamplerState { void handle;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/dri/dri_context.c ^
@@ -169,7 +169,7 @@ if (debug_get_bool_option("MESA_NO_ERROR", false) || driQueryOptionb(&screen->dev->option_cache, "mesa_no_error")) #if !defined(_WIN32) - if (geteuid() == getuid()) + if (__normal_user()) #endif attribs.flags |= ST_CONTEXT_FLAG_NO_ERROR;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/dri/dri_util.c ^
@@ -326,7 +326,7 @@ * for the X server's sake, and EGL will expect us to handle it because * it iterates all __DRI_ATTRIBs. */ - value = __DRI_ATTRIB_SWAP_EXCHANGE; + value = __DRI_ATTRIB_SWAP_UNDEFINED; break; case __DRI_ATTRIB_MAX_SWAP_INTERVAL: value = INT_MAX;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_device.c ^
@@ -2509,6 +2509,8 @@ } } + size *= pInfo->maxSequencesCount; + pMemoryRequirements->memoryRequirements.memoryTypeBits = 1; pMemoryRequirements->memoryRequirements.alignment = 4; pMemoryRequirements->memoryRequirements.size = align(size, pMemoryRequirements->memoryRequirements.alignment);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_execute.c ^
@@ -501,6 +501,7 @@ if (state->vb_strides_dirty) { for (unsigned i = 0; i < state->velem.count; i++) state->velem.velems[i].src_stride = state->vb_strides[state->velem.velems[i].vertex_buffer_index]; + state->ve_dirty = true; state->vb_strides_dirty = false; } @@ -3857,7 +3858,7 @@ } case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: { VkBindVertexBufferIndirectCommandNV data = input; - cmd_size += sizeof(cmd->u.bind_vertex_buffers.buffers) + sizeof(cmd->u.bind_vertex_buffers.offsets); + cmd_size += sizeof(cmd->u.bind_vertex_buffers2.buffers) + sizeof(cmd->u.bind_vertex_buffers2.offsets); cmd_size += sizeof(cmd->u.bind_vertex_buffers2.sizes) + sizeof(cmd->u.bind_vertex_buffers2.strides); if (max_size < size + cmd_size) abort(); @@ -3866,12 +3867,20 @@ cmd->u.bind_vertex_buffers2.binding_count = 1; cmd->u.bind_vertex_buffers2.buffers = (void)cmdptr; - cmd->u.bind_vertex_buffers2.offsets = (void)(cmdptr + sizeof(cmd->u.bind_vertex_buffers2.buffers)); + uint32_t alloc_offset = sizeof(cmd->u.bind_vertex_buffers2.buffers); + + cmd->u.bind_vertex_buffers2.offsets = (void)(cmdptr + alloc_offset); + alloc_offset += sizeof(cmd->u.bind_vertex_buffers2.offsets); + + cmd->u.bind_vertex_buffers2.sizes = (void)(cmdptr + alloc_offset); + alloc_offset += sizeof(cmd->u.bind_vertex_buffers2.sizes); + cmd->u.bind_vertex_buffers2.offsets[0] = 0; cmd->u.bind_vertex_buffers2.buffers[0] = data->bufferAddress ? get_buffer(state, (void)(uintptr_t)data->bufferAddress, (size_t)&cmd->u.bind_vertex_buffers2.offsets[0]) : VK_NULL_HANDLE; + cmd->u.bind_vertex_buffers2.sizes[0] = data->size; if (token->vertexDynamicStride) { - cmd->u.bind_vertex_buffers2.strides = (void)(cmdptr + sizeof(cmd->u.bind_vertex_buffers2.buffers) + sizeof(cmd->u.bind_vertex_buffers2.offsets) + sizeof(cmd->u.bind_vertex_buffers2.sizes)); + cmd->u.bind_vertex_buffers2.strides = (void)(cmdptr + alloc_offset); cmd->u.bind_vertex_buffers2.strides[0] = data->stride; } else { cmd->u.bind_vertex_buffers2.strides = NULL;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/api/kernel.rs ^
@@ -535,11 +535,14 @@ let device_bits = q.device.address_bits(); let device_max = u64::MAX >> (u64::BITS - device_bits); + let mut threads = 0; for i in 0..work_dim as usize { let lws = local_work_size[i]; let gws = global_work_size[i]; let gwo = global_work_offset[i]; + threads *= lws; + // CL_INVALID_WORK_ITEM_SIZE if the number of work-items specified in any of // local_work_size[0], … local_work_size[work_dim - 1] is greater than the corresponding // values specified by @@ -580,6 +583,14 @@ } } + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items + // in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater + // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries + // table. + if threads != 0 && threads > k.max_threads_per_block(q.device) { + return Err(CL_INVALID_WORK_GROUP_SIZE); + } + // If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel // command will trivially succeed after its event dependencies are satisfied and subsequently // update its completion event. @@ -598,7 +609,6 @@ create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb) //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the required number of sub-groups for kernel in the program source. - //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table. //• CL_MISALIGNED_SUB_BUFFER_OFFSET if a sub-buffer object is specified as the value for an argument that is a buffer object and the offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue. 
This error code //• CL_INVALID_IMAGE_SIZE if an image object is specified as an argument value and the image dimensions (image width, height, specified or compute row and/or slice pitch) are not supported by device associated with queue. //• CL_IMAGE_FORMAT_NOT_SUPPORTED if an image object is specified as an argument value and the image format (image channel order and data type) is not supported by device associated with queue.
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/core/kernel.rs ^
@@ -317,7 +317,7 @@ res } -fn opt_nir(nir: &mut NirShader, dev: &Device) { +fn opt_nir(nir: &mut NirShader, dev: &Device, has_explicit_types: bool) { let nir_options = unsafe { &dev .screen @@ -342,7 +342,9 @@ } progress \|= nir_pass!(nir, nir_opt_deref); - progress \|= nir_pass!(nir, nir_opt_memcpy); + if has_explicit_types { + progress \|= nir_pass!(nir, nir_opt_memcpy); + } progress \|= nir_pass!(nir, nir_opt_dce); progress \|= nir_pass!(nir, nir_opt_undef); progress \|= nir_pass!(nir, nir_opt_constant_folding); @@ -452,11 +454,10 @@ printf_opts.max_buffer_size = dev.printf_buffer_size() as u32; nir_pass!(nir, nir_lower_printf, &printf_opts); - opt_nir(nir, dev); + opt_nir(nir, dev, false); let mut args = KernelArg::from_spirv_nir(args, nir); let mut internal_args = Vec::new(); - nir_pass!(nir, nir_lower_memcpy); let dv_opts = nir_remove_dead_variables_options { can_remove_var: Some(can_remove_var), @@ -627,7 +628,8 @@ Some(glsl_get_cl_type_size_align), ); - opt_nir(nir, dev); + opt_nir(nir, dev, true); + nir_pass!(nir, nir_lower_memcpy); nir_pass!( nir, @@ -656,7 +658,7 @@ nir_pass!(nir, nir_lower_convert_alu_types, None); - opt_nir(nir, dev); + opt_nir(nir, dev, true); / before passing it into drivers, assign locations as drivers might remove nir_variables or * other things we depend on @@ -734,6 +736,10 @@ */ nir.preserve_fp16_denorms(); + // Set to rtne for now until drivers are able to report their prefered rounding mode, that + // also matches what we report via the API. + nir.set_fp_rounding_mode_rtne(); + let (args, internal_args) = lower_and_optimize_nir(dev, &mut nir, args, &dev.lib_clc); if let Some(cache) = cache {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/core/platform.rs ^
@@ -79,6 +79,7 @@ "clc" => debug.clc = true, "program" => debug.program = true, "sync" => debug.sync_every_event = true, + "" => (), _ => eprintln!("Unknown RUSTICL_DEBUG flag found: {}", flag), } } @@ -90,6 +91,7 @@ match flag { "fp16" => features.fp16 = true, "fp64" => features.fp64 = true, + "" => (), _ => eprintln!("Unknown RUSTICL_FEATURES flag found: {}", flag), } }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/mesa/compiler/nir.rs ^
@@ -446,6 +446,15 @@ } } + pub fn set_fp_rounding_mode_rtne(&mut self) { + unsafe { + self.nir.as_mut().info.float_controls_execution_mode \|= + float_controls::FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 as u32 + \| float_controls::FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 as u32 + \| float_controls::FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64 as u32; + } + } + pub fn reads_sysval(&self, sysval: gl_system_value) -> bool { let nir = unsafe { self.nir.as_ref() }; bitset::test_bit(&nir.info.system_values_read, sysval as u32)
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/meson.build ^
@@ -91,6 +91,13 @@ '-Aclippy::type_complexity', ] +if rustc.version().version_compare('>=1.72') + rusticl_args += [ + # Needs to be fixed + '-Aclippy::arc-with-non-send-sync' + ] +endif + rusticl_gen_args = [ # can't do anything about it anyway '-Aclippy::all',
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/targets/lavapipe/meson.build ^
@@ -50,7 +50,7 @@ command : [ prog_python, '@INPUT0@', '--api-version', '1.1', '--xml', '@INPUT1@', - '--lib-path', meson.current_build_dir() / 'libvulkan_lvp.so', + '--lib-path', meson.current_build_dir() / icd_file_name, '--out', '@OUTPUT@', ], build_by_default : true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/glx/glxext.c ^
@@ -563,12 +563,11 @@ case GLX_SAMPLES_SGIS: config->samples = bp++; break; -#ifdef GLX_USE_APPLEGL case IGNORE_GLX_SWAP_METHOD_OML: / We ignore this tag. See the comment above this function. / ++bp; break; -#else +#ifndef GLX_USE_APPLEGL case GLX_BIND_TO_TEXTURE_RGB_EXT: config->bindToTextureRgb = bp++; break;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/common/i915/intel_engine.c ^
@@ -25,7 +25,7 @@ #include <stdlib.h> -#include "intel_gem.h" +#include "i915/intel_gem.h" static enum intel_engine_class i915_engine_class_to_intel(enum drm_i915_gem_engine_class i915)
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/common/intel_measure.c ^
@@ -108,7 +108,7 @@ *sep = '\0'; } - if (filename && !__check_suid()) { + if (filename && __normal_user()) { filename += 5; config.file = fopen(filename, "w"); if (!config.file) {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_compiler.c ^
@@ -189,7 +189,8 @@ nir_options->has_bfm = devinfo->ver >= 7; nir_options->has_bfi = devinfo->ver >= 7; - nir_options->lower_rotate = devinfo->ver < 11; + nir_options->has_rotate16 = devinfo->ver >= 11; + nir_options->has_rotate32 = devinfo->ver >= 11; nir_options->lower_bitfield_reverse = devinfo->ver < 7; nir_options->lower_find_lsb = devinfo->ver < 7; nir_options->lower_ifind_msb = devinfo->ver < 7; @@ -240,6 +241,9 @@ insert_u64_bit(&config, compiler->precise_trig); bits++; + insert_u64_bit(&config, compiler->mesh.mue_compaction); + bits++; + uint64_t mask = DEBUG_DISK_CACHE_MASK; bits += util_bitcount64(mask); while (mask != 0) { @@ -256,6 +260,12 @@ mask &= ~bit; } + mask = 3; + bits += util_bitcount64(mask); + + u_foreach_bit64(bit, mask) + insert_u64_bit(&config, (compiler->mesh.mue_header_packing & (1ULL << bit)) != 0); + assert(bits <= util_bitcount64(UINT64_MAX)); return config;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_fs.cpp ^
@@ -2788,6 +2788,29 @@ if (brw_reg_type_is_floating_point(inst->src[1].type)) break; + /* From the BDW PRM, Vol 2a, "mul - Multiply": + * + * "When multiplying integer datatypes, if src0 is DW and src1 + * is W, irrespective of the destination datatype, the + * accumulator maintains full 48-bit precision." + * ... + * "When multiplying integer data types, if one of the sources + * is a DW, the resulting full precision data is stored in + * the accumulator." + * + * There are also similar notes in earlier PRMs. + * + * The MOV instruction can copy the bits of the source, but it + * does not clear the higher bits of the accumulator. So, because + * we might use the full accumulator in the MUL/MACH macro, we + * shouldn't replace such MULs with MOVs. + / + if ((brw_reg_type_to_size(inst->src[0].type) == 4 \|\| + brw_reg_type_to_size(inst->src[1].type) == 4) && + (inst->dst.is_accumulator() \|\| + inst->writes_accumulator_implicitly(devinfo))) + break; + / a * 1.0 = a / if (inst->src[1].is_one()) { inst->opcode = BRW_OPCODE_MOV; @@ -6811,7 +6834,7 @@ static void restore_instruction_order(struct cfg_t cfg, fs_inst *inst_arr) { - int num_insts = cfg->last_block()->end_ip + 1; + ASSERTED int num_insts = cfg->last_block()->end_ip + 1; int ip = 0; foreach_block (block, cfg) { @@ -7576,7 +7599,17 @@ case FRAG_DEPTH_LAYOUT_LESS: return BRW_PSCDEPTH_ON_LE; case FRAG_DEPTH_LAYOUT_UNCHANGED: - return BRW_PSCDEPTH_OFF; + / We initially set this to OFF, but having the shader write the + * depth means we allocate register space in the SEND message. The + * difference between the SEND register count and the OFF state + * programming makes the HW hang. + * + * Removing the depth writes also leads to test failures. So use + * LesserThanOrEqual, which fits writing the same value + * (unchanged/equal). + * + */ + return BRW_PSCDEPTH_ON_LE; } } return BRW_PSCDEPTH_OFF;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_nir.cpp ^
@@ -361,6 +361,17 @@ invert = true; cond_reg = get_nir_src(cond->src[0].src); cond_reg = offset(cond_reg, bld, cond->src[0].swizzle[0]); + + if (devinfo->ver <= 5 && + (cond->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { + /* redo boolean resolve on gen5 */ + fs_reg masked = bld.vgrf(BRW_REGISTER_TYPE_D); + bld.AND(masked, cond_reg, brw_imm_d(1)); + masked.negate = true; + fs_reg tmp = bld.vgrf(cond_reg.type); + bld.MOV(retype(tmp, BRW_REGISTER_TYPE_D), masked); + cond_reg = tmp; + } } else { invert = false; cond_reg = get_nir_src(if_stmt->condition);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_nir.c ^
@@ -1753,14 +1753,6 @@ if (OPT(nir_opt_rematerialize_compares)) OPT(nir_opt_dce); - /* This is the last pass we run before we start emitting stuff. It - * determines when we need to insert boolean resolves on Gen <= 5. We - * run it last because it stashes data in instr->pass_flags and we don't - * want that to be squashed by other NIR passes. - / - if (devinfo->ver <= 5) - brw_nir_analyze_boolean_resolves(nir); - OPT(nir_opt_dce); / The mesh stages require this pass to be called at the last minute, @@ -1773,6 +1765,15 @@ brw_nir_adjust_payload(nir, compiler); nir_trivialize_registers(nir); + + /* This is the last pass we run before we start emitting stuff. It + * determines when we need to insert boolean resolves on Gen <= 5. We + * run it last because it stashes data in instr->pass_flags and we don't + * want that to be squashed by other NIR passes. + */ + if (devinfo->ver <= 5) + brw_nir_analyze_boolean_resolves(nir); + nir_sweep(nir); if (unlikely(debug_enabled)) {
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_shader.cpp ^
@@ -29,6 +29,7 @@ #include "brw_vec4_tes.h" #include "dev/intel_debug.h" #include "util/macros.h" +#include "util/u_debug.h" enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type) @@ -1243,7 +1244,7 @@ backend_shader::dump_instructions(const char *name) const { FILE *file = stderr; - if (name && geteuid() != 0) { + if (name && __normal_user()) { file = fopen(name, "w"); if (!file) file = stderr;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/isl/isl.c ^
@@ -2548,19 +2548,42 @@ if (tile_info->tiling == ISL_TILING_GFX12_CCS) base_alignment_B = MAX(base_alignment_B, 4096); - /* Platforms using an aux map require that images be granularity-aligned - * if they're going to used with CCS. This is because the Aux - * translation table maps main surface addresses to aux addresses at a - * granularity in the main surface. Because we don't know for sure in - * ISL if a surface will use CCS, we have to guess based on the - * DISABLE_AUX usage bit. The one thing we do know is that we haven't - * enable CCS on linear images yet so we can avoid the extra alignment - * there. - / if (dev->info->has_aux_map && !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) { + / Wa_22015614752: + * + * Due to L3 cache being tagged with (engineID, vaID) and the CCS + * block/cacheline being 256 bytes, 2 engines accessing a 64Kb range + * with compression will generate 2 different CCS cacheline entries + * in L3, this will lead to corruptions. To avoid this, we need to + * ensure 2 images do not share a 256 bytes CCS cacheline. With a + * ratio of compression of 1/256, this is 64Kb alignment (even for + * Tile4...) + * + * ATS-M PRMS, Vol 2a: Command Reference: Instructions, + * XY_CTRL_SURF_COPY_BLT, "Size of Control Surface Copy" field, the + * CCS blocks are 256 bytes : + * + * "This field indicates size of the Control Surface or CCS copy. + * It is expressed in terms of number of 256B block of CCS, where + * each 256B block of CCS corresponds to 64KB of main surface." + / + if (intel_needs_workaround(dev->info, 22015614752)) { + base_alignment_B = MAX(base_alignment_B, + 256 / cacheline / 256 /* AUX ratio /); + } + + / Platforms using an aux map require that images be + * granularity-aligned if they're going to used with CCS. This is + * because the Aux translation table maps main surface addresses to + * aux addresses at a granularity in the main surface. 
Because we + * don't know for sure in ISL if a surface will use CCS, we have to + * guess based on the DISABLE_AUX usage bit. The one thing we do know + * is that we haven't enable CCS on linear images yet so we can avoid + * the extra alignment there. + / base_alignment_B = MAX(base_alignment_B, dev->info->verx10 >= 125 ? - 1024 1024 : 64 * 1024); + 1024 * 1024 : 64 * 1024); } }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_batch_chain.c ^
@@ -1016,27 +1016,9 @@ const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start; if (cmd_buffer->device->physical->use_call_secondary) { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN; - /* If the secondary command buffer begins & ends in the same BO and - * its length is less than the length of CS prefetch, add some NOOPs - * instructions so the last MI_BATCH_BUFFER_START is outside the CS - * prefetch. - / - if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) { - const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class; - / Careful to have everything in signed integer. / - int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class]; - int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start; - - for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4) - anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop); - } void jump_addr = - anv_batch_emitn(&cmd_buffer->batch, - GFX9_MI_BATCH_BUFFER_START_length, - GFX9_MI_BATCH_BUFFER_START, - .AddressSpaceIndicator = ASI_PPGTT, - .SecondLevelBatchBuffer = Firstlevelbatch) + + anv_genX(devinfo, batch_emit_return)(&cmd_buffer->batch) + (GFX9_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8); cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr); @@ -1156,18 +1138,10 @@ struct anv_batch_bo first_bbo = list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); - uint64_t write_return_addr = - anv_batch_emitn(&primary->batch, - GFX9_MI_STORE_DATA_IMM_length + 1 /* QWord write /, - GFX9_MI_STORE_DATA_IMM, - .Address = secondary->return_addr) - + (GFX9_MI_STORE_DATA_IMM_ImmediateData_start / 8); - - emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0); - - write_return_addr = - anv_address_physical(anv_batch_address(&primary->batch, - primary->batch.next)); + anv_genX(primary->device->info, batch_emit_secondary_call)( + &primary->batch, + (struct anv_address) { .bo = first_bbo->bo }, + 
secondary->return_addr); anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); break;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_blorp.c ^
@@ -1793,7 +1793,7 @@ * experiment shows that flusing the data cache helps to resolve the * corruption. */ - unsigned wa_flush = intel_device_info_is_dg2(cmd_buffer->device->info) ? + unsigned wa_flush = cmd_buffer->device->info->verx10 >= 125 ? ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0; anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_DEPTH_CACHE_FLUSH_BIT \|
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_device.c ^
@@ -77,7 +77,7 @@ DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false) DRI_CONF_VK_KHR_PRESENT_WAIT(false) DRI_CONF_VK_XWAYLAND_WAIT_READY(true) - DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false) + DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0) DRI_CONF_ANV_DISABLE_FCV(false) DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false) DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false) @@ -1216,7 +1216,7 @@ /* Increase count below when other families are added as a reminder to * increase the ANV_MAX_QUEUE_FAMILIES value. / - STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 4); + STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 5); } else { / Default to a single render queue */ pdevice->queue.families[family_count++] = (struct anv_queue_family) { @@ -1580,7 +1580,7 @@ instance->vk.app_info.engine_version); instance->assume_full_subgroups = - driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups"); + driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups"); instance->limit_trig_input_range = driQueryOptionb(&instance->dri_options, "limit_trig_input_range"); instance->sample_mask_out_opengl_behaviour =
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_genX.h ^
@@ -173,6 +173,12 @@ void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params); +void genX(batch_emit_secondary_call)(struct anv_batch *batch, + struct anv_address secondary_addr, + struct anv_address secondary_return_addr); + +void *genX(batch_emit_return)(struct anv_batch *batch); + void genX(cmd_emit_timestamp)(struct anv_batch *batch, struct anv_device *device, struct anv_address addr,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_image.c ^
@@ -2301,8 +2301,14 @@ if (!bo \|\| !isl_aux_usage_has_ccs(image->planes[p].aux_usage)) continue; - /* Do nothing if flat CCS requirements are satisfied. / - if (device->info->has_flat_ccs && bo->vram_only) + / Do nothing if flat CCS requirements are satisfied. + * + * Also, assume that imported BOs with a modifier including + * CCS live only in local memory. Otherwise the exporter should + * have failed the creation of the BO. + / + if (device->info->has_flat_ccs && + (bo->vram_only \|\| bo->is_external)) continue; / Add the plane to the aux map when applicable. */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_pipeline.c ^
@@ -771,7 +771,7 @@ } if (stages[MESA_SHADER_MESH].info \|\| stages[MESA_SHADER_TASK].info) { - const bool afs = device->physical->instance->assume_full_subgroups; + const uint8_t afs = device->physical->instance->assume_full_subgroups; _mesa_sha1_update(&ctx, &afs, sizeof(afs)); } @@ -789,7 +789,7 @@ anv_pipeline_hash_common(&ctx, &pipeline->base); - const bool afs = device->physical->instance->assume_full_subgroups; + const uint8_t afs = device->physical->instance->assume_full_subgroups; _mesa_sha1_update(&ctx, &afs, sizeof(afs)); _mesa_sha1_update(&ctx, stage->shader_sha1, @@ -1628,8 +1628,7 @@ static void anv_pipeline_add_executables(struct anv_pipeline pipeline, - struct anv_pipeline_stage stage, - struct anv_shader_bin bin) + struct anv_pipeline_stage stage) { if (stage->stage == MESA_SHADER_FRAGMENT) { /* We pull the prog data and stats out of the anv_shader_bin because @@ -1637,8 +1636,8 @@ * looked up the shader in a cache. / const struct brw_wm_prog_data wm_prog_data = - (const struct brw_wm_prog_data )bin->prog_data; - struct brw_compile_stats stats = bin->stats; + (const struct brw_wm_prog_data )stage->bin->prog_data; + struct brw_compile_stats stats = stage->bin->stats; if (wm_prog_data->dispatch_8) { anv_pipeline_add_executable(pipeline, stage, stats++, 0); @@ -1654,18 +1653,27 @@ wm_prog_data->prog_offset_32); } } else { - anv_pipeline_add_executable(pipeline, stage, bin->stats, 0); + anv_pipeline_add_executable(pipeline, stage, stage->bin->stats, 0); } +} + +static void +anv_pipeline_account_shader(struct anv_pipeline pipeline, + struct anv_shader_bin shader) +{ + pipeline->scratch_size = MAX2(pipeline->scratch_size, + shader->prog_data->total_scratch); - pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries); + pipeline->ray_queries = MAX2(pipeline->ray_queries, + shader->prog_data->ray_queries); - if (bin->push_desc_info.used_set_buffer) { + if (shader->push_desc_info.used_set_buffer) { 
pipeline->use_push_descriptor_buffer \|= - BITFIELD_BIT(mesa_to_vk_shader_stage(stage->stage)); + BITFIELD_BIT(mesa_to_vk_shader_stage(shader->stage)); } - if (bin->push_desc_info.used_descriptors & - ~bin->push_desc_info.fully_promoted_ubo_descriptors) - pipeline->use_push_descriptor \|= mesa_to_vk_shader_stage(stage->stage); + if (shader->push_desc_info.used_descriptors & + ~shader->push_desc_info.fully_promoted_ubo_descriptors) + pipeline->use_push_descriptor \|= mesa_to_vk_shader_stage(shader->stage); } /* This function return true if a shader should not be looked at because of @@ -1823,12 +1831,12 @@ int64_t stage_start = os_time_get_nano(); bool cache_hit; - struct anv_shader_bin bin = + stages[s].bin = anv_device_search_for_kernel(device, cache, &stages[s].cache_key, sizeof(stages[s].cache_key), &cache_hit); - if (bin) { + if (stages[s].bin) { found++; - pipeline->shaders[s] = bin; + pipeline->shaders[s] = stages[s].bin; } if (cache_hit) { @@ -1853,6 +1861,7 @@ if (stages[s].imported.bin == NULL) continue; + stages[s].bin = stages[s].imported.bin; pipeline->shaders[s] = anv_shader_bin_ref(stages[s].imported.bin); imported++; } @@ -1868,8 +1877,12 @@ if (pipeline->shaders[s] == NULL) continue; - anv_pipeline_add_executables(&pipeline->base, &stages[s], - pipeline->shaders[s]); + / Only add the executables when we're not importing or doing link + * optimizations. The imported executables are added earlier. Link + * optimization can produce different binaries. + / + if (stages[s].imported.bin == NULL \|\| link_optimize) + anv_pipeline_add_executables(&pipeline->base, &stages[s]); pipeline->source_hashes[s] = stages[s].source_hash; } return true; @@ -1991,7 +2004,9 @@ a size. / if (info->subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS) - info->subgroup_size = BRW_SUBGROUP_SIZE; + info->subgroup_size = + device->physical->instance->assume_full_subgroups != 0 ? 
+ device->physical->instance->assume_full_subgroups : BRW_SUBGROUP_SIZE; } static void @@ -2331,7 +2346,6 @@ cur_info->patch_inputs_read \|= prev_info->patch_outputs_written; } - anv_fixup_subgroup_size(device, cur_info); stage->feedback.duration += os_time_get_nano() - stage_start; @@ -2436,7 +2450,7 @@ anv_nir_validate_push_layout(&stage->prog_data.base, &stage->bind_map); - struct anv_shader_bin bin = + stage->bin = anv_device_upload_kernel(device, cache, s, &stage->cache_key, sizeof(stage->cache_key), @@ -2449,15 +2463,15 @@ &stage->bind_map, &stage->push_desc_info, stage->dynamic_push_values); - if (!bin) { + if (!stage->bin) { ralloc_free(stage_ctx); result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); goto fail; } - anv_pipeline_add_executables(&pipeline->base, stage, bin); + anv_pipeline_add_executables(&pipeline->base, stage); pipeline->source_hashes[s] = stage->source_hash; - pipeline->shaders[s] = bin; + pipeline->shaders[s] = stage->bin; ralloc_free(stage_ctx); @@ -2481,7 +2495,6 @@ struct anv_pipeline_stage stage = &stages[s]; - anv_pipeline_add_executables(&pipeline->base, stage, stage->imported.bin); pipeline->source_hashes[s] = stage->source_hash; pipeline->shaders[s] = anv_shader_bin_ref(stage->imported.bin); } @@ -2498,6 +2511,8 @@ struct anv_pipeline_stage stage = &stages[s]; pipeline->feedback_index[s] = stage->feedback_idx; pipeline->robust_flags[s] = stage->robust_flags; + + anv_pipeline_account_shader(&pipeline->base, pipeline->shaders[s]); } pipeline_feedback->duration = os_time_get_nano() - pipeline_start; @@ -2550,8 +2565,6 @@ }; anv_stage_write_shader_hash(&stage, device); - struct anv_shader_bin *bin = NULL; - populate_cs_prog_key(&stage, device); const bool skip_cache_lookup = @@ -2561,18 +2574,18 @@ bool cache_hit = false; if (!skip_cache_lookup) { - bin = anv_device_search_for_kernel(device, cache, - &stage.cache_key, - sizeof(stage.cache_key), - &cache_hit); + stage.bin = anv_device_search_for_kernel(device, cache, + 
&stage.cache_key, + sizeof(stage.cache_key), + &cache_hit); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_private.h ^
@@ -805,7 +805,7 @@ enum intel_engine_class engine_class; }; -#define ANV_MAX_QUEUE_FAMILIES 4 +#define ANV_MAX_QUEUE_FAMILIES 5 struct anv_memory_type { /* Standard bits passed on to the client / @@ -1022,7 +1022,7 @@ /* * Workarounds for game bugs. / - bool assume_full_subgroups; + uint8_t assume_full_subgroups; bool limit_trig_input_range; bool sample_mask_out_opengl_behaviour; bool fp64_workaround_enabled; @@ -1342,6 +1342,7 @@ bool RenderingDisable; uint32_t RenderStreamSelect; uint32_t ReorderMode; + uint32_t ForceRendering; } so; / 3DSTATE_SAMPLE_MASK / @@ -2825,215 +2826,43 @@ / PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode. * For more details see genX(emit_apply_pipe_flushes). + * + * Documentation says that untyped L1 dataport cache flush is controlled by + * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register: + * + * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush: + * + * "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to + * "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1 + * cache based on the programming of HDC_Chicken0 register bits 13:11." + * + * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1 + * cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the + * PIPE_CONTROL command." + * + * As part of Wa_22010960976 & Wa_14013347512, i915 is programming + * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol + * Dataport flush, and UAV coherency barrier event"). So there is no need + * to set "Untyped Data-Port Cache" in 3D mode. + * + * On MTL the HDC_CHICKEN0 default values changed to match what was programmed + * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the + * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0 + * register to force L1 untyped flush with HDC pipeline flush has no effect on + * MTL. 
+ * + * It seems like the HW change completely disconnected L1 untyped flush from + * HDC pipeline flush with no way to bring that behavior back. So leave the L1 + * untyped flush active in 3D mode on all platforms since it doesn't seems to + * cause issues there too. + * + * Maybe we'll have some GPGPU only bits here at some point. / -#define ANV_PIPE_GPGPU_BITS ( \ - (GFX_VERx10 >= 125 ? ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT : 0)) +#define ANV_PIPE_GPGPU_BITS (0) enum intel_ds_stall_flag anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits); -static inline enum anv_pipe_bits -anv_pipe_flush_bits_for_access_flags(struct anv_device device, - VkAccessFlags2 flags) -{ - enum anv_pipe_bits pipe_bits = 0; - - u_foreach_bit64(b, flags) { - switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { - case VK_ACCESS_2_SHADER_WRITE_BIT: - case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: - case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: - /* We're transitioning a buffer that was previously used as write - * destination through the data port. To make its content available - * to future operations, flush the hdc pipeline. - / - pipe_bits \|= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; - pipe_bits \|= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT: - / We're transitioning a buffer that was previously used as render - * target. To make its content available to future operations, flush - * the render target cache. - / - pipe_bits \|= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - / We're transitioning a buffer that was previously used as depth - * buffer. To make its content available to future operations, flush - * the depth cache. - / - pipe_bits \|= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_TRANSFER_WRITE_BIT: - / We're transitioning a buffer that was previously used as a - * transfer write destination. 
Generic write operations include color - * & depth operations as well as buffer operations like : - * - vkCmdClearColorImage() - * - vkCmdClearDepthStencilImage() - * - vkCmdBlitImage() - * - vkCmdCopy(), vkCmdUpdate(), vkCmdFill() - - * Most of these operations are implemented using Blorp which writes - * through the render target, so flush that cache to make it visible - * to future operations. And for depth related operations we also - * need to flush the depth cache. - / - pipe_bits \|= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - pipe_bits \|= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_MEMORY_WRITE_BIT: - / We're transitioning a buffer for generic write operations. Flush - * all the caches. - / - pipe_bits \|= ANV_PIPE_FLUSH_BITS; - break; - case VK_ACCESS_2_HOST_WRITE_BIT: - / We're transitioning a buffer for access by CPU. Invalidate - * all the caches. Since data and tile caches don't have invalidate, - * we are forced to flush those as well. - / - pipe_bits \|= ANV_PIPE_FLUSH_BITS; - pipe_bits \|= ANV_PIPE_INVALIDATE_BITS; - break; - case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: - case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: - / We're transitioning a buffer written either from VS stage or from - * the command streamer (see CmdEndTransformFeedbackEXT), we just - * need to stall the CS. - * - * Streamout writes apparently bypassing L3, in order to make them - * visible to the destination, we need to invalidate the other - * caches. 
- / - pipe_bits \|= ANV_PIPE_CS_STALL_BIT \| ANV_PIPE_INVALIDATE_BITS; - break; - default: - break; / Nothing to do / - } - } - - return pipe_bits; -} - -static inline enum anv_pipe_bits -anv_pipe_invalidate_bits_for_access_flags(struct anv_device device, - VkAccessFlags2 flags) -{ - enum anv_pipe_bits pipe_bits = 0; - - u_foreach_bit64(b, flags) { - switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { - case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT: - /* Indirect draw commands take a buffer as input that we're going to - * read from the command streamer to load some of the HW registers - * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a - * command streamer stall so that all the cache flushes have - * completed before the command streamer loads from memory. - / - pipe_bits \|= ANV_PIPE_CS_STALL_BIT; - / Indirect draw commands also set gl_BaseVertex & gl_BaseIndex - * through a vertex buffer, so invalidate that cache. - / - pipe_bits \|= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; - / For CmdDipatchIndirect, we also load gl_NumWorkGroups through a - * UBO from the buffer, so we need to invalidate constant cache. - / - pipe_bits \|= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; - pipe_bits \|= ANV_PIPE_DATA_CACHE_FLUSH_BIT; - / Tile cache flush needed For CmdDipatchIndirect since command - * streamer and vertex fetch aren't L3 coherent. - / - pipe_bits \|= ANV_PIPE_TILE_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_INDEX_READ_BIT: - case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT: - / We transitioning a buffer to be used for as input for vkCmdDraw* - * commands, so we invalidate the VF cache to make sure there is no - * stale data when we start rendering. - / - pipe_bits \|= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; - break; - case VK_ACCESS_2_UNIFORM_READ_BIT: - case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR: - / We transitioning a buffer to be used as uniform data. 
Because - * uniform is accessed through the data port & sampler, we need to - * invalidate the texture cache (sampler) & constant cache (data - * port) to avoid stale data. - */ - pipe_bits \|= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; - if (device->physical->compiler->indirect_ubos_use_sampler) { - pipe_bits \|= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_cmd_buffer.c ^
@@ -399,6 +399,23 @@ anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT, 0, base_layer, layer_count, ISL_AUX_OP_AMBIGUATE); } + +#if GFX_VER == 12 + /* Depth/Stencil writes by the render pipeline to D16 & S8 formats use a + * different pairing bit for the compression cache line. This means that + * there is potential for aliasing with the wrong cache if you use another + * format OR a piece of HW that does not use the same pairing. To avoid + * this, flush the tile cache as the compression data does not live in the + * color/depth cache. + / + if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_HIZ_CCS && + final_needs_depth && !initial_depth_valid && + anv_image_format_is_d16_or_s8(image)) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_TILE_CACHE_FLUSH_BIT, + "D16 or S8 HIZ-CCS flush"); + } +#endif } / Transitions a HiZ-enabled depth buffer from one layout to another. Unless @@ -454,6 +471,19 @@ clear_rect, 0 /* Stencil clear value /); } } + + / Depth/Stencil writes by the render pipeline to D16 & S8 formats use a + * different pairing bit for the compression cache line. This means that + * there is potential for aliasing with the wrong cache if you use another + * format OR a piece of HW that does not use the same pairing. To avoid + * this, flush the tile cache as the compression data does not live in the + * color/depth cache. + / + if (anv_image_format_is_d16_or_s8(image)) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_TILE_CACHE_FLUSH_BIT, + "D16 or S8 HIZ-CCS flush"); + } #endif } @@ -868,7 +898,7 @@ 0 : src_queue_family].queueFlags; const VkQueueFlagBits dst_queue_flags = device->physical->queue.families[ - (dst_queue_external \|\| src_queue_family == VK_QUEUE_FAMILY_IGNORED) ? + (dst_queue_external \|\| dst_queue_family == VK_QUEUE_FAMILY_IGNORED) ? 0 : dst_queue_family].queueFlags; / Simultaneous acquire and release on external queues is illegal. 
/ @@ -915,7 +945,7 @@ dst_queue_family != VK_QUEUE_FAMILY_IGNORED && src_queue_family != dst_queue_family) { enum intel_engine_class src_engine = - cmd_buffer->queue_family[src_queue_family].engine_class; + cmd_buffer->queue_family->engine_class; if (src_engine != INTEL_ENGINE_CLASS_RENDER) return; } @@ -1514,36 +1544,20 @@ ANV_PIPE_END_OF_PIPE_SYNC_BIT); #if GFX_VERx10 >= 125 - / BSpec 47112: PIPE_CONTROL::Untyped Data-Port Cache Flush: - * - * "'HDC Pipeline Flush' bit must be set for this bit to take - * effect." - * - * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush: - * - * "When the "Pipeline Select" mode in PIPELINE_SELECT command is - * set to "3D", HDC Pipeline Flush can also flush/invalidate the - * LSC Untyped L1 cache based on the programming of HDC_Chicken0 - * register bits 13:11." - * - * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC - * Untyped L1 cache flush is controlled by 'Untyped Data-Port - * Cache Flush' bit in the PIPE_CONTROL command." - * - * As part of Wa_1608949956 & Wa_14010198302, i915 is programming - * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D - * Pipecontrol Dataport flush, and UAV coherency barrier event"). - * So there is no need to set "Untyped Data-Port Cache" in 3D - * mode. - / if (current_pipeline != GPGPU) { - flush_bits &= ~ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + if (flush_bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT) + flush_bits \|= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; } else { if (flush_bits & (ANV_PIPE_HDC_PIPELINE_FLUSH_BIT \| ANV_PIPE_DATA_CACHE_FLUSH_BIT)) flush_bits \|= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; } + / BSpec 47112: PIPE_CONTROL::Untyped Data-Port Cache Flush: + * + * "'HDC Pipeline Flush' bit must be set for this bit to take + * effect." 
+ / if (flush_bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) flush_bits \|= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; #endif @@ -3295,6 +3309,19 @@ if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; +#if GFX_VER >= 12 + / Reenable prefetching at the beginning of secondary command buffers. We + * do this so that the return instruction edition is not prefetched before + * completion. + / + if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) { + arb.PreParserDisableMask = true; + arb.PreParserDisable = false; + } + } +#endif + trace_intel_begin_cmd_buffer(&cmd_buffer->trace); if (anv_cmd_buffer_is_video_queue(cmd_buffer) \|\| @@ -3722,6 +3749,232 @@ } } +static inline enum anv_pipe_bits +anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer cmd_buffer, + VkAccessFlags2 flags) +{ + enum anv_pipe_bits pipe_bits = 0; + + u_foreach_bit64(b, flags) { + switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { + case VK_ACCESS_2_SHADER_WRITE_BIT: + case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: + case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: + /* We're transitioning a buffer that was previously used as write + * destination through the data port. To make its content available + * to future operations, flush the hdc pipeline. + / + pipe_bits \|= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + pipe_bits \|= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT: + / We're transitioning a buffer that was previously used as render + * target. To make its content available to future operations, flush + * the render target cache. + / + pipe_bits \|= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + / We're transitioning a buffer that was previously used as depth + * buffer. 
To make its content available to future operations, flush + * the depth cache. + / + pipe_bits \|= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_TRANSFER_WRITE_BIT: + / We're transitioning a buffer that was previously used as a + * transfer write destination. Generic write operations include color + * & depth operations as well as buffer operations like : + * - vkCmdClearColorImage() + * - vkCmdClearDepthStencilImage() + * - vkCmdBlitImage() + * - vkCmdCopy(), vkCmdUpdate(), vkCmdFill() + + * Most of these operations are implemented using Blorp which writes + * through the render target cache or the depth cache on the graphics + * queue. On the compute queue, the writes are done through the data + * port. + / + if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) { + pipe_bits \|= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + pipe_bits \|= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + } else { + pipe_bits \|= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + pipe_bits \|= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + } + break; + case VK_ACCESS_2_MEMORY_WRITE_BIT: + / We're transitioning a buffer for generic write operations. Flush + * all the caches. + / + pipe_bits \|= ANV_PIPE_FLUSH_BITS; + break; + case VK_ACCESS_2_HOST_WRITE_BIT: + / We're transitioning a buffer for access by CPU. Invalidate + * all the caches. Since data and tile caches don't have invalidate, + * we are forced to flush those as well. + */ + pipe_bits \|= ANV_PIPE_FLUSH_BITS; + pipe_bits \|= ANV_PIPE_INVALIDATE_BITS; + break; + case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: + case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_gfx_state.c ^
@@ -354,8 +354,8 @@ SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable); SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream); -#if INTEL_NEEDS_WA_14017076903 - /* Wa_14017076903 : +#if INTEL_NEEDS_WA_18022508906 + /* Wa_18022508906 : * * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage: * @@ -383,8 +383,9 @@ * Here we force rendering to get SOL_INT::Render_Enable when occlusion * queries are active. */ - if (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) - SET(STREAMOUT, so.ForceRendering, Force_on); + SET(STREAMOUT, so.ForceRendering, + (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ? + Force_on : 0); #endif switch (dyn->rs.provoking_vertex) { @@ -1304,6 +1305,7 @@ SET(so, so, RenderingDisable); SET(so, so, RenderStreamSelect); SET(so, so, ReorderMode); + SET(so, so, ForceRendering); } }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_gpu_memcpy.c ^
@@ -272,7 +272,7 @@ void genX(emit_so_memcpy_end)(struct anv_memcpy_state *state) { - if (intel_device_info_is_dg2(state->device->info)) + if (intel_needs_workaround(state->device->info, 16013994831)) genX(batch_set_preemption)(state->batch, state->device->info, true); anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_video.c ^
@@ -551,7 +551,8 @@ cum += pps->column_width_minus1[4 * i + 2] + 1; tile.ColumnPosition[i].CtbPos3i = cum; - if ((4 * i + 3) == pps->num_tile_columns_minus1) + if ((4 * i + 3) >= MIN2(pps->num_tile_columns_minus1, + ARRAY_SIZE(pps->column_width_minus1))) break; cum += pps->column_width_minus1[4 * i + 3] + 1;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan_hasvk/anv_device.c ^
@@ -67,7 +67,7 @@ DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0) DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false) DRI_CONF_VK_XWAYLAND_WAIT_READY(true) - DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false) + DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0) DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false) DRI_CONF_NO_16BIT(false) DRI_CONF_SECTION_END @@ -1324,7 +1324,7 @@ instance->vk.app_info.engine_version); instance->assume_full_subgroups = - driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups"); + driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups"); instance->limit_trig_input_range = driQueryOptionb(&instance->dri_options, "limit_trig_input_range"); instance->sample_mask_out_opengl_behaviour =
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan_hasvk/anv_pipeline.c ^
@@ -472,7 +472,7 @@ const bool rba = device->vk.enabled_features.robustBufferAccess; _mesa_sha1_update(&ctx, &rba, sizeof(rba)); - const bool afs = device->physical->instance->assume_full_subgroups; + const uint8_t afs = device->physical->instance->assume_full_subgroups; _mesa_sha1_update(&ctx, &afs, sizeof(afs)); _mesa_sha1_update(&ctx, stage->shader_sha1, @@ -1581,7 +1581,9 @@ * a size. */ if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS) - stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE; + stage.nir->info.subgroup_size = + device->physical->instance->assume_full_subgroups != 0 ? + device->physical->instance->assume_full_subgroups : BRW_SUBGROUP_SIZE; stage.num_stats = 1;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan_hasvk/anv_private.h ^
@@ -944,7 +944,7 @@ /** * Workarounds for game bugs. */ - bool assume_full_subgroups; + uint8_t assume_full_subgroups; bool limit_trig_input_range; bool sample_mask_out_opengl_behaviour; float lower_depth_range_rate;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/loader/loader.c ^
@@ -675,7 +675,7 @@ * user's problem, but this allows vc4 simulator to run on an i965 host, * and may be useful for some touch testing of i915 on an i965 host. / - if (geteuid() == getuid()) { + if (__normal_user()) { const char override = os_get_option("MESA_LOADER_DRIVER_OVERRIDE"); if (override) return strdup(override); @@ -780,7 +780,7 @@ const char search_paths, next, *end; search_paths = NULL; - if (geteuid() == getuid() && search_path_vars) { + if (__normal_user() && search_path_vars) { for (int i = 0; search_path_vars[i] != NULL; i++) { search_paths = getenv(search_path_vars[i]); if (search_paths)
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/get_hash_params.py ^
@@ -494,6 +494,10 @@ # GL_EXT_framebuffer_EXT / GLES 3.0 + EXT_sRGB_write_control [ "FRAMEBUFFER_SRGB_EXT", "CONTEXT_BOOL(Color.sRGBEnabled), extra_EXT_framebuffer_sRGB" ], + +# GL_ARB_cull_distance, GL_EXT_clip_cull_distance + [ "MAX_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ], + [ "MAX_COMBINED_CLIP_AND_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ], ]}, { "apis": ["GLES", "GLES2"], "params": [ @@ -1009,10 +1013,6 @@ [ "GPU_MEMORY_INFO_EVICTION_COUNT_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ], [ "GPU_MEMORY_INFO_EVICTED_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ], -# GL_ARB_cull_distance - [ "MAX_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ], - [ "MAX_COMBINED_CLIP_AND_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ], - # GL_ARB_compute_variable_group_size [ "MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB", "CONTEXT_INT(Const.MaxComputeVariableGroupInvocations), extra_ARB_compute_variable_group_size" ],
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/glthread_draw.c ^
@@ -813,7 +813,8 @@ * Others prevent syncing, such as disallowing buffer objects because we * can't map them without syncing. / - return util_is_vbo_upload_ratio_too_large(count, num_upload_vertices) && + return ctx->API == API_OPENGL_COMPAT && + util_is_vbo_upload_ratio_too_large(count, num_upload_vertices) && instance_count == 1 && / no instancing / vao->CurrentElementBufferName == 0 && / only user indices / !ctx->GLThread._PrimitiveRestart && / no primitive restart */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/shaderapi.c ^
@@ -177,7 +177,7 @@ static const char *path = NULL; if (!read_env_var) { - path = getenv("MESA_SHADER_CAPTURE_PATH"); + path = secure_getenv("MESA_SHADER_CAPTURE_PATH"); read_env_var = true; #if ANDROID_SHADER_CAPTURE @@ -1971,7 +1971,7 @@ if (!path_exists) return; - dump_path = getenv("MESA_SHADER_DUMP_PATH"); + dump_path = secure_getenv("MESA_SHADER_DUMP_PATH"); if (!dump_path) { path_exists = false; return;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/texobj.c ^
@@ -817,7 +817,8 @@ return; } if (t->Image[face][baseLevel]->InternalFormat != - baseImage->InternalFormat) { + baseImage->InternalFormat \|\| + t->Image[face][baseLevel]->TexFormat != baseImage->TexFormat) { incomplete(t, BASE, "Cube face format mismatch"); return; } @@ -876,7 +877,8 @@ incomplete(t, MIPMAP, "TexImage[%d] is missing", i); return; } - if (img->InternalFormat != baseImage->InternalFormat) { + if (img->InternalFormat != baseImage->InternalFormat \|\| + img->TexFormat != baseImage->TexFormat) { incomplete(t, MIPMAP, "Format[i] != Format[baseLevel]"); return; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/microsoft/compiler/nir_to_dxil.c ^
@@ -119,7 +119,6 @@ .lower_uadd_carry = true, .lower_usub_borrow = true, .lower_mul_high = true, - .lower_rotate = true, .lower_pack_half_2x16 = true, .lower_pack_unorm_4x8 = true, .lower_pack_snorm_4x8 = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/microsoft/vulkan/dzn_device.c ^
@@ -2806,7 +2806,7 @@ if (!device->dev13) goto cleanup; - if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, host_pointer, heap_desc.SizeInBytes, &IID_ID3D12Heap, &mem->heap))) + if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, host_pointer, heap_desc.SizeInBytes, &IID_ID3D12Heap, (void*)&mem->heap))) goto cleanup; D3D12_HEAP_DESC desc = dzn_ID3D12Heap_GetDesc(mem->heap); @@ -3920,7 +3920,7 @@ return VK_ERROR_FEATURE_NOT_PRESENT; ID3D12Heap heap; - if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, pHostPointer, 1, &IID_ID3D12Heap, &heap))) + if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, pHostPointer, 1, &IID_ID3D12Heap, (void *)&heap))) return VK_ERROR_INVALID_EXTERNAL_HANDLE; struct dzn_physical_device pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/nouveau/codegen/nv50_ir_from_nir.cpp ^
@@ -3465,7 +3465,7 @@ op.unify_interfaces = false; op.use_interpolated_input_intrinsics = true; op.lower_mul_2x32_64 = true; // TODO - op.lower_rotate = (chipset < NVISA_GV100_CHIPSET); + op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET); op.has_imul24 = false; op.has_fmulz = (chipset > NVISA_G80_CHIPSET); op.intel_vec4 = false;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/nouveau/vulkan/nvk_image.c ^
@@ -187,7 +187,7 @@ { VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice); - const VkPhysicalDeviceExternalImageFormatInfo external_info = NULL; + const VkPhysicalDeviceExternalImageFormatInfo external_info = vk_find_struct_const(pImageFormatInfo->pNext, PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/ci/panfrost-g52-fails.txt ^
@@ -526,17 +526,6 @@ dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.int32,Crash dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.uint32,Crash -dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_uint_pack32,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_unorm_pack32,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_sint_pack32,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_uint_pack32,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_unorm_pack32,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sfloat,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sint,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_uint,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_sint,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_uint,Fail -dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_unorm,Fail dEQP-VK.api.command_buffers.record_many_draws_secondary_2,Fail dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.b5g6r5_unorm_pack16.r16_snorm.general_general,Fail dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8_snorm.r8_uint.general_general,Fail @@ -550,24 +539,6 @@ dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8g8_unorm.r16_sint.general_general,Fail dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8g8_unorm.r5g6b5_unorm_pack16.general_general,Fail dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8g8_unorm.r8g8_unorm.general_optimal,Fail -dEQP-VK.binding_model.descriptor_copy.compute.mix_1,Fail -dEQP-VK.binding_model.descriptor_copy.compute.mix_array0,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_image_0,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_image_1,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_image_2,Fail 
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_4,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_image_5,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array0,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array1,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array2,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_0,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_1,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_2,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_4,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_5,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array0,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array1,Fail -dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array2,Fail dEQP-VK.glsl.operator.sequence.no_side_effects.highp_bool_vec2_fragment,Fail dEQP-VK.glsl.operator.sequence.no_side_effects.highp_float_uint_fragment,Fail dEQP-VK.glsl.operator.sequence.no_side_effects.highp_vec4_ivec4_bvec4_fragment,Fail @@ -580,27 +551,6 @@ dEQP-VK.glsl.operator.sequence.side_effects.mediump_bool_vec2_fragment,Fail dEQP-VK.glsl.operator.sequence.side_effects.mediump_float_uint_fragment,Fail dEQP-VK.glsl.operator.sequence.side_effects.mediump_vec4_fragment,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_storage_read,Fail 
-dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_uniform_read,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_write.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_write.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_write.range_3_texels,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_1_texel,Fail -dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_3_texels,Fail dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_linear,Fail dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_nearest,Fail dEQP-VK.texture.explicit_lod.2d.derivatives.linear_nearest_mipmap_linear,Fail
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/compiler/bifrost_compile.h ^
@@ -55,7 +55,6 @@ .lower_bitfield_insert = true, \ .lower_bitfield_extract = true, \ .lower_insert_byte = true, \ - .lower_rotate = true, \ \ /* Vertex ID is zero based in the traditional geometry flows, but not in \ * the memory-allocated IDVS flow introduced and used exclusively in \
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/midgard/midgard_compile.h ^
@@ -73,7 +73,6 @@ .lower_insert_byte = true, .lower_insert_word = true, .lower_ldexp = true, - .lower_rotate = true, .lower_pack_half_2x16 = true, .lower_pack_unorm_2x16 = true,
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_private.h ^
@@ -397,7 +397,6 @@ unsigned num_dyn_ubos; unsigned num_dyn_ssbos; uint32_t num_imgs; - uint32_t num_sets; struct { uint32_t size;
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_cmd_buffer.c ^
@@ -514,7 +514,7 @@ struct panvk_descriptor_state desc_state = &bind_point_state->desc_state; const struct panvk_pipeline pipeline = bind_point_state->pipeline; - for (unsigned s = 0; s < pipeline->layout->num_sets; s++) { + for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { const struct panvk_descriptor_set *set = desc_state->sets[s]; if (!set)
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_device.c ^
@@ -117,7 +117,7 @@ } if (debug & PANVK_DEBUG_TRACE) - pandecode_next_frame(0); + pandecode_next_frame(pdev->decode_ctx); batch->issued = true; }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/00-mesa-defaults.conf ^
@@ -960,6 +960,10 @@ <application name="Waterfox" executable="waterfox"> <option name="no_fp16" value="true" /> </application> + <!-- Game does not consider larger image count in non-vsynced modes. --> + <application name="Detroit Become Human" application_name_match="DetroitBecomeHuman"> + <option name="vk_x11_strict_image_count" value="true" /> + </application> </device> <!-- vmwgfx doesn't like full buffer swaps and can't sync to vertical retraces.--> <device driver="vmwgfx"> @@ -1114,10 +1118,10 @@ </device> <device driver="anv"> <application name="Aperture Desk Job" executable="deskjob"> - <option name="anv_assume_full_subgroups" value="true" /> + <option name="anv_assume_full_subgroups" value="32" /> </application> <application name="DOOMEternal" executable="DOOMEternalx64vk.exe"> - <option name="anv_assume_full_subgroups" value="true" /> + <option name="anv_assume_full_subgroups" value="32" /> <option name="fp64_workaround_enabled" value="true" /> </application> <application name="Wolfenstein: Youngblood(x64vk)" executable="Youngblood_x64vk.exe"> @@ -1166,6 +1170,12 @@ <application name="DEATH STRANDING" executable="ds.exe"> <option name="force_vk_vendor" value="-1" /> </application> + <application name="Baldur's Gate 3" executable="bg3.exe"> + <option name="anv_disable_fcv" value="true" /> + </application> + <application name="The Finals" executable="Discovery.exe"> + <option name="force_vk_vendor" value="-1" /> + </application> <!-- Disable 16-bit feature on zink and angle so that GLES mediump doesn't lower to our inefficent 16-bit shader support. No need to do so for @@ -1178,6 +1188,7 @@ <!-- Disable FCV optimization for Unreal Engine 5.1 workloads. --> <engine engine_name_match="UnrealEngine5.1"> <option name="anv_disable_fcv" value="true" /> + <option name="anv_assume_full_subgroups" value="16" /> </engine> </device> <device driver="dzn">
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/00-radv-defaults.conf ^
@@ -104,6 +104,7 @@ <application name="DOOM Eternal" application_name_match="DOOMEternal"> <option name="radv_zero_vram" value="true" /> + <option name="radv_force_active_accel_struct_leaves" value="true" /> </application> <application name="No Man's Sky" application_name_match="No Man's Sky"> @@ -148,6 +149,14 @@ <option name="radv_invariant_geom" value="true"/> </application> + <application name="Crysis 2 Remastered" executable="Crysis2Remastered.exe"> + <option name="radv_ssbo_non_uniform" value="true" /> + </application> + + <application name="Crysis 3 Remastered" executable="Crysis3Remastered.exe"> + <option name="radv_ssbo_non_uniform" value="true" /> + </application> + <!-- OpenGL Game workarounds (zink) --> <application name="Black Geyser: Couriers of Darkness" executable="BlackGeyser.x86_64"> <option name="radv_zero_vram" value="true" />
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/detect_arch.h ^
@@ -97,6 +97,10 @@ #define DETECT_ARCH_MIPS 1 #endif +#if defined(__hppa__) +#define DETECT_ARCH_HPPA 1 +#endif + #ifndef DETECT_ARCH_X86 #define DETECT_ARCH_X86 0 #endif @@ -137,4 +141,8 @@ #define DETECT_ARCH_MIPS 0 #endif +#ifndef DETECT_ARCH_HPPA +#define DETECT_ARCH_HPPA 0 +#endif + #endif /* UTIL_DETECT_ARCH_H_ */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/disk_cache_os.c ^
@@ -33,6 +33,7 @@ #include "util/compress.h" #include "util/crc32.h" +#include "util/u_debug.h" #include "util/disk_cache.h" #include "util/disk_cache_os.h" @@ -850,10 +851,10 @@ else if (cache_type == DISK_CACHE_DATABASE) cache_dir_name = CACHE_DIR_NAME_DB; - char path = getenv("MESA_SHADER_CACHE_DIR"); + char path = secure_getenv("MESA_SHADER_CACHE_DIR"); if (!path) { - path = getenv("MESA_GLSL_CACHE_DIR"); + path = secure_getenv("MESA_GLSL_CACHE_DIR"); if (path) fprintf(stderr, "*** MESA_GLSL_CACHE_DIR is deprecated; " @@ -870,7 +871,7 @@ } if (path == NULL) { - char xdg_cache_home = getenv("XDG_CACHE_HOME"); + char xdg_cache_home = secure_getenv("XDG_CACHE_HOME"); if (xdg_cache_home) { if (mkdir_if_needed(xdg_cache_home) == -1) @@ -940,7 +941,7 @@ return false; /* If running as a users other than the real user disable cache / - if (geteuid() != getuid()) + if (!__normal_user()) return false; / At user request, disable shader cache entirely. */
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/driconf.h ^
@@ -674,6 +674,10 @@ DRI_CONF_OPT_B(radv_tex_non_uniform, def, \ "Always mark texture sample operations as non-uniform.") +#define DRI_CONF_RADV_SSBO_NON_UNIFORM(def) \ + DRI_CONF_OPT_B(radv_ssbo_non_uniform, def, \ + "Always mark SSBO operations as non-uniform.") + #define DRI_CONF_RADV_FLUSH_BEFORE_TIMESTAMP_WRITE(def) \ DRI_CONF_OPT_B(radv_flush_before_timestamp_write, def, \ "Wait for previous commands to finish before writing timestamps") @@ -684,13 +688,17 @@ #define DRI_CONF_RADV_APP_LAYER() DRI_CONF_OPT_S_NODEF(radv_app_layer, "Select an application layer.") +#define DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(def) \ + DRI_CONF_OPT_B(radv_force_active_accel_struct_leaves, def, \ + "Force leaf nodes of acceleration structures to be marked active.") + /** * \brief ANV specific configuration options */ #define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(def) \ - DRI_CONF_OPT_B(anv_assume_full_subgroups, def, \ - "Allow assuming full subgroups requirement even when it's not specified explicitly") + DRI_CONF_OPT_I(anv_assume_full_subgroups, def, 0, 32, \ + "Allow assuming full subgroups requirement even when it's not specified explicitly and set the given size") #define DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(def) \ DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/hash_table.c ^
@@ -427,8 +427,7 @@ } static struct hash_entry * -hash_table_insert(struct hash_table ht, uint32_t hash, - const void key, void data) +hash_table_get_entry(struct hash_table ht, uint32_t hash, const void key) { struct hash_entry available_entry = NULL; @@ -469,11 +468,8 @@ / if (!entry_is_deleted(ht, entry) && entry->hash == hash && - ht->key_equals_function(key, entry->key)) { - entry->key = key; - entry->data = data; + ht->key_equals_function(key, entry->key)) return entry; - } hash_address += double_hash; if (hash_address >= size) @@ -484,8 +480,6 @@ if (entry_is_deleted(ht, available_entry)) ht->deleted_entries--; available_entry->hash = hash; - available_entry->key = key; - available_entry->data = data; ht->entries++; return available_entry; } @@ -496,6 +490,20 @@ return NULL; } +static struct hash_entry +hash_table_insert(struct hash_table ht, uint32_t hash, + const void key, void data) +{ + struct hash_entry entry = hash_table_get_entry(ht, hash, key); + + if (entry) { + entry->key = key; + entry->data = data; + } + + return entry; +} + /** * Inserts the key with the given hash into the table. * @@ -769,6 +777,13 @@ #define FREED_KEY_VALUE 0 +static void _mesa_hash_table_u64_delete_keys(void data) +{ + struct hash_table_u64 ht = ralloc_parent(data); + + _mesa_hash_table_u64_clear(ht); +} + struct hash_table_u64 * _mesa_hash_table_u64_create(void mem_ctx) { @@ -785,6 +800,31 @@ } else { ht->table = _mesa_hash_table_create(ht, key_u64_hash, key_u64_equals); + + / Allocate a ralloc sub-context which takes the u64 hash table + * as a parent and attach a destructor to it so we can free the + * hash_key_u64 objects that were allocated by + * _mesa_hash_table_u64_insert(). 
+ * + * The order of creation of this sub-context is crucial: it needs + * to happen after the _mesa_hash_table_create() call to guarantee + * that the destructor is called before ht->table and its children + * are freed, otherwise the _mesa_hash_table_u64_clear() call in the + * destructor leads to a use-after-free situation. + / + if (ht->table) { + void dummy_ctx = ralloc_context(ht); + + /* If we can't allocate a sub-context, free the hash table + * immediately and return NULL to avoid future leaks. + / + if (!dummy_ctx) { + ralloc_free(ht); + return NULL; + } + + ralloc_set_destructor(dummy_ctx, _mesa_hash_table_u64_delete_keys); + } } if (ht->table) @@ -802,7 +842,7 @@ struct hash_key_u64 _key = (struct hash_key_u64 )entry->key; if (_key) - free(_key); + FREE(_key); } void @@ -847,7 +887,19 @@ return; _key->value = key; - _mesa_hash_table_insert(ht->table, _key, data); + struct hash_entry entry = + hash_table_get_entry(ht->table, key_u64_hash(_key), _key); + + if (!entry) { + FREE(_key); + return; + } + + entry->data = data; + if (!entry_is_present(ht->table, entry)) + entry->key = _key; + else + FREE(_key); } } @@ -905,6 +957,6 @@ struct hash_key _key = (struct hash_key )entry->key; _mesa_hash_table_remove(ht->table, entry); - free(_key); + FREE(_key); } }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/log.c ^
@@ -94,7 +94,7 @@ mesa_log_file = stderr; #if !DETECT_OS_WINDOWS - if (geteuid() == getuid()) { + if (__normal_user()) { const char log_file = os_get_option("MESA_LOG_FILE"); if (log_file) { FILE fp = fopen(log_file, "w");
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/perf/u_trace.c ^
@@ -385,7 +385,7 @@ u_trace_state.enabled_traces = debug_get_flags_option("MESA_GPU_TRACES", config_control, 0); const char *tracefile_name = debug_get_option_trace_file(); - if (tracefile_name && !__check_suid()) { + if (tracefile_name && __normal_user()) { u_trace_state.trace_file = fopen(tracefile_name, "w"); if (u_trace_state.trace_file != NULL) { atexit(trace_file_fini);
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/tests/half_float_test.cpp ^
@@ -46,18 +46,35 @@ } #endif -/* Sanity test our test values / -TEST(half_to_float_test, nan_test) +/ The sign of the bit for signaling is different on some old processors + * (PA-RISC, old MIPS without IEEE-754-2008 support). + * + * Disable the tests on those platforms, because it's not clear how to + * correctly handle NaNs when the CPU and GPU differ in their convention. + / +#if DETECT_ARCH_HPPA \|\| ((DETECT_ARCH_MIPS \|\| DETECT_ARCH_MIPS64) && !defined __mips_nan2008) +#define IEEE754_2008_NAN 0 +#else +#define IEEE754_2008_NAN 1 +#endif + +/ Sanity test our inf test values / +TEST(half_to_float_test, inf_test) { EXPECT_TRUE(isinf(TEST_POS_INF)); EXPECT_TRUE(isinf(TEST_NEG_INF)); +} +/ Make sure that our 32-bit float nan test value we're using is a + * non-signaling NaN. + / +#if IEEE754_2008_NAN +TEST(half_to_float_test, nan_test) +#else +TEST(half_to_float_test, DISABLED_nan_test) +#endif +{ EXPECT_TRUE(isnan(TEST_NAN)); - / Make sure that our 32-bit float nan test value we're using is a - * non-signaling NaN. The sign of the bit for signaling was apparently - * different on some old processors (PA-RISC, MIPS?). This test value should - * cover Intel, ARM, and PPC, for sure. - / EXPECT_FALSE(issignaling(TEST_NAN)); } @@ -82,12 +99,20 @@ } / Test the optionally HW instruction-using path. */ +#if IEEE754_2008_NAN TEST(half_to_float_test, half_to_float_test) +#else +TEST(half_to_float_test, DISABLED_half_to_float_test) +#endif { test_half_to_float_limits(_mesa_half_to_float); } +#if IEEE754_2008_NAN TEST(half_to_float_test, half_to_float_slow_test) +#else +TEST(half_to_float_test, DISABLED_half_to_float_slow_test) +#endif { test_half_to_float_limits(_mesa_half_to_float_slow); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/u_debug.h ^
@@ -39,6 +39,7 @@ #define U_DEBUG_H_ #include <stdarg.h> +#include <stdlib.h> #include <string.h> #if !defined(_WIN32) #include <sys/types.h> @@ -394,15 +395,22 @@ } static inline bool -__check_suid(void) +__normal_user(void) { -#if !defined(_WIN32) - if (geteuid() != getuid()) - return true; +#if defined(_WIN32) + return true; +#else + return geteuid() == getuid() && getegid() == getgid(); #endif - return false; } +#ifndef HAVE_SECURE_GETENV +static inline char secure_getenv(const char name) +{ + return getenv(name); +} +#endif + #define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \ static bool \ debug_get_option_ ## sufix (void) \
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/virtio/virtio-gpu/virgl_hw.h ^
@@ -586,6 +586,7 @@ #define VIRGL_CAP_V2_DRAW_PARAMETERS (1 << 14) #define VIRGL_CAP_V2_GROUP_VOTE (1 << 15) #define VIRGL_CAP_V2_MIRROR_CLAMP_TO_EDGE (1 << 16) +#define VIRGL_CAP_V2_MIRROR_CLAMP (1 << 17) /* virgl bind flags - these are compatible with mesa 10.5 gallium. * but are fixed, no other should be passed to virgl either.
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/virtio/vulkan/vn_pipeline.c ^
@@ -320,7 +320,7 @@ } } - layout->has_push_constant_ranges = pCreateInfo->pPushConstantRanges > 0; + layout->has_push_constant_ranges = pCreateInfo->pushConstantRangeCount > 0; VkPipelineLayout layout_handle = vn_pipeline_layout_to_handle(layout); vn_async_vkCreatePipelineLayout(dev->instance, device, pCreateInfo, NULL, @@ -1119,6 +1119,31 @@ state->gpl.fragment_output = true; } + /* After direct_gpl states collection, check the final state to validate + * VkPipelineLayout in case of being the final layout in linked pipeline. + * + * From the Vulkan 1.3.275 spec: + * VUID-VkGraphicsPipelineCreateInfo-layout-06602 + * + * If the pipeline requires fragment shader state or pre-rasterization + * shader state, layout must be a valid VkPipelineLayout handle + / + if ((state->gpl.fragment_shader && !is_raster_statically_disabled) \|\| + state->gpl.pre_raster_shaders) + valid.pipeline_layout = true; + + / Pipeline Derivatives + * + * VUID-VkGraphicsPipelineCreateInfo-flags-07984 + * + * If flags contains the VK_PIPELINE_CREATE_DERIVATIVE_BIT flag, and + * basePipelineIndex is -1, basePipelineHandle must be a valid graphics + * VkPipeline handle + / + if ((info->flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT) && + info->basePipelineIndex == -1) + valid.base_pipeline_handle = true; + out_fix_desc = (struct vn_graphics_pipeline_fix_desc) { .erase = { /* clang-format off
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/meson.build ^
@@ -92,6 +92,7 @@ idep_vulkan_wsi_defines = declare_dependency( compile_args : vulkan_wsi_list, + dependencies : vulkan_wsi_deps, ) vulkan_wsi_deps += idep_vulkan_wsi_defines
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/runtime/vk_graphics_state.c ^
@@ -2338,7 +2338,7 @@ VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer); struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; - SET_DYN_VALUE(dyn, RS_PROVOKING_VERTEX, + SET_DYN_VALUE(dyn, RS_RASTERIZATION_STREAM, rs.rasterization_stream, rasterizationStream); }
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/runtime/vk_instance.c ^
@@ -199,7 +199,7 @@ instance->trace_mode = parse_debug_string(getenv("MESA_VK_TRACE"), trace_options); instance->trace_frame = (uint32_t)debug_get_num_option("MESA_VK_TRACE_FRAME", 0xFFFFFFFF); - instance->trace_trigger_file = getenv("MESA_VK_TRACE_TRIGGER"); + instance->trace_trigger_file = secure_getenv("MESA_VK_TRACE_TRIGGER"); glsl_type_singleton_init_or_ref();
[-] [+]	Changed	_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_wayland.c ^
@@ -2215,6 +2215,8 @@ pthread_mutex_destroy(&chain->present_ids.lock); } + vk_free(pAllocator, (void )chain->drm_modifiers); + wsi_swapchain_finish(&chain->base); } @@ -2260,7 +2262,8 @@ / if (wsi_wl_surface->chain && wsi_swapchain_to_handle(&wsi_wl_surface->chain->base) != pCreateInfo->oldSwapchain) { - return VK_ERROR_NATIVE_WINDOW_IN_USE_KHR; + result = VK_ERROR_NATIVE_WINDOW_IN_USE_KHR; + goto fail; } if (pCreateInfo->oldSwapchain) { VK_FROM_HANDLE(wsi_wl_swapchain, old_chain, pCreateInfo->oldSwapchain); @@ -2370,11 +2373,24 @@ chain->shm_format = wl_shm_format_for_vk_format(chain->vk_format, alpha); } chain->num_drm_modifiers = num_drm_modifiers; - chain->drm_modifiers = drm_modifiers; + if (num_drm_modifiers) { + uint64_t drm_modifiers_copy = + vk_alloc(pAllocator, sizeof(drm_modifiers) * num_drm_modifiers, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!drm_modifiers_copy) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail_free_wl_chain; + } + + typed_memcpy(drm_modifiers_copy, drm_modifiers, num_drm_modifiers); + chain->drm_modifiers = drm_modifiers_copy; + } if (chain->wsi_wl_surface->display->wp_presentation_notwrapped) { - if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced)) - goto fail; + if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced)) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail_free_wl_chain; + } pthread_mutex_init(&chain->present_ids.lock, NULL); wl_list_init(&chain->present_ids.outstanding_list); @@ -2392,7 +2408,7 @@ result = wsi_wl_image_init(chain, &chain->images[i], pCreateInfo, pAllocator); if (result != VK_SUCCESS) - goto fail_image_init; + goto fail_free_wl_images; chain->images[i].busy = false; } @@ -2400,14 +2416,15 @@ return VK_SUCCESS; -fail_image_init: +fail_free_wl_images: wsi_wl_swapchain_images_free(chain); - +fail_free_wl_chain: wsi_wl_swapchain_chain_free(chain, pAllocator); fail: vk_free(pAllocator, chain); wsi_wl_surface->chain = NULL; + assert(result != VK_SUCCESS); return 
result; }