[-]
[+]
|
Changed |
_service:tar_git:mesa.spec
|
|
[-]
[+]
|
Changed |
_service
^
|
@@ -2,7 +2,7 @@
<service name="tar_git">
<param name="url">https://github.com/sailfish-on-dontbeevil/mesa</param>
<param name="branch">master</param>
- <param name="revision">23.3.1+git6</param>
+ <param name="revision">23.3.5+git1</param>
<param name="token"/>
<param name="debian">N</param>
<param name="dumb">N</param>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/.gitlab-ci/test/gitlab-ci.yml
^
|
@@ -380,7 +380,6 @@
echo "export SCRIPTS_DIR=./install" >> ${JOB_FOLDER}/set-job-env-vars.sh
echo "Variables passed through:"
cat ${JOB_FOLDER}/set-job-env-vars.sh
- echo "export CI_JOB_JWT=${CI_JOB_JWT}" >> ${JOB_FOLDER}/set-job-env-vars.sh
set -x
# Copy the mesa install tarball to the job folder, for later extraction
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/.pick_status.json
^
|
@@ -1,5 +1,19815 @@
[
{
+ "sha": "4cd5b2b5426e8d670fc3657eee040a79e3f9df1e",
+ "description": "intel/hasvk: assume() we don't get ISL_NUM_FORMATS",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "3d4ef6f983fa78c3d6f361ab2b5a3409e6c9d09d",
+ "description": "intel/vulkan: assume() that we don't use \"ISL_NUM_FORMATS\"",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "529e7ab9755c33c5c59438f3b58b5cfdc2eeffc5",
+ "description": "lavapipe: RM2024 extension promotions",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "1c01fd028616c755fdac59016b17b07933a416ed",
+ "description": "util/disk_cache: Use secure_getenv to determine cache directories",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "e8b0e5cac9062f9f290a46460279eaa3eb0c60a8",
+ "description": "radv: Use secure_getenv for RADV_THREAD_TRACE_TRIGGER",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "c01a07f2e47bc778ff6faf665b98be5556c77e77",
+ "description": "radv: Use secure_getenv in radv_builtin_cache_path",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "72f95a8364c018ed833aab171f8d5fa65145cb10",
+ "description": "mesa/main: Use secure_getenv for shader dumping",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "321e2cee5315e94c050f8659a8cd55e0e7cd9076",
+ "description": "vtn: Use secure_getenv for shader dumping",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 2,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "f3b892b74ab7db998dd24d8443803ba9dc20f8a6",
+ "description": "aux/trace: Guard triggers behind __normal_user",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "7ea96ff75b771ea8eb48d2b9fec05e5edc958b21",
+ "description": "vulkan: Use secure_getenv for trigger files",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "8b209a62006ad6fd4593bb4f528ce8aee23fc038",
+ "description": "util: Provide a secure_getenv fallback for platforms without it",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "0fa4ea98ca70646f445552fce3e1912655b34274",
+ "description": "ci: always skip dEQP-VK.info.device_extensions",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "4824238dd901b57e2f804b38fdc88e2d1a533c4f",
+ "description": "zink: Fix return type and values of create_buffer and create_images",
+ "nominated": false,
+ "nomination_type": 1,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": "f6383e03f9c8d56ffc76f014175fc9697bd66945",
+ "notes": null
+ },
+ {
+ "sha": "c309d2017230e657fd042b9b9dd7acd1c621d2c5",
+ "description": "aco/insert_exec_mask: Fix unconditional demote at top-level control flow.",
+ "nominated": true,
+ "nomination_type": 0,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "4097df29f6e716155fae17f0ec5ce38fa3ec2a96",
+ "description": "nvk: allow 3d compressed textures",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "9ddf647eabebd2e346c2bcb5e68e198ecca650ac",
+ "description": "nvk: Fix whitespace in nvk_image.c",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "e8bec58de0138ba1e6118b45b1c0240e25cdf11b",
+ "description": "nil: Set the level offset to 0 in nil_image_for_level",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
+ "notes": null
+ },
+ {
+ "sha": "445aacb4217cbf5fb7be604c5484eb84c3c06497",
+ "description": "clc: retrieve libclang path at runtime.",
+ "nominated": true,
+ "nomination_type": 1,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": "e22491c83265200f518e9fb4deff54e3c2823b68",
+ "notes": null
+ },
+ {
+ "sha": "8efd11fce99710757b14cb575f33778f730ec904",
+ "description": "clc: force fPIC for every user when using shared LLVM",
+ "nominated": true,
+ "nomination_type": 1,
+ "resolution": 1,
+ "main_sha": null,
+ "because_sha": "e22491c83265200f518e9fb4deff54e3c2823b68",
+ "notes": null
+ },
+ {
+ "sha": "37a13463478703e42e590b8d23a221604653b384",
+ "description": "meson: remove opencl-external-clang-headers option and rely on shared-llvm",
+ "nominated": false,
+ "nomination_type": 3,
+ "resolution": 4,
+ "main_sha": null,
+ "because_sha": null,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/VERSION
^
|
@@ -1 +1 @@
-23.3.1
+23.3.5
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/bin/symbols-check.py
^
|
@@ -7,6 +7,7 @@
# This list contains symbols that _might_ be exported for some platforms
PLATFORM_SYMBOLS = [
+ '_GLOBAL_OFFSET_TABLE_',
'__bss_end__',
'__bss_start__',
'__bss_start',
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes.rst
^
|
@@ -3,6 +3,10 @@
The release notes summarize what's new or changed in each Mesa release.
+- :doc:`23.3.5 release notes <relnotes/23.3.5>`
+- :doc:`23.3.4 release notes <relnotes/23.3.4>`
+- :doc:`23.3.3 release notes <relnotes/23.3.3>`
+- :doc:`23.3.2 release notes <relnotes/23.3.2>`
- :doc:`23.3.1 release notes <relnotes/23.3.1>`
- :doc:`23.3.0 release notes <relnotes/23.3.0>`
- :doc:`23.1.9 release notes <relnotes/23.1.9>`
@@ -404,6 +408,10 @@
:maxdepth: 1
:hidden:
+ 23.3.5 <relnotes/23.3.5>
+ 23.3.4 <relnotes/23.3.4>
+ 23.3.3 <relnotes/23.3.3>
+ 23.3.2 <relnotes/23.3.2>
23.3.1 <relnotes/23.3.1>
23.3.0 <relnotes/23.3.0>
23.1.9 <relnotes/23.1.9>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.1.rst
^
|
@@ -19,7 +19,7 @@
::
- TBD.
+ 6e48126d70fdb3f20ffeb246ca0c2e41ffdc835f0663a03d4526b8bf5db41de6 mesa-23.3.1.tar.xz
New features
|
[-]
[+]
|
Added |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.2.rst
^
|
@@ -0,0 +1,177 @@
+Mesa 23.3.2 Release Notes / 2023-12-27
+======================================
+
+Mesa 23.3.2 is a bug fix release which fixes bugs found since the 23.3.1 release.
+
+Mesa 23.3.2 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 23.3.2 implements the Vulkan 1.3 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ 3cfcb81fa16f89c56abe3855d2637d396ee4e03849b659000a6b8e5f57e69adc mesa-23.3.2.tar.xz
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- anv: glcts regression on zink
+- nir: Trivial loop not unrolling
+- Possible regression with AMD GPU with flatpak apps
+- Compiling Mesa with X in custom prefix fails in Intel Vulkan driver
+- radv/aco: Crysis 2 Remastered RT reflections are blocky around the edges with ACO, renders normally with LLVM
+
+
+Changes
+-------
+
+Bas Nieuwenhuizen (1):
+
+- radv: Use correct writemask for cooperative matrix ordering.
+
+Boris Brezillon (3):
+
+- util/hash_table: Use FREE() to be consistent with the CALLOC_STRUCT() call
+- util/hash_table: Don't leak hash_u64_key objects when the entry exists
+- util/hash_table: Don't leak hash_key_u64 objects when the u64 hash table is destroyed
+
+Christian Gmeiner (2):
+
+- etnaviv: Update headers from rnndb
+- etnaviv: Add static_assert(..) to catch memory corruption
+
+Dave Airlie (1):
+
+- intel/compiler: move gen5 final pass to actually be final pass
+
+David Heidelberg (2):
+
+- ci/freedreno: timestamp-get no longer fails on Adreno
+- ci/freedreno: fail introduced by ARB_post_depth_coverage
+
+Eric Engestrom (10):
+
+- docs: add sha256sum for 23.3.1
+- .pick_status.json: Update to d761871761e5fe7d498b0cc818ed627698ed1225
+- .pick_status.json: Update to 377c6b2d45ee73da3e5431846a3b4bfdd7ae2b83
+- ci/b2c: drop passthrough of unset CI_JOB_JWT
+- .pick_status.json: Updates notes for 6a92af158dc132eee449c175bdee66d92c68d191
+- vulkan/wsi: fix build when platform headers are installed in non-standard locations
+- .pick_status.json: Update to 670a799ebff9a98daafccf49324c2a01311b0c41
+- .pick_status.json: Update to e61fae6eb8ae1ae1228d6f89329324310db808ae
+- .pick_status.json: Update to 1e6fcd6a611574241b1cde306afcc416a03ac76b
+- .pick_status.json: Update to 55c262898ae7188311c89a60e4ec0fbb67b7a95b
+
+Faith Ekstrand (1):
+
+- nir: Scalarize bounds checked loads and stores
+
+Friedrich Vock (2):
+
+- radv,vtn,driconf: Add and use radv_rt_ssbo_non_uniform workaround for Crysis 2/3 Remastered
+- radv/rt: Initialize unused children in PLOC early-exit
+
+George Ouzounoudis (1):
+
+- vulkan: Fix dynamic graphics state enum usage
+
+Gert Wollny (1):
+
+- r600/sfn: keep workgroup and invocation ID registers for whole shader
+
+Jesse Natalie (1):
+
+- d3d12: Only destroy the winsys during screen destruction, not reset
+
+Jonathan Gray (1):
+
+- intel/common: add directory prefix to intel_gem.h include
+
+José Expósito (1):
+
+- egl/glx: fallback to software when Zink is forced and fails
+
+Karol Herbst (4):
+
+- rusticl/kernel: explicitly set rounding modes
+- rusticl: do not warn on empty RUSTICL_DEBUG or RUSTICL_FEATURES
+- rusticl: silence clippy::arc-with-non-send-sync for now
+- rusticl: check rustc version for flags requiring newer rustc/clippy
+
+Kenneth Graunke (3):
+
+- iris: Initialize bo->index to -1 when importing buffers
+- iris: Don't search the exec list if BOs have never been added to one
+- iris: Skip mi_builder init for indirect draws
+
+Lionel Landwerlin (4):
+
+- nir/clone: fix missing printf_info clone
+- nir/divergence: handle printf intrinsic
+- anv: fix incorrect queue_family access on command buffer
+- anv: wait for CS write completion before executing secondary
+
+Michel Dänzer (2):
+
+- gallium/dri: Return __DRI_ATTRIB_SWAP_UNDEFINED for _SWAP_METHOD
+- glx: Handle IGNORE_GLX_SWAP_METHOD_OML regardless of GLX_USE_APPLEGL
+
+Pierre-Eric Pelloux-Prayer (4):
+
+- radeonsi/sqtt: fix RGP pm4 state emit function
+- radeonsi/sqtt: clear record_counts variable
+- radeonsi/sqtt: rework pm4.reg_va_low_idx
+- radeonsi/sqtt: use calloc instead of malloc
+
+Robert Foss (1):
+
+- egl/surfaceless: Fix EGL_DEVICE_EXT implementation
+
+Sil Vilerino (1):
+
+- d3d12: Fix AV1 video encode 32 bits build
+
+Sviatoslav Peleshko (2):
+
+- nir/loop_analyze: Don't test non-positive iterations count
+- intel/fs: Don't optimize DW*1 MUL if it stores value to the accumulator
+
+Tapani Pälli (5):
+
+- anv/hasvk/drirc: change anv_assume_full_subgroups to have subgroup size
+- drirc: setup anv_assume_full_subgroups=16 for UnrealEngine5.1
+- iris: use intel_needs_workaround with 14015055625
+- mesa: fix enum support for EXT_clip_cull_distance
+- drirc/anv: disable FCV optimization for Baldur's Gate 3
+
+Timothy Arceri (1):
+
+- radeonsi: fix divide by zero in si_get_small_prim_cull_info()
+
+Vinson Lee (1):
+
+- etnaviv: Remove duplicate initializers
+
+Yiwei Zhang (1):
+
+- vulkan/wsi/wayland: ensure drm modifiers stored in chain are immutable
+
+Yonggang Luo (1):
+
+- dzn: Fixes -Werror=incompatible-pointer-types
|
[-]
[+]
|
Added |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.3.rst
^
|
@@ -0,0 +1,155 @@
+Mesa 23.3.3 Release Notes / 2024-01-10
+======================================
+
+Mesa 23.3.3 is a bug fix release which fixes bugs found since the 23.3.2 release.
+
+Mesa 23.3.3 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 23.3.3 implements the Vulkan 1.3 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ 518307c0057fa3cee8b58df78be431d4df5aafa7edc60d09278b2d7a0a80f3b4 mesa-23.3.3.tar.xz
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- Error during SPIR-V parsing of OpCopyLogical
+- radv: Atlas Fallen corrupted rendering
+- intel: Require 64KB alignment when using CCS and multiple engines
+- 23.3.2 regression: kms_swrast_dri.so segfaults
+- Mesa is not compatible with Python 3.12 due to use of distutils
+- anv: importing memory for a compressed image using modifier is hitting an assert
+
+
+Changes
+-------
+
+Connor Abbott (1):
+
+- ir3/legalize: Fix helper propagation with b.any/b.all/getone
+
+Daniel Schürmann (1):
+
+- nir/opt_move_discards_to_top: don't schedule discard/demote across subgroup operations
+
+Dave Airlie (5):
+
+- gallivm: handle llvm 16 atexit ordering problems.
+- intel/compiler: fix release build unused variable.
+- llvmpipe: fix caching for texture shaders.
+- intel/compiler: reemit boolean resolve for inverted if on gen5
+- radv: don't emit cp dma packets on video rings.
+
+Eric Engestrom (13):
+
+- docs: add sha256sum for 23.3.2
+- .pick_status.json: Mark eb5bb5c784e97c533e30b348e82e446ac0da59c8 as denominated
+- .pick_status.json: Update to ebee672ef87794f3f4201270623a92f34e62b8ff
+- .pick_status.json: Mark 060439bdf0e74f0f2e255d0a81b5356f9a2f5457 as denominated
+- .pick_status.json: Mark 8d0e70f628b745ad81124e0c3fe5e46ea84f6b46 as denominated
+- .pick_status.json: Update to 39c8cca34fb72db055df18abf1d473e099f4b05b
+- .pick_status.json: Update to 2c078bfd18cae0ed1a0a3916020e49fb74668504
+- .pick_status.json: Update to e2a7c877ad1fd6bda4032f707eea7646e5229969
+- .pick_status.json: Update to 031978933151e95690e93919e7bfd9f1753f2794
+- .pick_status.json: Mark fbe4e16db2d369c3e54067d17f81bdce8661a461 as denominated
+- .pick_status.json: Mark b38c776690c9c39b04c57d74f9b036de56995aff as denominated
+- .pick_status.json: Update to f6d2df5a7542025022e69b81dbe3af3e51ea5cd3
+- .pick_status.json: Update to 67ad1142cf6afe61de834cefeddb4be06382899f
+
+Erik Faye-Lund (2):
+
+- zink: update profile schema
+- zink: use KHR version of maint5 features
+
+Friedrich Vock (1):
+
+- radv/rt: Free traversal NIR after compilation
+
+Georg Lehmann (1):
+
+- aco: fix applying input modifiers to DPP8
+
+Jonathan Gray (1):
+
+- zink: put sysmacros.h include under #ifdef MAJOR_IN_SYSMACROS
+
+José Roberto de Souza (2):
+
+- anv: Assume that imported bos already have flat CCS requirements satisfied
+- anv: Increase ANV_MAX_QUEUE_FAMILIES
+
+Karol Herbst (2):
+
+- zink: lock screen queue on context_destroy and CreateSwapchain
+- zink: fix heap-use-after-free on batch_state with sub-allocated pipe_resources
+
+Konstantin Seurer (2):
+
+- vtn: Remove transpose(m0)*m1 fast path
+- vtn: Allow for OpCopyLogical with different but compatible types
+
+Leo Liu (1):
+
+- gallium/vl: match YUYV/UYVY swizzle with change of color channels
+
+Lionel Landwerlin (2):
+
+- isl: implement Wa_22015614752
+- intel/fs: fix depth compute state for unchanged depth layout
+
+Marek Olšák (1):
+
+- glthread: don't unroll draws using user VBOs with GLES
+
+Mary Guillemard (2):
+
+- zink: Initialize pQueueFamilyIndices for image query / create
+- zink: Always fill external_only in zink_query_dmabuf_modifiers
+
+Mike Blumenkrantz (1):
+
+- zink: enforce maxTexelBufferElements for texel buffer sizing
+
+Rhys Perry (1):
+
+- aco/tests: use more raw strings
+
+Samuel Pitoiset (2):
+
+- radv: fix binding partial depth/stencil views with dynamic rendering
+- radv: disable stencil test without a stencil attachment
+
+Sil Vilerino (2):
+
+- Revert "d3d12: Only destroy the winsys during screen destruction, not reset"
+- Revert "d3d12: Fix screen->winsys leak in d3d12_screen"
+
+Vinson Lee (1):
+
+- ac/rgp: Fix single-bit-bitfield-constant-conversion warning
+
+Yonggang Luo (1):
+
+- meson: Support for both packaging and distutils
+
+antonino (1):
+
+- egl: only check dri3 on X11
|
[-]
[+]
|
Added |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.4.rst
^
|
@@ -0,0 +1,199 @@
+Mesa 23.3.4 Release Notes / 2024-01-24
+======================================
+
+Mesa 23.3.4 is a bug fix release which fixes bugs found since the 23.3.3 release.
+
+Mesa 23.3.4 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 23.3.4 implements the Vulkan 1.3 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ df12d765be4650fe532860b18aa18e6da1d0b07d1a21dfdfe04660e6b7bac39a mesa-23.3.4.tar.xz
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- [AMDGPU RDNA3] Antialiasing is broken in Blender
+- Assassin's Creed Odyssey wrong colors on Arc A770
+- The Finals fails to launch with DX12 on Intel Arc unless "force_vk_vendor" is set to -1.
+- zink crashes on nvidia
+- radv: games render with garbage output on RX5600M through PRIME with DCC
+- radv: RGP reports for mesh shaders are confusing
+- d3d10umd: Build failure regression with MSVC during 23.3 development cycle
+- VA-API CI tests freeze
+- Radeon: YUYV DMA BUF eglCreateImageKHR fails
+
+
+Changes
+-------
+
+Alessandro Astone (1):
+
+- zink: Fix resizable BAR detection logic
+
+Boris Brezillon (3):
+
+- panvk: Fix tracing
+- panvk: Fix access to uninitialized panvk_pipeline_layout::num_sets field
+- panfrost: Clamp the render area to the damage region
+
+Daniel Schürmann (1):
+
+- aco: give spiller more room to assign spilled SGPRs to VGPRs
+
+Dave Airlie (2):
+
+- radv/video: refactor sq start/end code to avoid decode hangs.
+- radv: don't submit empty command buffers on encoder ring.
+
+David Rosca (1):
+
+- radeonsi/vcn: Fix H264 slice header when encoding I frames
+
+Eric Engestrom (7):
+
+- docs: add sha256sum for 23.3.3
+- .pick_status.json: Update to 68f5277887aae1cdc202f45ecd44df2c3c59ba7d
+- .pick_status.json: Update to 4fe5f06d400a7310ffc280761c27b036aec86646
+- .pick_status.json: Update to ff84aef116f9d0d13440fd13edf2ac0b69a8c132
+- .pick_status.json: Update to 6e4bb8253ed36f911a0a45dfecf89c237a8cd362
+- .pick_status.json: Update to d0a3bac163ca803eda03feb3afea80e516568caf
+- .pick_status.json: Update to eca4f0f632b1e3e6e24bd12ee5f00522eb7d0fdb
+
+Friedrich Vock (4):
+
+- radv/rt: Add workaround to make leaves always active
+- radv: Fix shader replay allocation condition
+- nir: Make is_trivial_deref_cast public
+- nir: Handle casts in nir_opt_copy_prop_vars
+
+Georg Lehmann (1):
+
+- aco: stop scheduling at p_logical_end
+
+Hans-Kristian Arntzen (1):
+
+- wsi/x11: Add workaround for Detroit Become Human.
+
+Ian Romanick (1):
+
+- intel/compiler: Track mue_compaction and mue_header_packing flags in brw_get_compiler_config_value
+
+Jesse Natalie (1):
+
+- mesa: Consider mesa format in addition to internal format for mip/cube completeness
+
+Karol Herbst (3):
+
+- rusticl/kernel: run opt/lower_memcpy later to fix a crash
+- nir: rework and fix rotate lowering
+- rusticl/kernel: check that local size on dispatch doesn't exceed limits
+
+Konstantin Seurer (4):
+
+- ac/llvm: Enable helper invocations for quad OPs
+- lavapipe: Fix DGC vertex buffer handling
+- lavapipe: Mark vertex elements dirty if the stride changed
+- lavapipe: Report the correct preprocess buffer size
+
+Lionel Landwerlin (4):
+
+- anv: fix disabled Wa_14017076903/18022508906
+- anv: hide vendor ID for The Finals
+- anv: fix pipeline executable properties with graphics libraries
+- anv: implement undocumented tile cache flush requirements
+
+Lucas Stach (1):
+
+- etnaviv: disable 64bpp render/sampler formats
+
+Matt Turner (4):
+
+- symbols-check: Add _GLOBAL_OFFSET_TABLE_
+- nir: Fix cast
+- util: Add DETECT_ARCH_HPPA macro
+- util/tests: Disable half-float NaN test on hppa/old-mips
+
+Max R (1):
+
+- d3d10umd: Fix compilation
+
+Mike Blumenkrantz (5):
+
+- lavapipe: fix devenv icd filename
+- zink: always force flushes when originating from api frontend
+- zink: ignore tc buffer replacement info
+- zink: fix buffer rebind early-out check
+- zink: fix separate shader patch variable location adjustment
+
+Patrick Lerda (1):
+
+- glsl/nir: fix gl_nir_cross_validate_outputs_to_inputs() memory leak
+
+Pavel Ondračka (1):
+
+- r300: fix reusing of color varying slots for generic ones
+
+Pierre-Eric Pelloux-Prayer (2):
+
+- ac/surface: don't oversize surf_size
+- radeonsi: compute epitch when modifying surf_pitch
+
+Rhys Perry (3):
+
+- radv: do nir_shader_gather_info after radv_nir_lower_rt_abi
+- nir/lower_non_uniform: set non_uniform=false when lowering is not needed
+- nir/lower_shader_calls: remove CF before nir_opt_if
+
+Samuel Pitoiset (2):
+
+- radv: do not issue SQTT marker with DISPATCH_MESH_INDIRECT_MULTI
+- radv: fix indirect dispatches on the compute queue on GFX7
+
+Sviatoslav Peleshko (1):
+
+- nir: Use alu source components count in nir_alu_srcs_negative_equal
+
+Tapani Pälli (4):
+
+- anv: check for wa 16013994831 in emit_so_memcpy_end
+- iris: expand pre-hiz data cache flush to gfx >= 125
+- anv: expand pre-hiz data cache flush to gfx >= 125
+- iris: replace constant cache invalidate with hdc flush
+
+Tatsuyuki Ishi (1):
+
+- radv: never set DISABLE_WR_CONFIRM for CP DMA clears and copies
+
+Timur Kristóf (1):
+
+- radv: Correctly select SDMA support for PRIME blit.
+
+Yiwei Zhang (4):
+
+- vulkan/wsi/wayland: fix returns and avoid leaks for failed swapchain
+- venus: fix pipeline layout lifetime
+- venus: fix pipeline derivatives
+- venus: fix to respect the final pipeline layout
+
+Yonggang Luo (1):
+
+- compiler/spirv: The spirv shader is binary, should write in binary mode
|
[-]
[+]
|
Added |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/docs/relnotes/23.3.5.rst
^
|
@@ -0,0 +1,154 @@
+Mesa 23.3.5 Release Notes / 2024-02-01
+======================================
+
+Mesa 23.3.5 is a bug fix release which fixes bugs found since the 23.3.4 release.
+
+Mesa 23.3.5 implements the OpenGL 4.6 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.6. OpenGL
+4.6 is **only** available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+Mesa 23.3.5 implements the Vulkan 1.3 API, but the version reported by
+the apiVersion property of the VkPhysicalDeviceProperties struct
+depends on the particular driver being used.
+
+SHA256 checksum
+---------------
+
+::
+
+ TBD.
+
+
+New features
+------------
+
+- None
+
+
+Bug fixes
+---------
+
+- [radeonsi] Regression: graphical artifacting on water texture in OpenGOAL
+- VAAPI: EFC on VCN2 produces broken H264 video and crashes the HEVC encoder
+
+
+Changes
+-------
+
+Antoine Coutant (1):
+
+- clc: retrieve libclang path at runtime.
+
+Daniel Schürmann (1):
+
+- aco/insert_exec_mask: Fix unconditional demote at top-level control flow.
+
+David Heidelberg (1):
+
+- ci/freedreno: timestamp-get no longer fails on Adreno
+
+Dmitry Baryshkov (1):
+
+- freedreno/drm: don't crash for unsupported devices
+
+Eric Engestrom (8):
+
+- docs: add sha256sum for 23.3.4
+- .pick_status.json: Update to b75ee1a0670a3207dfd99917e4f47d064a44197f
+- .pick_status.json: Update to 4cd5b2b5426e8d670fc3657eee040a79e3f9df1e
+- util: rename __check_suid() to __normal_user()
+- tree-wide: use __normal_user() everywhere instead of writing the check manually
+- util: simplify logic in __normal_user()
+- util: check for setgid() as well in __normal_user()
+- .pick_status.json: Mark 321e2cee5315e94c050f8659a8cd55e0e7cd9076 as denominated
+
+Faith Ekstrand (1):
+
+- nvk: Don't ignore ExternalImageFormatInfo
+
+Friedrich Vock (7):
+
+- util: Provide a secure_getenv fallback for platforms without it
+- aux/trace: Guard triggers behind __normal_user
+- mesa/main: Use secure_getenv for shader dumping
+- radv: Use secure_getenv in radv_builtin_cache_path
+- radv: Use secure_getenv for RADV_THREAD_TRACE_TRIGGER
+- util/disk_cache: Use secure_getenv to determine cache directories
+- vulkan: Use secure_getenv for trigger files
+
+Gert Wollny (5):
+
+- r600: lower dround_even also on hardware that supports fp64
+- virgl: Use better reporting for mirror_clamp features
+- radv: Fix compilation with gcc-13 and tsan enabled
+- nir/lower_int64: Fix compilation with gcc-13 and tsan enabled
+- nir/builder: Fix compilation with gcc-13 when tsan is enabled
+
+Haihao Xiang (1):
+
+- anv: Fix typo in transition_color_buffer
+
+Hyunjun Ko (1):
+
+- anv/video: fix out-of-bounds read
+
+Iago Toral Quiroga (3):
+
+- broadcom/compiler: fix incorrect flags setup in non-uniform if path
+- broadcom/compiler: fix incorrect flags update for subgroup elect
+- broadcom/compiler: be more careful with unifa in non-uniform control flow
+
+Karol Herbst (1):
+
+- clc: force fPIC for every user when using shared LLVM
+
+Lionel Landwerlin (2):
+
+- anv: don't prevent L1 untyped cache flush in 3D mode
+- anv: fix transfer barriers flushes with compute queue
+
+Louis-Francis Ratté-Boulianne (4):
+
+- panfrost: factor out method to check whether we can discard resource
+- panfrost: add copy_resource flag to pan_resource_modifier_convert
+- panfrost: add can_discard flag to pan_legalize_afbc_format
+- panfrost: Legalize before updating part of an AFBC-packed texture
+
+Mike Blumenkrantz (3):
+
+- zink: set more dynamic states when using shader objects
+- zink: always map descriptor buffers as COHERENT
+- zink: fix descriptor buffer unmaps on screen destroy
+
+Pierre-Eric Pelloux-Prayer (1):
+
+- radeonsi: emit cache flushes before draw registers
+
+Rhys Perry (1):
+
+- aco: fix labelling of s_not with constant
+
+Rob Clark (3):
+
+- freedreno: De-duplicate 19.2MHz RBBM tick conversion
+- freedreno: Fix timestamp conversion
+- freedreno: Implement PIPE_CAP_TIMER_RESOLUTION
+
+Rohan Garg (1):
+
+- anv: untyped data port flush required when a pipeline sets the VK_ACCESS_2_SHADER_STORAGE_READ_BIT
+
+Sebastian Wick (1):
+
+- radeonsi: Destroy queues before the aux contexts
+
+Tapani Pälli (1):
+
+- anv: move \*bits_for_access_flags to genX_cmd_buffer
+
+Thong Thai (1):
+
+- radeonsi/vcn: remove EFC support for renoir
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/meson.build
^
|
@@ -796,6 +796,7 @@
endif
add_languages('rust', required: true)
+ rustc = meson.get_compiler('rust')
with_clc = true
endif
@@ -885,9 +886,12 @@
has_mako = run_command(
prog_python, '-c',
'''
-from distutils.version import StrictVersion
+try:
+ from packaging.version import Version
+except:
+ from distutils.version import StrictVersion as Version
import mako
-assert StrictVersion(mako.__version__) >= StrictVersion("0.8.0")
+assert Version(mako.__version__) >= Version("0.8.0")
''', check: false)
if has_mako.returncode() != 0
error('Python (3.x) mako module >= 0.8.0 required to build mesa.')
@@ -1333,6 +1337,7 @@
'getrandom': '',
'qsort_s': '',
'posix_fallocate': '',
+ 'secure_getenv': '',
}
foreach f, prefix: functions_to_detect
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-navi10-aco-fails.txt
^
|
@@ -1,3 +1,2 @@
# New CTS failures in 1.3.7.0
dEQP-VK.api.version_check.unavailable_entry_points,Fail
-dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.partial_binding_depth_stencil,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-navi21-llvm-fails.txt
^
|
@@ -3,8 +3,6 @@
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_frag,Fail
dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_denorm_flush_to_zero_vert,Fail
-dEQP-VK.draw.renderpass.shader_invocation.helper_invocation,Fail
-
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r32g32b32a32_sfloat.samples_8,Fail
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r8g8b8a8_unorm.samples_8,Fail
dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_4.r32g32b32a32_sfloat.samples_8,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-polaris10-aco-fails.txt
^
|
@@ -20,4 +20,3 @@
# New CTS failures in 1.3.7.0.
dEQP-VK.api.version_check.unavailable_entry_points,Fail
-dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.partial_binding_depth_stencil,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/ci/radv-renoir-aco-fails.txt
^
|
@@ -1,3 +1,2 @@
# New CTS failures in 1.3.7.0.
dEQP-VK.api.version_check.unavailable_entry_points,Fail
-dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.partial_binding_depth_stencil,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.c
^
|
@@ -1253,6 +1253,11 @@
info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND ||
info->family == CHIP_TONGA;
+ /* GFX7 CP requires 32 bytes alignment for the indirect buffer arguments on
+ * the compute queue.
+ */
+ info->has_async_compute_align32_bug = info->gfx_level == GFX7;
+
/* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
* feature version wasn't bumped.
*/
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_gpu_info.h
^
|
@@ -102,6 +102,7 @@
bool has_vgt_flush_ngg_legacy_bug;
bool has_cs_regalloc_hang_bug;
bool has_async_compute_threadgroup_bug;
+ bool has_async_compute_align32_bug;
bool has_32bit_predication;
bool has_3d_cube_border_color_mipmap;
bool has_image_opcodes;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_rgp.c
^
|
@@ -79,9 +79,9 @@
struct sqtt_file_header_flags {
union {
struct {
- int32_t is_semaphore_queue_timing_etw : 1;
- int32_t no_queue_semaphore_timestamps : 1;
- int32_t reserved : 30;
+ uint32_t is_semaphore_queue_timing_etw : 1;
+ uint32_t no_queue_semaphore_timestamps : 1;
+ uint32_t reserved : 30;
};
uint32_t value;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/common/ac_surface.c
^
|
@@ -1869,20 +1869,18 @@
util_next_power_of_two(LINEAR_PITCH_ALIGNMENT / surf->bpe);
if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
- surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) {
- /* Adjust surf_pitch to be in elements units not in pixels */
+ surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR &&
+ in->numMipLevels == 1) {
+ /* Divide surf_pitch (= pitch in pixels) by blk_w to get a
+ * pitch in elements instead because that's what the hardware needs
+ * in resource descriptors.
+ * See the comment in si_descriptors.c.
+ */
surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w,
linear_alignment);
- surf->u.gfx9.epitch =
- MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);
- /* The surface is really a surf->bpe bytes per pixel surface even if we
- * use it as a surf->bpe bytes per element one.
- * Adjust surf_slice_size and surf_size to reflect the change
- * made to surf_pitch.
- */
- surf->u.gfx9.surf_slice_size =
- MAX2(surf->u.gfx9.surf_slice_size,
- (uint64_t)surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
+ surf->u.gfx9.epitch = surf->u.gfx9.surf_pitch - 1;
+ /* Adjust surf_slice_size and surf_size to reflect the change made to surf_pitch. */
+ surf->u.gfx9.surf_slice_size = (uint64_t)surf->u.gfx9.surf_pitch * out.height * surf->bpe;
surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
for (unsigned i = 0; i < in->numMipLevels; i++) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_insert_exec_mask.cpp
^
|
@@ -555,33 +555,32 @@
(ctx.info[block->index].exec[0].second & mask_type_global));
int num;
- Temp cond, exit_cond;
- if (instr->operands[0].isConstant()) {
+ Operand src;
+ Temp exit_cond;
+ if (instr->operands[0].isConstant() && !(block->kind & block_kind_top_level)) {
assert(instr->operands[0].constantValue() == -1u);
/* transition to exact and set exec to zero */
exit_cond = bld.tmp(s1);
- cond =
- bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)),
- Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
+ src = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)),
+ Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
num = ctx.info[block->index].exec.size() - 2;
if (!(ctx.info[block->index].exec.back().second & mask_type_exact)) {
- ctx.info[block->index].exec.back().first = Operand(cond);
+ ctx.info[block->index].exec.back().first = src;
ctx.info[block->index].exec.emplace_back(Operand(bld.lm), mask_type_exact);
}
} else {
/* demote_if: transition to exact */
if (block->kind & block_kind_top_level && ctx.info[block->index].exec.size() == 2 &&
ctx.info[block->index].exec.back().second & mask_type_global) {
- /* We don't need to actually copy anything into exact, since the s_andn2
+ /* We don't need to actually copy anything into exec, since the s_andn2
* instructions later will do that.
*/
ctx.info[block->index].exec.pop_back();
} else {
transition_to_Exact(ctx, bld, block->index);
}
- assert(instr->operands[0].isTemp());
- cond = instr->operands[0].getTemp();
+ src = instr->operands[0];
num = ctx.info[block->index].exec.size() - 1;
}
@@ -589,7 +588,7 @@
if (ctx.info[block->index].exec[i].second & mask_type_exact) {
Instruction* andn2 =
bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
- get_exec_op(ctx.info[block->index].exec[i].first), cond);
+ get_exec_op(ctx.info[block->index].exec[i].first), src);
if (i == (int)ctx.info[block->index].exec.size() - 1)
andn2->definitions[0] = Definition(exec, bld.lm);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_optimizer.cpp
^
|
@@ -1440,7 +1440,7 @@
instr->operands[i].setTemp(info.temp);
} else if (info.is_neg() && can_use_mod && mod_bitsize_compat &&
can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) {
- if (!instr->isDPP() && !instr->isSDWA())
+ if (!instr->isDPP16() && can_use_VOP3(ctx, instr))
instr->format = asVOP3(instr->format);
instr->operands[i].setTemp(info.temp);
if (!instr->valu().abs[i])
@@ -1448,7 +1448,7 @@
}
if (info.is_abs() && can_use_mod && mod_bitsize_compat &&
can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) {
- if (!instr->isDPP() && !instr->isSDWA())
+ if (!instr->isDPP16() && can_use_VOP3(ctx, instr))
instr->format = asVOP3(instr->format);
instr->operands[i] = Operand(info.temp);
instr->valu().abs[i] = true;
@@ -2003,7 +2003,8 @@
break;
case aco_opcode::s_not_b32:
case aco_opcode::s_not_b64:
- if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
+ if (!instr->operands[0].isTemp()) {
+ } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
ctx.info[instr->definitions[1].tempId()].set_scc_invert(
ctx.info[instr->operands[0].tempId()].temp);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_scheduler.cpp
^
|
@@ -1087,6 +1087,9 @@
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
Instruction* current = block->instructions[idx].get();
+ if (current->opcode == aco_opcode::p_logical_end)
+ break;
+
if (block->kind & block_kind_export_end && current->isEXP() && ctx.schedule_pos_exports) {
unsigned target = current->exp().dest;
if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/aco_spill.cpp
^
|
@@ -1938,7 +1938,7 @@
/* calculate extra VGPRs required for spilling SGPRs */
if (demand.sgpr > sgpr_limit) {
unsigned sgpr_spills = demand.sgpr - sgpr_limit;
- extra_vgprs = DIV_ROUND_UP(sgpr_spills, program->wave_size) + 1;
+ extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
}
/* add extra SGPRs required for spilling VGPRs */
if (demand.vgpr + extra_vgprs > vgpr_limit) {
@@ -1949,7 +1949,7 @@
if (demand.sgpr + extra_sgprs > sgpr_limit) {
/* re-calculate in case something has changed */
unsigned sgpr_spills = demand.sgpr + extra_sgprs - sgpr_limit;
- extra_vgprs = DIV_ROUND_UP(sgpr_spills, program->wave_size) + 1;
+ extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
}
}
/* the spiller has to target the following register demand */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/compiler/tests/glsl_scraper.py
^
|
@@ -28,16 +28,16 @@
}
base_layout_qualifier_id_re = r'({0}\s*=\s*(?P<{0}>\d+))'
-id_re = '(?P<name_%d>[^(gl_)]\w+)'
-type_re = '(?P<dtype_%d>\w+)'
+id_re = r'(?P<name_%d>[^(gl_)]\w+)'
+type_re = r'(?P<dtype_%d>\w+)'
location_re = base_layout_qualifier_id_re.format('location')
component_re = base_layout_qualifier_id_re.format('component')
binding_re = base_layout_qualifier_id_re.format('binding')
set_re = base_layout_qualifier_id_re.format('set')
unk_re = r'\w+(=\d+)?'
layout_qualifier_re = r'layout\W*\((%s)+\)' % '|'.join([location_re, binding_re, set_re, unk_re, '[, ]+'])
-ubo_decl_re = 'uniform\W+%s(\W*{)?(?P<type_ubo>)' % (id_re%0)
-ssbo_decl_re = 'buffer\W+%s(\W*{)?(?P<type_ssbo>)' % (id_re%1)
+ubo_decl_re = r'uniform\W+%s(\W*{)?(?P<type_ubo>)' % (id_re%0)
+ssbo_decl_re = r'buffer\W+%s(\W*{)?(?P<type_ssbo>)' % (id_re%1)
image_buffer_decl_re = r'uniform\W+imageBuffer\w+%s;(?P<type_img_buf>)' % (id_re%2)
image_decl_re = r'uniform\W+image\w+\W+%s;(?P<type_img>)' % (id_re%3)
texture_buffer_decl_re = r'uniform\W+textureBuffer\w+%s;(?P<type_tex_buf>)' % (id_re%4)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/llvm/ac_llvm_build.c
^
|
@@ -2989,7 +2989,7 @@
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
-static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
+static LLVMValueRef ac_build_mode(struct ac_llvm_context *ctx, LLVMValueRef src, const char *mode)
{
LLVMTypeRef src_type = LLVMTypeOf(src);
unsigned bitsize = ac_get_elem_bits(ctx, src_type);
@@ -3002,7 +3002,7 @@
src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.%s", mode, type);
ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, 0);
if (bitsize < 32)
@@ -3011,6 +3011,16 @@
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
+static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
+{
+ return ac_build_mode(ctx, src, "wwm");
+}
+
+LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src)
+{
+ return ac_build_mode(ctx, src, "wqm");
+}
+
static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
LLVMValueRef inactive)
{
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/llvm/ac_llvm_build.h
^
|
@@ -469,6 +469,8 @@
LLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src);
LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
+LLVMValueRef ac_build_wqm(struct ac_llvm_context *ctx, LLVMValueRef src);
+
LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/llvm/ac_nir_to_llvm.c
^
|
@@ -3415,21 +3415,26 @@
case nir_intrinsic_quad_broadcast: {
unsigned lane = nir_src_as_uint(instr->src[1]);
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
+ result = ac_build_wqm(&ctx->ac, result);
break;
}
case nir_intrinsic_quad_swap_horizontal:
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
+ result = ac_build_wqm(&ctx->ac, result);
break;
case nir_intrinsic_quad_swap_vertical:
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
+ result = ac_build_wqm(&ctx->ac, result);
break;
case nir_intrinsic_quad_swap_diagonal:
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
+ result = ac_build_wqm(&ctx->ac, result);
break;
case nir_intrinsic_quad_swizzle_amd: {
uint32_t mask = nir_intrinsic_swizzle_mask(instr);
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
(mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
+ result = ac_build_wqm(&ctx->ac, result);
break;
}
case nir_intrinsic_masked_swizzle_amd: {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/build_helpers.h
^
|
@@ -156,6 +156,7 @@
#define VK_GEOMETRY_TYPE_TRIANGLES_KHR 0
#define VK_GEOMETRY_TYPE_AABBS_KHR 1
+#define VK_GEOMETRY_TYPE_INSTANCES_KHR 2
#define VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR 1
#define VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR 2
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/leaf.comp
^
|
@@ -333,6 +333,14 @@
is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
}
+#if ALWAYS_ACTIVE
+ if (!is_active && args.geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
+ bounds.min = vec3(0.0);
+ bounds.max = vec3(0.0);
+ is_active = true;
+ }
+#endif
+
DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : RADV_BVH_INVALID_NODE;
uvec4 ballot = subgroupBallot(is_active);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/meson.build
^
|
@@ -53,7 +53,12 @@
[
'leaf.comp',
'leaf',
- [],
+ ['ALWAYS_ACTIVE=0'],
+ ],
+ [
+ 'leaf.comp',
+ 'leaf_always_active',
+ ['ALWAYS_ACTIVE=1'],
],
[
'morton.comp',
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/bvh/ploc_internal.comp
^
|
@@ -249,7 +249,8 @@
total_bounds.min = vec3(INFINITY);
total_bounds.max = vec3(-INFINITY);
- for (uint32_t i = 0; i < DEREF(args.header).active_leaf_count; i++) {
+ uint32_t i = 0;
+ for (; i < DEREF(args.header).active_leaf_count; i++) {
uint32_t child_id = DEREF(INDEX(key_id_pair, src_ids, i)).id;
if (child_id != RADV_BVH_INVALID_NODE) {
@@ -263,6 +264,8 @@
DEREF(dst_node).children[i] = child_id;
}
+ for (; i < 2; i++)
+ DEREF(dst_node).children[i] = RADV_BVH_INVALID_NODE;
DEREF(dst_node).base.aabb = total_bounds;
DEREF(dst_node).bvh_offset = RADV_UNKNOWN_BVH_OFFSET;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/meta/radv_meta.c
^
|
@@ -296,7 +296,7 @@
static bool
radv_builtin_cache_path(char *path)
{
- char *xdg_cache_home = getenv("XDG_CACHE_HOME");
+ char *xdg_cache_home = secure_getenv("XDG_CACHE_HOME");
const char *suffix = "/radv_builtin_shaders";
const char *suffix2 = "/.cache/radv_builtin_shaders";
struct passwd pwd, *result;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/nir/radv_nir_lower_cooperative_matrix.c
^
|
@@ -181,7 +181,7 @@
nir_def *elem = intr->src[1].ssa;
nir_def *r = nir_vector_insert(&b, src1, elem, index);
- nir_store_deref(&b, dst_deref, r, 0xffff);
+ nir_store_deref(&b, dst_deref, r, nir_component_mask(r->num_components));
nir_instr_remove(instr);
progress = true;
break;
@@ -193,7 +193,7 @@
nir_def *r = nir_replicate(&b, elem, radv_nir_cmat_length(desc, wave_size));
- nir_store_deref(&b, dst_deref, r, 0xffff);
+ nir_store_deref(&b, dst_deref, r, nir_component_mask(r->num_components));
nir_instr_remove(instr);
progress = true;
break;
@@ -253,7 +253,7 @@
}
nir_def *mat = nir_vec(&b, vars, length);
- nir_store_deref(&b, dst_deref, mat, 0xffff);
+ nir_store_deref(&b, dst_deref, mat, nir_component_mask(mat->num_components));
nir_instr_remove(instr);
progress = true;
break;
@@ -332,7 +332,8 @@
ret = nir_cmat_muladd_amd(&b, A, B, C, .saturate = nir_intrinsic_saturate(intr),
.cmat_signed_mask = nir_intrinsic_cmat_signed_mask(intr));
- nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, 0xffff);
+ nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret,
+ nir_component_mask(ret->num_components));
nir_instr_remove(instr);
progress = true;
break;
@@ -366,7 +367,7 @@
ret = nir_vec(&b, components, ret->num_components * 2);
}
- nir_store_deref(&b, dst_deref, ret, 0xffff);
+ nir_store_deref(&b, dst_deref, ret, nir_component_mask(ret->num_components));
nir_instr_remove(instr);
progress = true;
break;
@@ -375,7 +376,8 @@
nir_def *src1 = radv_nir_load_cmat(&b, wave_size, intr->src[1].ssa);
nir_op op = nir_intrinsic_alu_op(intr);
nir_def *ret = nir_build_alu2(&b, op, src1, intr->src[2].ssa);
- nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, 0xffff);
+ nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret,
+ nir_component_mask(ret->num_components));
nir_instr_remove(instr);
progress = true;
break;
@@ -385,14 +387,16 @@
nir_def *src2 = radv_nir_load_cmat(&b, wave_size, intr->src[2].ssa);
nir_op op = nir_intrinsic_alu_op(intr);
nir_def *ret = nir_build_alu2(&b, op, src1, src2);
- nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret, 0xffff);
+ nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), ret,
+ nir_component_mask(ret->num_components));
nir_instr_remove(instr);
progress = true;
break;
}
case nir_intrinsic_cmat_bitcast: {
nir_def *src1 = radv_nir_load_cmat(&b, wave_size, intr->src[1].ssa);
- nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), src1, 0xffff);
+ nir_store_deref(&b, nir_instr_as_deref(intr->src[0].ssa->parent_instr), src1,
+ nir_component_mask(src1->num_components));
nir_instr_remove(instr);
progress = true;
break;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_acceleration_structure.c
^
|
@@ -41,6 +41,10 @@
#include "bvh/leaf.spv.h"
};
+static const uint32_t leaf_always_active_spv[] = {
+#include "bvh/leaf_always_active.spv.h"
+};
+
static const uint32_t morton_spv[] = {
#include "bvh/morton.spv.h"
};
@@ -513,9 +517,14 @@
if (device->meta_state.accel_struct_build.radix_sort)
goto exit;
- result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
- &device->meta_state.accel_struct_build.leaf_pipeline,
- &device->meta_state.accel_struct_build.leaf_p_layout);
+ if (device->instance->force_active_accel_struct_leaves)
+ result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv),
+ sizeof(struct leaf_args), &device->meta_state.accel_struct_build.leaf_pipeline,
+ &device->meta_state.accel_struct_build.leaf_p_layout);
+ else
+ result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
+ &device->meta_state.accel_struct_build.leaf_pipeline,
+ &device->meta_state.accel_struct_build.leaf_p_layout);
if (result != VK_SUCCESS)
goto exit;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_cmd_buffer.c
^
|
@@ -2271,17 +2271,19 @@
static void
radv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer)
{
+ const struct radv_rendering_state *render = &cmd_buffer->state.render;
struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ const bool stencil_test_enable =
+ d->vk.ds.stencil.test_enable && (render->ds_att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
- radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL,
- S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) |
- S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) |
- S_028800_ZFUNC(d->vk.ds.depth.compare_op) |
- S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 1 : 0) |
- S_028800_STENCIL_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) |
- S_028800_BACKFACE_ENABLE(d->vk.ds.stencil.test_enable ? 1 : 0) |
- S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) |
- S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare));
+ radeon_set_context_reg(
+ cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL,
+ S_028800_Z_ENABLE(d->vk.ds.depth.test_enable ? 1 : 0) |
+ S_028800_Z_WRITE_ENABLE(d->vk.ds.depth.write_enable ? 1 : 0) | S_028800_ZFUNC(d->vk.ds.depth.compare_op) |
+ S_028800_DEPTH_BOUNDS_ENABLE(d->vk.ds.depth.bounds_test.enable ? 1 : 0) |
+ S_028800_STENCIL_ENABLE(stencil_test_enable) | S_028800_BACKFACE_ENABLE(stencil_test_enable) |
+ S_028800_STENCILFUNC(d->vk.ds.stencil.front.op.compare) |
+ S_028800_STENCILFUNC_BF(d->vk.ds.stencil.back.op.compare));
}
static void
@@ -5861,6 +5863,11 @@
render->ds_att.format = inheritance_info->depthAttachmentFormat;
if (inheritance_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
render->ds_att.format = inheritance_info->stencilAttachmentFormat;
+
+ if (vk_format_has_depth(render->ds_att.format))
+ render->ds_att_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ if (vk_format_has_stencil(render->ds_att.format))
+ render->ds_att_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
cmd_buffer->state.inherited_pipeline_statistics = pBeginInfo->pInheritanceInfo->pipelineStatistics;
@@ -7716,6 +7723,7 @@
}
struct radv_attachment ds_att = {.iview = NULL};
+ VkImageAspectFlags ds_att_aspects = 0;
const VkRenderingAttachmentInfo *d_att_info = pRenderingInfo->pDepthAttachment;
const VkRenderingAttachmentInfo *s_att_info = pRenderingInfo->pStencilAttachment;
if ((d_att_info != NULL && d_att_info->imageView != VK_NULL_HANDLE) ||
@@ -7751,7 +7759,16 @@
assert(d_iview == NULL || s_iview == NULL || d_iview == s_iview);
ds_att.iview = d_iview ? d_iview : s_iview, ds_att.format = ds_att.iview->vk.format;
- radv_initialise_ds_surface(cmd_buffer->device, &ds_att.ds, ds_att.iview);
+
+ if (d_iview && s_iview) {
+ ds_att_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+ } else if (d_iview) {
+ ds_att_aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
+ } else {
+ ds_att_aspects = VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+
+ radv_initialise_ds_surface(cmd_buffer->device, &ds_att.ds, ds_att.iview, ds_att_aspects);
assert(d_res_iview == NULL || s_res_iview == NULL || d_res_iview == s_res_iview);
ds_att.resolve_iview = d_res_iview ? d_res_iview : s_res_iview;
@@ -7800,6 +7817,7 @@
render->color_att_count = pRenderingInfo->colorAttachmentCount;
typed_memcpy(render->color_att, color_att, render->color_att_count);
render->ds_att = ds_att;
+ render->ds_att_aspects = ds_att_aspects;
render->vrs_att = vrs_att;
render->vrs_texel_size = vrs_texel_size;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
@@ -7807,7 +7825,7 @@
if (cmd_buffer->device->physical_device->rad_info.rbplus_allowed)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
if (render->vrs_att.iview && cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10_3) {
if (render->ds_att.iview) {
@@ -8068,18 +8086,15 @@
uint32_t draw_id_enable = !!cmd_buffer->state.uses_drawid;
uint32_t mode1_enable = !cmd_buffer->device->mesh_fast_launch_2;
- const bool sqtt_en = !!cmd_buffer->device->sqtt.bo;
radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1));
radeon_emit(cs, 0); /* data_offset */
radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) | S_4C1_DRAW_INDEX_REG(draw_id_reg));
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) |
- S_4C2_XYZ_DIM_ENABLE(xyz_dim_enable) | S_4C2_MODE1_ENABLE(mode1_enable) |
- S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en));
+ S_4C2_XYZ_DIM_ENABLE(xyz_dim_enable) | S_4C2_MODE1_ENABLE(mode1_enable));
else
- radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) |
- S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en));
+ radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va));
radeon_emit(cs, draw_count);
radeon_emit(cs, count_va & 0xFFFFFFFF);
radeon_emit(cs, count_va >> 32);
@@ -9693,11 +9708,39 @@
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ uint64_t indirect_va = info->va;
+
radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
&cmd_buffer->mec_inv_pred_emitted, 4 /* DISPATCH_INDIRECT size */);
+
+ if (cmd_buffer->device->physical_device->rad_info.has_async_compute_align32_bug &&
+ cmd_buffer->qf == RADV_QUEUE_COMPUTE && !radv_is_aligned(indirect_va, 32)) {
+ const uint64_t unaligned_va = indirect_va;
+ UNUSED void *ptr;
+ uint32_t offset;
+
+ if (!radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, sizeof(VkDispatchIndirectCommand), 32, &offset, &ptr))
+ return;
+
+ indirect_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
+
+ for (uint32_t i = 0; i < 3; i++) {
+ const uint64_t src_va = unaligned_va + i * 4;
+ const uint64_t dst_va = indirect_va + i * 4;
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, src_va);
+ radeon_emit(cs, src_va >> 32);
+ radeon_emit(cs, dst_va);
+ radeon_emit(cs, dst_va >> 32);
+ }
+ }
+
radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, info->va);
- radeon_emit(cs, info->va >> 32);
+ radeon_emit(cs, indirect_va);
+ radeon_emit(cs, indirect_va >> 32);
radeon_emit(cs, dispatch_initiator);
} else {
radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1));
@@ -10592,7 +10635,10 @@
}
radv_gang_barrier(cmd_buffer, 0, dst_stage_mask);
- radv_cp_dma_wait_for_stages(cmd_buffer, src_stage_mask);
+
+ const bool is_gfx_or_ace = cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE;
+ if (is_gfx_or_ace)
+ radv_cp_dma_wait_for_stages(cmd_buffer, src_stage_mask);
cmd_buffer->state.flush_bits |= dst_flush_bits;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_device.c
^
|
@@ -1842,7 +1842,7 @@
void
radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview)
+ struct radv_image_view *iview, VkImageAspectFlags ds_aspects)
{
unsigned level = iview->vk.base_mip_level;
unsigned format, stencil_format;
@@ -1859,7 +1859,9 @@
stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
- ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice);
+ ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) |
+ S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) |
+ S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT));
if (device->physical_device->rad_info.gfx_level >= GFX10) {
ds->db_depth_view |=
S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_image.c
^
|
@@ -530,7 +530,7 @@
image_info->surf_index = NULL;
}
- if (create_info->prime_blit_src && device->physical_device->rad_info.gfx_level == GFX9) {
+ if (create_info->prime_blit_src && !device->physical_device->rad_info.sdma_supports_compression) {
/* Older SDMA hw can't handle DCC */
image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_instance.c
^
|
@@ -153,6 +153,8 @@
DRI_CONF_RADV_FLUSH_BEFORE_TIMESTAMP_WRITE(false)
DRI_CONF_RADV_RT_WAVE64(false)
DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false)
+ DRI_CONF_RADV_SSBO_NON_UNIFORM(false)
+ DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(false)
DRI_CONF_RADV_APP_LAYER()
DRI_CONF_SECTION_END
};
@@ -203,6 +205,8 @@
instance->tex_non_uniform = driQueryOptionb(&instance->dri_options, "radv_tex_non_uniform");
+ instance->ssbo_non_uniform = driQueryOptionb(&instance->dri_options, "radv_ssbo_non_uniform");
+
instance->app_layer = driQueryOptionstr(&instance->dri_options, "radv_app_layer");
instance->flush_before_timestamp_write =
@@ -211,6 +215,9 @@
instance->force_rt_wave64 = driQueryOptionb(&instance->dri_options, "radv_rt_wave64");
instance->dual_color_blend_by_location = driQueryOptionb(&instance->dri_options, "dual_color_blend_by_location");
+
+ instance->force_active_accel_struct_leaves =
+ driQueryOptionb(&instance->dri_options, "radv_force_active_accel_struct_leaves");
}
static const struct vk_instance_extension_table radv_instance_extensions_supported = {
@@ -253,7 +260,7 @@
static void
radv_handle_legacy_sqtt_trigger(struct vk_instance *instance)
{
- char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
+ char *trigger_file = secure_getenv("RADV_THREAD_TRACE_TRIGGER");
if (trigger_file) {
instance->trace_trigger_file = trigger_file;
instance->trace_mode |= RADV_TRACE_MODE_RGP;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_pipeline.c
^
|
@@ -162,6 +162,7 @@
key.image_2d_view_of_3d = device->image_2d_view_of_3d && device->physical_device->rad_info.gfx_level == GFX9;
key.tex_non_uniform = device->instance->tex_non_uniform;
+ key.ssbo_non_uniform = device->instance->ssbo_non_uniform;
for (unsigned i = 0; i < num_stages; ++i) {
const VkPipelineShaderStageCreateInfo *const stage = &stages[i];
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_pipeline_rt.c
^
|
@@ -414,6 +414,10 @@
temp_stage.nir = shaders[i];
radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device,
pipeline, monolithic);
+
+ /* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). */
+ nir_shader_gather_info(temp_stage.nir, nir_shader_get_entrypoint(temp_stage.nir));
+
radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled);
radv_postprocess_nir(device, pipeline_key, &temp_stage);
@@ -609,6 +613,7 @@
radv_shader_layout_init(pipeline_layout, MESA_SHADER_INTERSECTION, &traversal_stage.layout);
result = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, false, &traversal_stage, NULL, NULL,
&pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
+ ralloc_free(traversal_module.nir);
cleanup:
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_private.h
^
|
@@ -416,9 +416,11 @@
bool flush_before_query_copy;
bool enable_unified_heap_on_apu;
bool tex_non_uniform;
+ bool ssbo_non_uniform;
bool flush_before_timestamp_write;
bool force_rt_wave64;
bool dual_color_blend_by_location;
+ bool force_active_accel_struct_leaves;
char *app_layer;
};
@@ -1526,7 +1528,7 @@
void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
struct radv_image_view *iview);
void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview);
+ struct radv_image_view *iview, VkImageAspectFlags ds_aspects);
void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
struct radv_ds_buffer_info *ds);
@@ -1568,6 +1570,7 @@
uint32_t color_att_count;
struct radv_attachment color_att[MAX_RTS];
struct radv_attachment ds_att;
+ VkImageAspectFlags ds_att_aspects;
struct radv_attachment vrs_att;
VkExtent2D vrs_texel_size;
};
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_queue.c
^
|
@@ -1641,8 +1641,11 @@
}
queue->device->ws->cs_unchain(cmd_buffer->cs);
- if (!chainable || !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs))
- cs_array[num_submitted_cs++] = cmd_buffer->cs;
+ if (!chainable || !queue->device->ws->cs_chain(chainable, cmd_buffer->cs, queue->state.uses_shadow_regs)) {
+ /* don't submit empty command buffers to the kernel. */
+ if (radv_queue_ring(queue) != AMD_IP_VCN_ENC || cmd_buffer->cs->cdw != 0)
+ cs_array[num_submitted_cs++] = cmd_buffer->cs;
+ }
chainable = can_chain_next ? cmd_buffer->cs : NULL;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_sampler.c
^
|
@@ -47,6 +47,7 @@
unreachable("illegal tex wrap mode");
break;
}
+ return 0;
}
static unsigned
@@ -73,6 +74,7 @@
unreachable("illegal compare mode");
break;
}
+ return 0;
}
static unsigned
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.c
^
|
@@ -93,7 +93,6 @@
.lower_ffma64 = split_fma,
.lower_fpow = true,
.lower_mul_2x32_64 = true,
- .lower_rotate = true,
.lower_iadd_sat = device->rad_info.gfx_level <= GFX8,
.lower_hadd = true,
.lower_mul_32x16 = true,
@@ -461,6 +460,7 @@
.private_data = &spirv_debug_data,
},
.force_tex_non_uniform = key->tex_non_uniform,
+ .force_ssbo_non_uniform = key->ssbo_non_uniform,
};
nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint,
&spirv_options, &device->physical_device->nir_options[stage->stage]);
@@ -1328,14 +1328,14 @@
if (!hole->freelist.prev)
continue;
- if (hole->offset + hole->size < src->offset)
- continue;
-
uint32_t hole_begin = hole->offset;
uint32_t hole_end = hole->offset + hole->size;
+ if (hole_end < block_end)
+ continue;
+
/* If another allocated block overlaps the current replay block, allocation is impossible */
- if (block_begin > hole_begin || (hole_end < block_end && hole_end >= block_begin))
+ if (hole_begin > block_begin)
return NULL;
union radv_shader_arena_block *block = insert_block(device, hole, block_begin - hole_begin, src->size, NULL);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_shader.h
^
|
@@ -95,6 +95,7 @@
uint32_t dynamic_provoking_vtx_mode : 1;
uint32_t dynamic_line_rast_mode : 1;
uint32_t tex_non_uniform : 1;
+ uint32_t ssbo_non_uniform : 1;
uint32_t enable_remove_point_size : 1;
uint32_t unknown_rast_prim : 1;
uint32_t mesh_shader_queries : 1;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/radv_video.c
^
|
@@ -112,6 +112,21 @@
*sq->ib_checksum = checksum;
}
+static void
+radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
+{
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256);
+ radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
+ rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+ ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
+ cmd_buffer->cs->cdw++;
+ ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
+ cmd_buffer->cs->cdw++;
+ cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+ cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
+ memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
+}
+
/* generate a stream handle */
static unsigned
si_vid_alloc_stream_handle(struct radv_physical_device *pdevice)
@@ -1668,19 +1683,6 @@
cmd_buffer->video.vid = vid;
cmd_buffer->video.params = params;
-
- if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) {
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256);
- radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
- rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
- ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
- cmd_buffer->cs->cdw++;
- ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
- cmd_buffer->cs->cdw++;
- cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
- cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
- memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
- }
}
static void
@@ -1693,6 +1695,9 @@
uint32_t out_offset;
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
+ if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
+ radv_vcn_sq_start(cmd_buffer);
+
rvcn_dec_message_create(vid, ptr, size);
send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
@@ -1702,7 +1707,8 @@
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
for (unsigned i = 0; i < 8; i++)
radeon_emit(cmd_buffer->cs, 0x81ff);
- }
+ } else
+ radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
}
static void
@@ -1739,12 +1745,6 @@
VKAPI_ATTR void VKAPI_CALL
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED)
- return;
-
- radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
}
static void
@@ -1840,6 +1840,9 @@
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
msg_bo = cmd_buffer->upload.upload_bo;
+ if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
+ radv_vcn_sq_start(cmd_buffer);
+
uint32_t slice_offset;
rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_ptr, &slice_offset, frame_info);
rvcn_dec_message_feedback(fb_ptr);
@@ -1869,7 +1872,8 @@
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
- }
+ } else
+ radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
}
VKAPI_ATTR void VKAPI_CALL
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/amd/vulkan/si_cmd_buffer.c
^
|
@@ -1614,12 +1614,6 @@
/* Sync flags. */
if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1);
- else {
- if (device->physical_device->rad_info.gfx_level >= GFX9)
- command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
- else
- command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
- }
if (flags & CP_DMA_RAW_WAIT)
command |= S_415_RAW_WAIT(1);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/asahi/compiler/agx_compile.h
^
|
@@ -275,7 +275,6 @@
.lower_hadd = true,
.vectorize_io = true,
.use_interpolated_input_intrinsics = true,
- .lower_rotate = true,
.has_isub = true,
.support_16bit_alu = true,
.max_unroll_iterations = 32,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/broadcom/compiler/nir_to_vir.c
^
|
@@ -3047,6 +3047,46 @@
c->current_unifa_offset += 4;
}
+/* Checks if the value of a nir src is derived from a nir register */
+static bool
+nir_src_derived_from_reg(nir_src src)
+{
+ nir_def *def = src.ssa;
+ if (nir_load_reg_for_def(def))
+ return true;
+
+ nir_instr *parent = def->parent_instr;
+ switch (parent->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(parent);
+ int num_srcs = nir_op_infos[alu->op].num_inputs;
+ for (int i = 0; i < num_srcs; i++) {
+ if (nir_src_derived_from_reg(alu->src[i].src))
+ return true;
+ }
+ return false;
+ }
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
+ int num_srcs = nir_intrinsic_infos[intr->intrinsic].num_srcs;
+ for (int i = 0; i < num_srcs; i++) {
+ if (nir_src_derived_from_reg(intr->src[i]))
+ return true;
+ }
+ return false;
+ }
+ case nir_instr_type_load_const:
+ case nir_instr_type_undef:
+ return false;
+ default:
+ /* By default we assume it may come from a register, the above
+ * cases should be able to handle the majority of situations
+ * though.
+ */
+ return true;
+ };
+}
+
static bool
ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
@@ -3069,6 +3109,24 @@
if (nir_src_is_divergent(offset))
return false;
+ /* Emitting loads from unifa may not be safe under non-uniform control
+ * flow. It seems the address that is used to write to the unifa
+ * register is taken from the first lane and if that lane is disabled
+ * by control flow then the value we read may be bogus and lead to
+ * invalid memory accesses on follow-up ldunifa instructions. However,
+ * ntq_store_def only emits conditional writes for nir registers, so as long
+ * as we can be certain that the offset isn't derived from a load_reg we
+ * should be fine.
+ *
+ * The following CTS test can be used to trigger the problem, which
+ * causes GMP violations in the sim without this check:
+ * dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int
+ */
+ if (vir_in_nonuniform_control_flow(c) &&
+ nir_src_derived_from_reg(offset)) {
+ return false;
+ }
+
/* We can only use unifa with SSBOs if they are read-only. Otherwise
* ldunifa won't see the shader writes to that address (possibly
* because ldunifa doesn't read from the L2T cache).
@@ -3243,34 +3301,6 @@
vir_uniform_ui(c, 32 - c->local_invocation_index_bits));
}
-/* Various subgroup operations rely on the A flags, so this helper ensures that
- * A flags represents currently active lanes in the subgroup.
- */
-static void
-set_a_flags_for_subgroup(struct v3d_compile *c)
-{
- /* MSF returns 0 for disabled lanes in compute shaders so
- * PUSHZ will set A=1 for disabled lanes. We want the inverse
- * of this but we don't have any means to negate the A flags
- * directly, but we can do it by repeating the same operation
- * with NORZ (A = ~A & ~Z).
- */
- assert(c->s->info.stage == MESA_SHADER_COMPUTE);
- vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
- vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ);
-
- /* If we are under non-uniform control flow we also need to
- * AND the A flags with the current execute mask.
- */
- if (vir_in_nonuniform_control_flow(c)) {
- const uint32_t bidx = c->cur_block->index;
- vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(),
- c->execute,
- vir_uniform_ui(c, bidx)),
- V3D_QPU_UF_ANDZ);
- }
-}
-
static void
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
@@ -3752,10 +3782,23 @@
break;
case nir_intrinsic_elect: {
- set_a_flags_for_subgroup(c);
- struct qreg first = vir_FLAFIRST(c);
+ struct qreg first;
+ if (vir_in_nonuniform_control_flow(c)) {
+ /* Sets A=1 for lanes enabled in the execution mask */
+ vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
+ V3D_QPU_PF_PUSHZ);
+ /* Updates A ANDing with lanes enabled in MSF */
+ vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()),
+ V3D_QPU_UF_ANDNZ);
+ first = vir_FLAFIRST(c);
+ } else {
+ /* Sets A=1 for inactive lanes */
+ vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()),
+ V3D_QPU_PF_PUSHZ);
+ first = vir_FLNAFIRST(c);
+ }
- /* Produce a boolean result from Flafirst */
+ /* Produce a boolean result */
vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(),
first, vir_uniform_ui(c, 1)),
V3D_QPU_PF_PUSHZ);
@@ -3972,19 +4015,27 @@
c->execute,
vir_uniform_ui(c, else_block->index));
+ /* Set the flags for taking the THEN block */
+ vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
+ V3D_QPU_PF_PUSHZ);
+
/* Jump to ELSE if nothing is active for THEN (unless THEN block is
* so small it won't pay off), otherwise fall through.
*/
bool is_cheap = exec_list_is_singular(&if_stmt->then_list) &&
is_cheap_block(nir_if_first_then_block(if_stmt));
if (!is_cheap) {
- vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ);
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLNA);
vir_link_blocks(c->cur_block, else_block);
}
vir_link_blocks(c->cur_block, then_block);
- /* Process the THEN block. */
+ /* Process the THEN block.
+ *
+ * Notice we don't call ntq_activate_execute_for_block here on purpose:
+ * c->execute is already set up to be 0 for lanes that must take the
+ * THEN block.
+ */
vir_set_emit_block(c, then_block);
ntq_emit_cf_list(c, &if_stmt->then_list);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/broadcom/vulkan/v3dv_pipeline.c
^
|
@@ -228,7 +228,6 @@
.lower_ldexp = true,
.lower_mul_high = true,
.lower_wpos_pntc = false,
- .lower_rotate = true,
.lower_to_scalar = true,
.lower_device_index_to_zero = true,
.lower_fquantize2f16 = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/clc/clc_helpers.cpp
^
|
@@ -23,6 +23,7 @@
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
+#include <cstdlib>
#include <filesystem>
#include <sstream>
#include <mutex>
@@ -57,6 +58,10 @@
#include "spirv.h"
+#if DETECT_OS_UNIX
+#include <dlfcn.h>
+#endif
+
#ifdef USE_STATIC_OPENCL_C_H
#if LLVM_VERSION_MAJOR < 15
#include "opencl-c.h.h"
@@ -876,12 +881,24 @@
#endif
}
#else
+
+ Dl_info info;
+ if (dladdr((void *)clang::CompilerInvocation::CreateFromArgs, &info) == 0) {
+ clc_error(logger, "Couldn't find libclang path.\n");
+ return {};
+ }
+
+ char *clang_path = realpath(info.dli_fname, NULL);
+ if (clang_path == nullptr) {
+ clc_error(logger, "Couldn't find libclang path.\n");
+ return {};
+ }
+
// GetResourcesPath is a way to retrieve the actual libclang resource dir based on a given binary
- // or library. The path doesn't even need to exist, we just have to put something in there,
- // because we might have linked clang statically.
- auto libclang_path = fs::path(LLVM_LIB_DIR) / "libclang.so";
+ // or library.
auto clang_res_path =
- fs::path(Driver::GetResourcesPath(libclang_path.string(), CLANG_RESOURCE_DIR)) / "include";
+ fs::path(Driver::GetResourcesPath(std::string(clang_path), CLANG_RESOURCE_DIR)) / "include";
+ free(clang_path);
c->getHeaderSearchOpts().UseBuiltinIncludes = true;
c->getHeaderSearchOpts().UseStandardSystemIncludes = true;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/clc/meson.build
^
|
@@ -125,7 +125,13 @@
idep_mesautil, dep_spirv_tools]
)
+_idep_mesaclc_link_args = []
+if _shared_llvm
+ _idep_mesaclc_link_args += cc.get_supported_link_arguments('-fPIC')
+endif
+
idep_mesaclc = declare_dependency(
link_with : _libmesaclc,
include_directories : include_directories('.'),
+ link_args : _idep_mesaclc_link_args,
)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/glsl/gl_nir_link_varyings.c
^
|
@@ -746,7 +746,7 @@
if (!validate_explicit_variable_location(consts,
output_explicit_locations,
var, prog, producer)) {
- return;
+ goto out;
}
}
}
@@ -800,7 +800,7 @@
if (!validate_explicit_variable_location(consts,
input_explicit_locations,
input, prog, consumer)) {
- return;
+ goto out;
}
while (idx < slot_limit) {
@@ -808,7 +808,7 @@
linker_error(prog,
"Invalid location %u in %s shader\n", idx,
_mesa_shader_stage_to_string(consumer->Stage));
- return;
+ goto out;
}
output = output_explicit_locations[idx][input->data.location_frac].var;
@@ -871,6 +871,7 @@
}
}
+ out:
_mesa_symbol_table_dtor(table);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir.h
^
|
@@ -1655,6 +1655,12 @@
nir_def def;
} nir_deref_instr;
+/**
+ * Returns true if the cast is trivial, i.e. the source and destination types
+ * are the same.
+ */
+bool nir_deref_cast_is_trivial(nir_deref_instr *cast);
+
/** Returns true if deref might have one of the given modes
*
* For multi-mode derefs, this returns true if any of the possible modes for
@@ -3789,8 +3795,10 @@
/* Lowers when 32x32->64 bit multiplication is not supported */
bool lower_mul_2x32_64;
- /* Lowers when rotate instruction is not supported */
- bool lower_rotate;
+ /* Indicates that urol and uror are supported */
+ bool has_rotate8;
+ bool has_rotate16;
+ bool has_rotate32;
/** Backend supports ternary addition */
bool has_iadd3;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_builder.h
^
|
@@ -1594,6 +1594,7 @@
default:
unreachable("Invalid deref instruction type");
}
+ return NULL;
}
static inline nir_def *
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_clone.c
^
|
@@ -687,6 +687,32 @@
return nfxn;
}
+static u_printf_info *
+clone_printf_info(void *mem_ctx, const nir_shader *s)
+{
+ u_printf_info *infos = ralloc_array(mem_ctx, u_printf_info, s->printf_info_count);
+
+ for (unsigned i = 0; i < s->printf_info_count; i++) {
+ const u_printf_info *src_info = &s->printf_info[i];
+
+ infos[i].num_args = src_info->num_args;
+ infos[i].arg_sizes = ralloc_size(mem_ctx,
+ sizeof(infos[i].arg_sizes[0]) *
+ src_info->num_args);
+ memcpy(infos[i].arg_sizes, src_info->arg_sizes,
+ sizeof(infos[i].arg_sizes[0]) * src_info->num_args);
+
+
+ infos[i].string_size = src_info->string_size;
+ infos[i].strings = ralloc_size(mem_ctx,
+ src_info->string_size);
+ memcpy(infos[i].strings, src_info->strings,
+ src_info->string_size);
+ }
+
+ return infos;
+}
+
nir_shader *
nir_shader_clone(void *mem_ctx, const nir_shader *s)
{
@@ -734,6 +760,11 @@
memcpy(ns->xfb_info, s->xfb_info, size);
}
+ if (s->printf_info_count > 0) {
+ ns->printf_info = clone_printf_info(ns, s);
+ ns->printf_info_count = s->printf_info_count;
+ }
+
free_clone_state(&state);
return ns;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_deref.c
^
|
@@ -26,9 +26,11 @@
#include "nir.h"
#include "nir_builder.h"
-static bool
-is_trivial_deref_cast(nir_deref_instr *cast)
+bool
+nir_deref_cast_is_trivial(nir_deref_instr *cast)
{
+ assert(cast->deref_type == nir_deref_type_cast);
+
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
if (!parent)
return false;
@@ -57,7 +59,7 @@
*tail = NULL;
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
- if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
+ if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d))
continue;
count++;
if (count <= max_short_path_len)
@@ -80,7 +82,7 @@
head = tail = path->path + count;
*tail = NULL;
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
- if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
+ if (d->deref_type == nir_deref_type_cast && nir_deref_cast_is_trivial(d))
continue;
*(--head) = d;
}
@@ -943,7 +945,7 @@
static bool
is_trivial_array_deref_cast(nir_deref_instr *cast)
{
- assert(is_trivial_deref_cast(cast));
+ assert(nir_deref_cast_is_trivial(cast));
nir_deref_instr *parent = nir_src_as_deref(cast->parent);
@@ -1187,7 +1189,7 @@
return true;
progress |= opt_remove_cast_cast(cast);
- if (!is_trivial_deref_cast(cast))
+ if (!nir_deref_cast_is_trivial(cast))
return progress;
/* If this deref still contains useful alignment information, we don't want
@@ -1239,7 +1241,7 @@
*/
if (parent->deref_type == nir_deref_type_cast &&
parent->cast.align_mul == 0 &&
- is_trivial_deref_cast(parent))
+ nir_deref_cast_is_trivial(parent))
parent = nir_deref_instr_parent(parent);
nir_def_rewrite_uses(&deref->def,
&parent->def);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_divergence_analysis.c
^
|
@@ -616,6 +616,7 @@
case nir_intrinsic_isberd_nv:
case nir_intrinsic_al2p_nv:
case nir_intrinsic_ald_nv:
+ case nir_intrinsic_printf:
is_divergent = true;
break;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_instr_set.c
^
|
@@ -441,7 +441,7 @@
} else {
alu1_actual_src = alu1->src[src1].src;
- for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++)
+ for (unsigned i = 0; i < nir_src_num_components(alu1_actual_src); i++)
alu1_swizzle[i] = i;
}
@@ -458,7 +458,7 @@
} else {
alu2_actual_src = alu2->src[src2].src;
- for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu2, src2); i++)
+ for (unsigned i = 0; i < nir_src_num_components(alu2_actual_src); i++)
alu2_swizzle[i] = i;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_loop_analyze.c
^
|
@@ -858,6 +858,7 @@
unsigned execution_mode)
{
nir_const_value span, iter;
+ unsigned iter_bit_size = bit_size;
switch (invert_comparison_if_needed(cond_op, invert_cond)) {
case nir_op_ine:
@@ -911,13 +912,14 @@
iter = eval_const_binop(nir_op_fdiv, bit_size, span,
step, execution_mode);
iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode);
+ iter_bit_size = 64;
break;
default:
return -1;
}
- uint64_t iter_u64 = nir_const_value_as_uint(iter, bit_size);
+ uint64_t iter_u64 = nir_const_value_as_uint(iter, iter_bit_size);
return iter_u64 > INT_MAX ? -1 : (int)iter_u64;
}
@@ -1138,11 +1140,13 @@
*/
for (int bias = -1; bias <= 1; bias++) {
const int iter_bias = iter_int + bias;
+ if (iter_bias < 1)
+ continue;
if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
induction_base_type, initial,
limit_rhs, invert_cond, execution_mode)) {
- return iter_bias > 0 ? iter_bias - trip_offset : iter_bias;
+ return iter_bias - trip_offset;
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_int64.c
^
|
@@ -1344,6 +1344,7 @@
default:
unreachable("Unsupported intrinsic");
}
+ return NULL;
}
static bool
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_io.c
^
|
@@ -1570,7 +1570,8 @@
nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
/* TODO: Better handle block_intel. */
- const unsigned load_size = (bit_size / 8) * load->num_components;
+ assert(load->num_components == 1);
+ const unsigned load_size = bit_size / 8;
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
nir_builder_instr_insert(b, &load->instr);
@@ -1755,7 +1756,8 @@
if (addr_format_needs_bounds_check(addr_format)) {
/* TODO: Better handle block_intel. */
- const unsigned store_size = (value->bit_size / 8) * store->num_components;
+ assert(store->num_components == 1);
+ const unsigned store_size = value->bit_size / 8;
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
nir_builder_instr_insert(b, &store->instr);
@@ -1948,8 +1950,12 @@
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
unsigned vec_stride = glsl_get_explicit_stride(deref->type);
unsigned scalar_size = type_scalar_size_bytes(deref->type);
- assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
- assert(vec_stride == 0 || vec_stride >= scalar_size);
+ if (vec_stride == 0) {
+ vec_stride = scalar_size;
+ } else {
+ assert(glsl_type_is_vector(deref->type));
+ assert(vec_stride >= scalar_size);
+ }
uint32_t align_mul, align_offset;
if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
@@ -1958,10 +1964,27 @@
align_offset = 0;
}
+ /* In order for bounds checking to be correct as per the Vulkan spec,
+ * we need to check at the individual component granularity. Prior to
+ * robustness2, we're technically allowed to be sloppy by 16B. Even with
+ * robustness2, UBO loads are allowed to have a granularity as high as 256B
+ * depending on hardware limits. However, we have none of that information
+ * here. Short of adding new address formats, the easiest way to do that
+ * is to just split any loads and stores into individual components here.
+ *
+ * TODO: At some point in the future we may want to add more ops similar to
+ * nir_intrinsic_load_global_constant_bounded and make bounds checking the
+ * back-end's problem. Another option would be to somehow plumb more of
+ * that information through to nir_lower_explicit_io. For now, however,
+ * scalarizing is at least correct.
+ */
+ bool scalarize = vec_stride > scalar_size ||
+ addr_format_needs_bounds_check(addr_format);
+
switch (intrin->intrinsic) {
case nir_intrinsic_load_deref: {
nir_def *value;
- if (vec_stride > scalar_size) {
+ if (scalarize) {
nir_def *comps[NIR_MAX_VEC_COMPONENTS] = {
NULL,
};
@@ -1990,7 +2013,7 @@
case nir_intrinsic_store_deref: {
nir_def *value = intrin->src[1].ssa;
nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
- if (vec_stride > scalar_size) {
+ if (scalarize) {
for (unsigned i = 0; i < intrin->num_components; i++) {
if (!(write_mask & (1 << i)))
continue;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_non_uniform_access.c
^
|
@@ -136,8 +136,12 @@
num_handles++;
}
- if (num_handles == 0)
+ if (num_handles == 0) {
+ /* nu_handle_init() returned false because the handles are uniform. */
+ tex->texture_non_uniform = false;
+ tex->sampler_non_uniform = false;
return false;
+ }
b->cursor = nir_instr_remove(&tex->instr);
@@ -177,8 +181,10 @@
return false;
struct nu_handle handle;
- if (!nu_handle_init(&handle, &intrin->src[handle_src]))
+ if (!nu_handle_init(&handle, &intrin->src[handle_src])) {
+ nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);
return false;
+ }
b->cursor = nir_instr_remove(&intrin->instr);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_lower_shader_calls.c
^
|
@@ -2050,6 +2050,8 @@
for (unsigned i = 0; i < num_calls; i++) {
nir_instr *resume_instr = lower_resume(resume_shaders[i], i);
replace_resume_with_halt(resume_shaders[i], resume_instr);
+ /* Remove CF after halt before nir_opt_if(). */
+ nir_opt_dead_cf(resume_shaders[i]);
/* Remove the dummy blocks added by flatten_resume_if_ladder() */
nir_opt_if(resume_shaders[i], nir_opt_if_optimize_phi_true_false);
nir_opt_dce(resume_shaders[i]);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_algebraic.py
^
|
@@ -1380,22 +1380,22 @@
(('ishr', a, 0), a),
(('ushr', 0, a), 0),
(('ushr', a, 0), a),
- (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
- (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), '!options->lower_rotate'),
- (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
- (('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), '!options->lower_rotate'),
- (('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'),
- (('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), '!options->lower_rotate'),
- (('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'),
- (('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), '!options->lower_rotate'),
- (('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), 'options->lower_rotate'),
- (('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), 'options->lower_rotate'),
- (('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), 'options->lower_rotate'),
- (('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b))), 'options->lower_rotate'),
- (('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), 'options->lower_rotate'),
- (('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), 'options->lower_rotate'),
- (('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), 'options->lower_rotate'),
- (('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b))), 'options->lower_rotate'),
+ (('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), 'options->has_rotate16'),
+ (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), 'options->has_rotate16'),
+ (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), 'options->has_rotate32'),
+ (('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), 'options->has_rotate32'),
+ (('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), 'options->has_rotate16'),
+ (('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), 'options->has_rotate16'),
+ (('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), 'options->has_rotate32'),
+ (('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), 'options->has_rotate32'),
+ (('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), '!options->has_rotate8'),
+ (('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), '!options->has_rotate16'),
+ (('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), '!options->has_rotate32'),
+ (('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b)))),
+ (('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), '!options->has_rotate8'),
+ (('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), '!options->has_rotate16'),
+ (('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), '!options->has_rotate32'),
+ (('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b)))),
# bfi(X, a, b) = (b & ~X) | (a & X)
# If X = ~0: (b & 0) | (a & 0xffffffff) = a
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_copy_prop_vars.c
^
|
@@ -1065,6 +1065,12 @@
if (nir_deref_mode_must_be(src.instr, ignore))
break;
+ /* Ignore trivial casts. If trivial casts are applied to array derefs of vectors,
+ * not doing this causes is_array_deref_of_vector to (wrongly) return false. */
+ while (src.instr->deref_type == nir_deref_type_cast &&
+ nir_deref_instr_parent(src.instr) && nir_deref_cast_is_trivial(src.instr))
+ src.instr = nir_deref_instr_parent(src.instr);
+
/* Direct array_derefs of vectors operate on the vectors (the parent
* deref). Indirects will be handled like other derefs.
*/
@@ -1157,6 +1163,12 @@
nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
assert(glsl_type_is_vector_or_scalar(dst.instr->type));
+ /* Ignore trivial casts. If trivial casts are applied to array derefs of vectors,
+ * not doing this causes is_array_deref_of_vector to (wrongly) return false. */
+ while (dst.instr->deref_type == nir_deref_type_cast &&
+ nir_deref_instr_parent(dst.instr) && nir_deref_cast_is_trivial(dst.instr))
+ dst.instr = nir_deref_instr_parent(dst.instr);
+
/* Direct array_derefs of vectors operate on the vectors (the parent
* deref). Indirects will be handled like other derefs.
*/
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/nir_opt_move_discards_to_top.c
^
|
@@ -165,10 +165,47 @@
instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
goto break_all;
}
-
- if ((intrin->intrinsic == nir_intrinsic_discard_if && consider_discards) ||
- intrin->intrinsic == nir_intrinsic_demote_if)
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_quad_broadcast:
+ case nir_intrinsic_quad_swap_horizontal:
+ case nir_intrinsic_quad_swap_vertical:
+ case nir_intrinsic_quad_swap_diagonal:
+ case nir_intrinsic_quad_swizzle_amd:
+ consider_discards = false;
+ break;
+ case nir_intrinsic_vote_any:
+ case nir_intrinsic_vote_all:
+ case nir_intrinsic_vote_feq:
+ case nir_intrinsic_vote_ieq:
+ case nir_intrinsic_ballot:
+ case nir_intrinsic_first_invocation:
+ case nir_intrinsic_read_invocation:
+ case nir_intrinsic_read_first_invocation:
+ case nir_intrinsic_elect:
+ case nir_intrinsic_reduce:
+ case nir_intrinsic_inclusive_scan:
+ case nir_intrinsic_exclusive_scan:
+ case nir_intrinsic_shuffle:
+ case nir_intrinsic_shuffle_xor:
+ case nir_intrinsic_shuffle_up:
+ case nir_intrinsic_shuffle_down:
+ case nir_intrinsic_rotate:
+ case nir_intrinsic_masked_swizzle_amd:
+ instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
+ goto break_all;
+ case nir_intrinsic_discard_if:
+ if (!consider_discards) {
+ /* assume that a shader either uses discard or demote, but not both */
+ instr->pass_flags = STOP_PROCESSING_INSTR_FLAG;
+ goto break_all;
+ }
+ FALLTHROUGH;
+ case nir_intrinsic_demote_if:
moved = moved || try_move_discard(intrin);
+ break;
+ default:
+ break;
+ }
continue;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/tests/comparison_pre_tests.cpp
^
|
@@ -579,3 +579,95 @@
EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
}
+
+TEST_F(comparison_pre_test, multi_comps_load)
+{
+ /* Before:
+ *
+ * vec1 32 ssa_0 = load_ubo (...)
+ * vec4 32 ssa_1 = load_ubo (...)
+ * vec1 1 ssa_2 = flt ssa_0, ssa_1.w
+ *
+ * if ssa_2 {
+ * vec1 32 ssa_3 = fneg ssa_1.x
+ * vec1 32 ssa_4 = fadd ssa_0, ssa_3
+ * } else {
+ * }
+ */
+ nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32,
+ nir_imm_int(&bld, 0),
+ nir_imm_int(&bld, 0));
+ nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32,
+ nir_imm_int(&bld, 1),
+ nir_imm_int(&bld, 0));
+
+ nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt);
+ flt->src[0].src = nir_src_for_ssa(ssa_0);
+ flt->src[1].src = nir_src_for_ssa(ssa_1);
+ memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx));
+ memcpy(&flt->src[1].swizzle, wwww, sizeof(wwww));
+ nir_builder_alu_instr_finish_and_insert(&bld, flt);
+ flt->def.num_components = 1;
+ nir_def *ssa_2 = &flt->def;
+
+ nir_if *nif = nir_push_if(&bld, ssa_2);
+ {
+ nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg);
+ fneg->src[0].src = nir_src_for_ssa(ssa_1);
+ memcpy(&fneg->src[0].swizzle, xxxx, sizeof(xxxx));
+ nir_builder_alu_instr_finish_and_insert(&bld, fneg);
+ fneg->def.num_components = 1;
+ nir_def *ssa_3 = &fneg->def;
+
+ nir_fadd(&bld, ssa_0, ssa_3);
+ }
+ nir_pop_if(&bld, nif);
+
+ EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, multi_comps_load2)
+{
+ /* Before:
+ *
+ * vec1 32 ssa_0 = load_ubo (...)
+ * vec4 32 ssa_1 = load_ubo (...)
+ * vec1 1 ssa_2 = flt ssa_0, ssa_1.x
+ *
+ * if ssa_2 {
+ * vec1 32 ssa_3 = fneg ssa_1.w
+ * vec1 32 ssa_4 = fadd ssa_0, ssa_3
+ * } else {
+ * }
+ */
+ nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32,
+ nir_imm_int(&bld, 0),
+ nir_imm_int(&bld, 0));
+ nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32,
+ nir_imm_int(&bld, 1),
+ nir_imm_int(&bld, 0));
+
+ nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt);
+ flt->src[0].src = nir_src_for_ssa(ssa_0);
+ flt->src[1].src = nir_src_for_ssa(ssa_1);
+ memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx));
+ memcpy(&flt->src[1].swizzle, xxxx, sizeof(xxxx));
+ nir_builder_alu_instr_finish_and_insert(&bld, flt);
+ flt->def.num_components = 1;
+ nir_def *ssa_2 = &flt->def;
+
+ nir_if *nif = nir_push_if(&bld, ssa_2);
+ {
+ nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg);
+ fneg->src[0].src = nir_src_for_ssa(ssa_1);
+ memcpy(&fneg->src[0].swizzle, wwww, sizeof(wwww));
+ nir_builder_alu_instr_finish_and_insert(&bld, fneg);
+ fneg->def.num_components = 1;
+ nir_def *ssa_3 = &fneg->def;
+
+ nir_fadd(&bld, ssa_0, ssa_3);
+ }
+ nir_pop_if(&bld, nif);
+
+ EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
+}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/nir/tests/loop_analyze_tests.cpp
^
|
@@ -285,6 +285,7 @@
INOT_COMPARE(ilt_rev)
INOT_COMPARE(ine)
+INOT_COMPARE(uge_rev)
#define KNOWN_COUNT_TEST(_init_value, _cond_value, _incr_value, cond, incr, count) \
TEST_F(nir_loop_analyze_test, incr ## _ ## cond ## _known_count_ ## count) \
@@ -569,6 +570,16 @@
/* uint i = 0;
* while (true) {
+ * if (!(0 >= i))
+ * break;
+ *
+ * i += 1;
+ * }
+ */
+KNOWN_COUNT_TEST(0x00000000, 0x00000000, 0x00000001, inot_uge_rev, iadd, 1)
+
+/* uint i = 0;
+ * while (true) {
* if (i != 0)
* break;
*
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/nir_spirv.h
^
|
@@ -116,6 +116,8 @@
/* Force texture sampling to be non-uniform. */
bool force_tex_non_uniform;
+ /* Force SSBO accesses to be non-uniform. */
+ bool force_ssbo_non_uniform;
/* In Debug Builds, instead of emitting an OS break on failure, just return NULL from
* spirv_to_nir(). This is useful for the unit tests that want to report a test failed
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/spirv_to_nir.c
^
|
@@ -156,7 +156,7 @@
if (len < 0 || len >= sizeof(filename))
return;
- FILE *f = fopen(filename, "w");
+ FILE *f = fopen(filename, "wb");
if (f == NULL)
return;
@@ -4375,9 +4375,13 @@
w + 5, count - 5);
break;
- case SpvOpCopyLogical:
+ case SpvOpCopyLogical: {
ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
+ struct vtn_type *dst_type = vtn_get_value_type(b, w[2]);
+ vtn_assert(vtn_types_compatible(b, type, dst_type));
+ ssa->type = glsl_get_bare_type(dst_type->type);
break;
+ }
case SpvOpCopyObject:
vtn_copy_value(b, w[3], w[2]);
return;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/vtn_alu.c
^
|
@@ -94,38 +94,16 @@
transpose_result = true;
}
- if (src0_transpose && !src1_transpose &&
- glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
- /* We already have the rows of src0 and the columns of src1 available,
- * so we can just take the dot product of each row with each column to
- * get the result.
- */
-
- for (unsigned i = 0; i < src1_columns; i++) {
- nir_def *vec_src[4];
- for (unsigned j = 0; j < src0_rows; j++) {
- vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
- src1->elems[i]->def);
- }
- dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
- }
- } else {
- /* We don't handle the case where src1 is transposed but not src0, since
- * the general case only uses individual components of src1 so the
- * optimizer should chew through the transpose we emitted for src1.
- */
-
- for (unsigned i = 0; i < src1_columns; i++) {
- /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
+ for (unsigned i = 0; i < src1_columns; i++) {
+ /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
+ dest->elems[i]->def =
+ nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def,
+ nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
+ for (int j = src0_columns - 2; j >= 0; j--) {
dest->elems[i]->def =
- nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def,
- nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
- for (int j = src0_columns - 2; j >= 0; j--) {
- dest->elems[i]->def =
- nir_ffma(&b->nb, src0->elems[j]->def,
- nir_channel(&b->nb, src1->elems[i]->def, j),
- dest->elems[i]->def);
- }
+ nir_ffma(&b->nb, src0->elems[j]->def,
+ nir_channel(&b->nb, src1->elems[i]->def, j),
+ dest->elems[i]->def);
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/compiler/spirv/vtn_variables.c
^
|
@@ -2632,6 +2632,9 @@
/* Workaround for https://gitlab.freedesktop.org/mesa/mesa/-/issues/3406 */
access |= base->access & ACCESS_NON_UNIFORM;
+ if (base->mode == vtn_variable_mode_ssbo && b->options->force_ssbo_non_uniform)
+ access |= ACCESS_NON_UNIFORM;
+
struct vtn_pointer *ptr = vtn_pointer_dereference(b, base, chain);
ptr->ptr_type = ptr_type;
ptr->access |= access;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/egl/drivers/dri2/egl_dri2.c
^
|
@@ -1067,6 +1067,8 @@
dri2_dpy->dri3_major_version != -1 &&
!dri2_dpy->multibuffers_available &&
#endif
+ (disp->Platform == EGL_PLATFORM_X11_KHR ||
+ disp->Platform == EGL_PLATFORM_XCB_EXT) &&
!debug_get_bool_option("LIBGL_KOPPER_DRI2", false))
return EGL_FALSE;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/egl/main/eglapi.c
^
|
@@ -695,7 +695,7 @@
if (disp->Options.ForceSoftware)
RETURN_EGL_ERROR(disp, EGL_NOT_INITIALIZED, EGL_FALSE);
else {
- bool success = disp->Options.Zink;
+ bool success = false;
if (!disp->Options.Zink && !getenv("GALLIUM_DRIVER")) {
disp->Options.Zink = EGL_TRUE;
success = _eglDriver.Initialize(disp);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/egl/main/egldisplay.c
^
|
@@ -642,6 +642,7 @@
_eglGetSurfacelessDisplay(void *native_display, const EGLAttrib *attrib_list)
{
_EGLDisplay *dpy;
+ _EGLDevice *dev = NULL;
/* Any native display must be an EGLDeviceEXT we know about */
if (native_display != NULL) {
@@ -657,8 +658,8 @@
switch (attrib) {
case EGL_DEVICE_EXT:
- if ((native_display && native_display != (void *)value) ||
- (native_display != _eglLookupDevice(native_display))) {
+ dev = _eglLookupDevice((void *)value);
+ if (!dev) {
_eglError(EGL_BAD_DEVICE_EXT, "eglGetPlatformDisplay");
return NULL;
}
@@ -671,10 +672,9 @@
}
}
- dpy =
- _eglFindDisplay(_EGL_PLATFORM_SURFACELESS, native_display, attrib_list);
+ dpy = _eglFindDisplay(_EGL_PLATFORM_SURFACELESS, NULL, attrib_list);
if (dpy) {
- dpy->Device = native_display;
+ dpy->Device = dev;
}
return dpy;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a530-fails.txt
^
|
@@ -208,7 +208,6 @@
spec@arb_separate_shader_objects@400 combinations by name,Fail
spec@arb_texture_rectangle@1-1-linear-texture,Fail
spec@arb_timer_query@query gl_timestamp,Fail
-spec@arb_timer_query@timestamp-get,Fail
spec@arb_transform_feedback3@gl_skipcomponents1-1,Fail
spec@arb_transform_feedback3@gl_skipcomponents1-2,Fail
spec@arb_transform_feedback3@gl_skipcomponents1-3,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a618-fails.txt
^
|
@@ -91,6 +91,7 @@
spec@arb_sample_shading@samplemask 4@sample mask_in_one,Fail
# Same results w/ zink-on-tu as with freedreno:
+spec@arb_post_depth_coverage@arb_post_depth_coverage-multisampling,Fail
spec@arb_sample_shading@samplemask 2 all@noms partition,Fail
spec@arb_sample_shading@samplemask 2@noms partition,Fail
spec@arb_sample_shading@samplemask 4 all@noms partition,Fail
@@ -110,8 +111,6 @@
spec@arb_texture_rectangle@1-1-linear-texture,Fail
-spec@arb_timer_query@timestamp-get,Fail
-
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
# fails unrelated to GL_ARB_enhanced_layouts
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a630-fails.txt
^
|
@@ -114,8 +114,6 @@
spec@arb_texture_rectangle@1-1-linear-texture,Fail
-spec@arb_timer_query@timestamp-get,Fail
-
spec@arb_vertex_type_2_10_10_10_rev@attrib-p-type-size-match,Fail
# fails unrelated to GL_ARB_enhanced_layouts
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ci/freedreno-a660-fails.txt
^
|
@@ -11,24 +11,6 @@
dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail
gmem-dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bvec2_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bvec3_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bvec4_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_float_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec2_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec3_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec4_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec2_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec3_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec4_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec2_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec3_fragment,Fail
-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec4_fragment,Fail
-gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_ivec4_fragment,Fail
-gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_uvec2_fragment,Fail
-gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec2_fragment,Fail
-gmem-dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_vec4_fragment,Fail
-
# New CTS fails in 1.3.6.3
gmem-dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage:struct_mixed_types.uniform_buffer_block_geom,Fail
gmem-dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage.struct_mixed_types.uniform_buffer_block_geom,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/drm/freedreno_device.c
^
|
@@ -104,6 +104,9 @@
if (!use_heap) {
struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D);
+ if (!pipe)
+ goto fail;
+
/* Userspace fences don't appear to be reliable enough (missing some
* cache flushes?) on older gens, so limit sub-alloc heaps to a6xx+
* for now:
@@ -119,6 +122,10 @@
}
return dev;
+
+fail:
+ fd_device_del(dev);
+ return NULL;
}
/* like fd_device_new() but creates it's own private dup() of the fd
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_compiler.c
^
|
@@ -109,7 +109,6 @@
.lower_unpack_unorm_2x16 = true,
.lower_pack_split = true,
.use_interpolated_input_intrinsics = true,
- .lower_rotate = true,
.lower_to_scalar = true,
.has_imul24 = true,
.has_fsub = true,
@@ -133,7 +132,7 @@
ir3_shader_debug = debug_get_option_ir3_shader_debug();
ir3_shader_override_path =
- !__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL;
+ __normal_user() ? debug_get_option_ir3_shader_override_path() : NULL;
if (ir3_shader_override_path) {
ir3_shader_debug |= IR3_DBG_NOCACHE;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/freedreno/ir3/ir3_legalize.c
^
|
@@ -998,6 +998,7 @@
if (block->brtype == IR3_BRANCH_ALL ||
block->brtype == IR3_BRANCH_ANY ||
block->brtype == IR3_BRANCH_GETONE) {
+ bd->uses_helpers_beginning = true;
bd->uses_helpers_end = true;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/driver_trace/tr_dump.c
^
|
@@ -284,7 +284,7 @@
atexit(trace_dump_trace_close);
const char *trigger = debug_get_option("GALLIUM_TRACE_TRIGGER", NULL);
- if (trigger) {
+ if (trigger && __normal_user()) {
trigger_filename = strdup(trigger);
trigger_active = false;
} else
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
^
|
@@ -62,6 +62,7 @@
#include <llvm/Support/PrettyStackTrace.h>
#include <llvm/ExecutionEngine/ObjectCache.h>
#include <llvm/Support/TargetSelect.h>
+#include <llvm/CodeGen/SelectionDAGNodes.h>
#if LLVM_VERSION_MAJOR >= 15
#include <llvm/Support/MemoryBuffer.h>
#endif
@@ -100,6 +101,8 @@
#include "lp_bld_misc.h"
#include "lp_bld_debug.h"
+static void lp_run_atexit_for_destructors(void);
+
namespace {
class LLVMEnsureMultithreaded {
@@ -147,6 +150,7 @@
}
}
#endif
+ lp_run_atexit_for_destructors();
}
extern "C" void
@@ -623,3 +627,33 @@
M->setOverrideStackAlignment(align);
#endif
}
+
+using namespace llvm;
+
+class GallivmRunAtExitForStaticDestructors : public SDNode
+{
+public:
+ /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
+ GallivmRunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
+ {
+ }
+};
+
+static void
+lp_run_atexit_for_destructors(void)
+{
+ /* LLVM >= 16 registers static variable destructors on the first compile, which gcc
+ * implements by calling atexit there. Before that, u_queue registers its atexit
+ * handler to kill all threads. Since exit() runs atexit handlers in the reverse order,
+ * the LLVM destructors are called first while shader compiler threads may still be
+ * running, which crashes in LLVM in SelectionDAG.cpp.
+ *
+ * The solution is to run the code that declares the LLVM static variables first,
+ * so that atexit for LLVM is registered first and u_queue is registered after that,
+ * which ensures that all u_queue threads are terminated before LLVM destructors are
+ * called.
+ *
+ * This just executes the code that declares static variables.
+ */
+ GallivmRunAtExitForStaticDestructors();
+}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/nir/nir_to_tgsi.c
^
|
@@ -3689,13 +3689,15 @@
!options->lower_fdph ||
!options->lower_flrp64 ||
!options->lower_fmod ||
- !options->lower_rotate ||
!options->lower_uadd_carry ||
!options->lower_usub_borrow ||
!options->lower_uadd_sat ||
!options->lower_usub_sat ||
!options->lower_uniforms_to_ubo ||
!options->lower_vector_cmp ||
+ options->has_rotate8 ||
+ options->has_rotate16 ||
+ options->has_rotate32 ||
options->lower_fsqrt != lower_fsqrt ||
options->force_indirect_unrolling != no_indirects_mask ||
force_indirect_unrolling_sampler) {
@@ -3709,7 +3711,6 @@
new_options->lower_fdph = true;
new_options->lower_flrp64 = true;
new_options->lower_fmod = true;
- new_options->lower_rotate = true;
new_options->lower_uadd_carry = true;
new_options->lower_usub_borrow = true;
new_options->lower_uadd_sat = true;
@@ -3717,6 +3718,9 @@
new_options->lower_uniforms_to_ubo = true;
new_options->lower_vector_cmp = true;
new_options->lower_fsqrt = lower_fsqrt;
+ new_options->has_rotate8 = false;
+ new_options->has_rotate16 = false;
+ new_options->has_rotate32 = false;
new_options->force_indirect_unrolling = no_indirects_mask;
new_options->force_indirect_unrolling_sampler = force_indirect_unrolling_sampler;
@@ -4062,7 +4066,6 @@
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,
- .lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/auxiliary/vl/vl_video_buffer.c
^
|
@@ -296,13 +296,19 @@
nr_components = 3;
for (j = 0; j < nr_components && component < VL_NUM_COMPONENTS; ++j, ++component) {
+ unsigned pipe_swizzle;
+
if (buf->sampler_view_components[component])
continue;
memset(&sv_templ, 0, sizeof(sv_templ));
u_sampler_view_default_template(&sv_templ, res, sampler_format[plane_order[i]]);
- sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = PIPE_SWIZZLE_X + j;
+ pipe_swizzle = (buf->base.buffer_format == PIPE_FORMAT_YUYV || buf->base.buffer_format == PIPE_FORMAT_UYVY) ?
+ (PIPE_SWIZZLE_X + j + 1) % 3 :
+ (PIPE_SWIZZLE_X + j);
+ sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = pipe_swizzle;
sv_templ.swizzle_a = PIPE_SWIZZLE_1;
+
buf->sampler_view_components[component] = pipe->create_sampler_view(pipe, res, &sv_templ);
if (!buf->sampler_view_components[component])
goto error;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_screen.cpp
^
|
@@ -735,10 +735,6 @@
screen->dev->Release();
screen->dev = nullptr;
}
- if (screen->winsys) {
- screen->winsys->destroy(screen->winsys);
- screen->winsys = nullptr;
- }
}
void
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_video_enc_av1.cpp
^
|
@@ -2189,7 +2189,7 @@
writtenTemporalDelimBytes // Bytes Written AFTER placingPositionStart arg above
);
assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == writtenTemporalDelimBytes);
- debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", writtenTemporalDelimBytes);
+ debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenTemporalDelimBytes));
}
size_t writtenSequenceBytes = 0;
@@ -2208,7 +2208,7 @@
writtenSequenceBytes // Bytes Written AFTER placingPositionStart arg above
);
assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == (writtenSequenceBytes + writtenTemporalDelimBytes));
- debug_printf("Written OBU_SEQUENCE_HEADER bytes: %" PRIu64 "\n", writtenSequenceBytes);
+ debug_printf("Written OBU_SEQUENCE_HEADER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenSequenceBytes));
}
// Only supported bitstream format is with obu_size for now.
@@ -2254,14 +2254,14 @@
writtenFrameBytes // Bytes Written AFTER placingPositionStart arg above
);
- debug_printf("Written OBU_FRAME bytes: %" PRIu64 "\n", writtenFrameBytes);
+ debug_printf("Written OBU_FRAME bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenFrameBytes));
assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() ==
(writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes));
debug_printf("Uploading %" PRIu64
" bytes from OBU sequence and/or picture headers to comp_bit_destination %p at offset 0\n",
- pD3D12Enc->m_BitstreamHeadersBuffer.size(),
+ static_cast<uint64_t>(pD3D12Enc->m_BitstreamHeadersBuffer.size()),
associatedMetadata.comp_bit_destination);
// Upload headers to the finalized compressed bitstream buffer
@@ -2330,13 +2330,13 @@
writtenFrameBytes // Bytes Written AFTER placingPositionStart arg above
);
- debug_printf("Written OBU_FRAME_HEADER bytes: %" PRIu64 "\n", writtenFrameBytes);
+ debug_printf("Written OBU_FRAME_HEADER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenFrameBytes));
assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() ==
(writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes));
debug_printf("Uploading %" PRIu64 " bytes from OBU headers to comp_bit_destination %p at offset 0\n",
- pD3D12Enc->m_BitstreamHeadersBuffer.size(),
+ static_cast<uint64_t>(pD3D12Enc->m_BitstreamHeadersBuffer.size()),
associatedMetadata.comp_bit_destination);
// Upload headers to the finalized compressed bitstream buffer
@@ -2361,7 +2361,7 @@
debug_printf("Uploading tile group %d to comp_bit_destination %p at offset %" PRIu64 "\n",
tg_idx,
associatedMetadata.comp_bit_destination,
- comp_bitstream_offset);
+ static_cast<uint64_t>(comp_bitstream_offset));
size_t tile_group_obu_size = 0;
size_t decode_tile_elements_size = 0;
@@ -2387,9 +2387,9 @@
debug_printf("Written %" PRIu64 " bytes for OBU_TILE_GROUP open_bitstream_unit() prefix with obu_header() and "
"obu_size to staging_bitstream_buffer %p at offset %" PRIu64 "\n",
- writtenTileObuPrefixBytes,
+ static_cast<uint64_t>(writtenTileObuPrefixBytes),
associatedMetadata.m_StagingBitstreamConstruction.data(),
- staging_bitstream_buffer_offset);
+ static_cast<uint64_t>(staging_bitstream_buffer_offset));
writtenTileBytes += writtenTileObuPrefixBytes;
@@ -2404,10 +2404,10 @@
debug_printf("Uploading %" PRIu64 " bytes for OBU_TILE_GROUP open_bitstream_unit() prefix with obu_header() "
"and obu_size: %" PRIu64 " to comp_bit_destination %p at offset %" PRIu64 "\n",
- writtenTileObuPrefixBytes,
- tile_group_obu_size,
+ static_cast<uint64_t>(writtenTileObuPrefixBytes),
+ static_cast<uint64_t>(tile_group_obu_size),
associatedMetadata.comp_bit_destination,
- comp_bitstream_offset);
+ static_cast<uint64_t>(comp_bitstream_offset));
staging_bitstream_buffer_offset += writtenTileObuPrefixBytes;
@@ -2517,7 +2517,7 @@
// Add current pending frame being processed in the loop
extra_show_existing_frame_payload_bytes += writtenTemporalDelimBytes;
- debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", writtenTemporalDelimBytes);
+ debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", static_cast<uint64_t>(writtenTemporalDelimBytes));
size_t writtenShowExistingFrameBytes = 0;
av1_pic_header_t showExistingPicHdr = {};
@@ -2561,7 +2561,7 @@
"in current frame ref_frame_idx[%" PRIu32 "]) bytes: %" PRIu64 "\n",
*pendingFrameIt /*PictureIndex*/,
showExistingPicHdr.frame_to_show_map_idx,
- writtenShowExistingFrameBytes);
+ static_cast<uint64_t>(writtenShowExistingFrameBytes));
// Remove it from the list of pending frames
pendingFrameIt =
@@ -2628,7 +2628,7 @@
tileGroup.tg_start,
tileGroup.tg_end,
comp_bit_destination,
- comp_bit_destination_offset);
+ static_cast<uint64_t>(comp_bit_destination_offset));
debug_printf("[Tile group start %d to end %d] Using staging_bitstream_buffer %p at offset %" PRIu64
" to write the tile_obu_group() prefix syntax: tile_start_and_end_present_flag, tg_start, tg_end and "
@@ -2636,7 +2636,7 @@
tileGroup.tg_start,
tileGroup.tg_end,
staging_bitstream_buffer.data(),
- staging_bitstream_buffer_offset);
+ static_cast<uint64_t>(staging_bitstream_buffer_offset));
// Reserve space upfront in the scratch storage
// Do not modify anything before staging_bitstream_buffer_offset
@@ -2673,9 +2673,9 @@
" for tile_obu_group() prefix syntax: tile_start_and_end_present_flag, tg_start, tg_end\n",
tileGroup.tg_start,
tileGroup.tg_end,
- bitstream_tile_group_obu_bytes,
+ static_cast<uint64_t>(bitstream_tile_group_obu_bytes),
staging_bitstream_buffer.data(),
- staging_bitstream_buffer_offset);
+ static_cast<uint64_t>(staging_bitstream_buffer_offset));
// Save this to compare the final written destination byte size against the expected tile_group_obu_size
@@ -2699,11 +2699,11 @@
" to comp_bit_destination %p at offset %" PRIu64 "\n",
tileGroup.tg_start,
tileGroup.tg_end,
- bitstream_tile_group_obu_bytes,
+ static_cast<uint64_t>(bitstream_tile_group_obu_bytes),
staging_bitstream_buffer.data(),
- staging_bitstream_buffer_offset,
+ static_cast<uint64_t>(staging_bitstream_buffer_offset),
comp_bit_destination,
- comp_bit_destination_offset);
+ static_cast<uint64_t>(comp_bit_destination_offset));
comp_bit_destination_offset += bitstream_tile_group_obu_bytes;
written_bytes_to_staging_bitstream_buffer += bitstream_tile_group_obu_bytes;
@@ -2729,9 +2729,9 @@
tileGroup.tg_start,
tileGroup.tg_end,
TileIdx,
- TileSizeBytes,
+ static_cast<uint64_t>(TileSizeBytes),
staging_bitstream_buffer.data(),
- (written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset));
+ static_cast<uint64_t>(written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset));
// Upload current tile_size_minus_1
// Note: The buffer_subdata is queued in pD3D12Enc->base.context but doesn't execute immediately
@@ -2751,11 +2751,11 @@
tileGroup.tg_start,
tileGroup.tg_end,
TileIdx,
- TileSizeBytes,
+ static_cast<uint64_t>(TileSizeBytes),
staging_bitstream_buffer.data(),
- (written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset),
+ static_cast<uint64_t>(written_bytes_to_staging_bitstream_buffer + staging_bitstream_buffer_offset),
comp_bit_destination,
- comp_bit_destination_offset);
+ static_cast<uint64_t>(comp_bit_destination_offset));
comp_bit_destination_offset += TileSizeBytes;
written_bytes_to_staging_bitstream_buffer += TileSizeBytes;
@@ -2788,11 +2788,11 @@
tileGroup.tg_start,
tileGroup.tg_end,
TileIdx,
- tile_size,
+ static_cast<uint64_t>(tile_size),
src_driver_bitstream,
- src_buf_tile_position,
+ static_cast<uint64_t>(src_buf_tile_position),
comp_bit_destination,
- comp_bit_destination_offset);
+ static_cast<uint64_t>(comp_bit_destination_offset));
comp_bit_destination_offset += tile_size;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_av1.cpp
^
|
@@ -153,7 +153,7 @@
write_obu_header(&bitstream_full_obu, OBU_TEMPORAL_DELIMITER, obu_extension_flag, temporal_id, spatial_id);
// Write the data size
- const size_t obu_size_in_bytes = 0;
+ const uint64_t obu_size_in_bytes = 0;
debug_printf("obu_size: %" PRIu64 " (temporal_delimiter_obu() has empty payload as per AV1 codec spec)\n",
obu_size_in_bytes);
pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes);
@@ -197,7 +197,7 @@
write_obu_header(&bitstream_full_obu, OBU_SEQUENCE_HEADER, obu_extension_flag, temporal_id, spatial_id);
// Write the data size
- const size_t obu_size_in_bytes = static_cast<size_t>(bitstream_seq.get_byte_count());
+ const uint64_t obu_size_in_bytes = bitstream_seq.get_byte_count();
debug_printf("obu_size: %" PRIu64 "\n", obu_size_in_bytes);
pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes);
@@ -802,7 +802,7 @@
debug_printf("frame_header_obu() bytes (without OBU_FRAME nor OBU_FRAME_HEADER alignment padding): %" PRId32 "\n",
bitstream_pic.get_byte_count()); // May be bit unaligned at this point (see padding below)
debug_printf("extra_obu_size_bytes (ie. tile_group_obu_size if writing OBU_FRAME ): %" PRIu64 "\n",
- extra_obu_size_bytes);
+ static_cast<uint64_t>(extra_obu_size_bytes));
// Write the obu_header
constexpr uint32_t obu_extension_flag = 0;
@@ -825,7 +825,7 @@
bitstream_pic.flush();
// Write the obu_size element
- const size_t obu_size_in_bytes = bitstream_pic.get_byte_count() + extra_obu_size_bytes;
+ const uint64_t obu_size_in_bytes = bitstream_pic.get_byte_count() + extra_obu_size_bytes;
debug_printf("obu_size: %" PRIu64 "\n", obu_size_in_bytes);
pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes);
@@ -913,7 +913,7 @@
// Write the obu_size element
pack_obu_header_size(&bitstream_full_obu, tile_group_obu_size);
- debug_printf("obu_size: %" PRIu64 "\n", tile_group_obu_size);
+ debug_printf("obu_size: %" PRIu64 "\n", static_cast<uint64_t>(tile_group_obu_size));
bitstream_full_obu.flush();
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_av1.cpp
^
|
@@ -213,7 +213,7 @@
"Number of DPB virtual entries is %" PRIu64 " entries for frame with OrderHint "
"%d (PictureIndex %d) are: \n%s \n",
m_PhysicalAllocationsStorage.get_number_of_pics_in_dpb(),
- m_CurrentFrameReferencesData.pVirtualDPBEntries.size(),
+ static_cast<uint64_t>(m_CurrentFrameReferencesData.pVirtualDPBEntries.size()),
m_CurrentFramePicParams.OrderHint,
m_CurrentFramePicParams.PictureIndex,
dpbContents.c_str());
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler.c
^
|
@@ -54,8 +54,6 @@
.lower_fmod = true,
.lower_vector_cmp = true,
.lower_fdph = true,
- .lower_extract_byte = true,
- .lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_fdiv = true, /* !specs->has_new_transcendentals */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_screen.c
^
|
@@ -458,6 +458,11 @@
{
bool supported = true;
+ /* Requires split sampler support, which the driver doesn't support, yet. */
+ if (!util_format_is_compressed(format) &&
+ util_format_get_blocksizebits(format) > 32)
+ return false;
+
if (fmt == TEXTURE_FORMAT_ETC1)
supported = VIV_FEATURE(screen, chipFeatures, ETC1_TEXTURE_COMPRESSION);
@@ -500,6 +505,10 @@
if (fmt == ETNA_NO_MATCH)
return false;
+ /* Requires split target support, which the driver doesn't support, yet. */
+ if (util_format_get_blocksizebits(format) > 32)
+ return false;
+
if (sample_count > 1) {
/* Explicitly enabled. */
if (!DBG_ENABLED(ETNA_DBG_MSAA))
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/etnaviv_shader.c
^
|
@@ -147,6 +147,7 @@
COND(last_varying_2x, VIVS_RA_CONTROL_LAST_VARYING_2X);
cs->PA_ATTRIBUTE_ELEMENT_COUNT = VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(link.num_varyings);
+ STATIC_ASSERT(VIVS_PA_SHADER_ATTRIBUTES__LEN >= ETNA_NUM_VARYINGS);
for (int idx = 0; idx < link.num_varyings; ++idx)
cs->PA_SHADER_ATTRIBUTES[idx] = link.varyings[idx].pa_attributes;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h
^
|
@@ -10,7 +10,7 @@
The rules-ng-ng source files this header was generated from are:
- cmdstream.xml ( 16930 bytes, from 2019-01-04 11:37:39)
- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
-- common.xml ( 35468 bytes, from 2020-01-04 20:02:31)
+- common.xml ( 35465 bytes, from 2023-11-13 11:29:31)
Copyright (C) 2012-2019 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/common.xml.h
^
|
@@ -10,10 +10,10 @@
The rules-ng-ng source files this header was generated from are:
- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26)
- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
-- common.xml ( 35468 bytes, from 2020-01-04 20:02:31)
-- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53)
+- common.xml ( 35465 bytes, from 2023-11-13 11:29:31)
+- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31)
-Copyright (C) 2012-2020 by the following authors:
+Copyright (C) 2012-2023 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
- Christian Gmeiner <christian.gmeiner@gmail.com>
- Lucas Stach <l.stach@pengutronix.de>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/common_3d.xml.h
^
|
@@ -10,10 +10,10 @@
The rules-ng-ng source files this header was generated from are:
- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26)
- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
-- common.xml ( 35468 bytes, from 2020-01-04 20:02:31)
-- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53)
+- common.xml ( 35465 bytes, from 2023-11-13 11:29:31)
+- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31)
-Copyright (C) 2012-2022 by the following authors:
+Copyright (C) 2012-2023 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
- Christian Gmeiner <christian.gmeiner@gmail.com>
- Lucas Stach <l.stach@pengutronix.de>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/isa.xml.h
^
|
@@ -8,10 +8,10 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- isa.xml ( 38205 bytes, from 2022-09-03 22:41:40)
+- isa.xml ( 39261 bytes, from 2023-11-13 11:29:31)
- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
-Copyright (C) 2012-2022 by the following authors:
+Copyright (C) 2012-2023 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
- Christian Gmeiner <christian.gmeiner@gmail.com>
- Lucas Stach <l.stach@pengutronix.de>
@@ -90,8 +90,8 @@
#define INST_OPCODE_CMP 0x00000031
#define INST_OPCODE_LOAD 0x00000032
#define INST_OPCODE_STORE 0x00000033
-#define INST_OPCODE_COPYSIGN 0x00000034
-#define INST_OPCODE_GETEXP 0x00000035
+#define INST_OPCODE_IMG_LOAD_3D 0x00000034
+#define INST_OPCODE_IMG_STORE_3D 0x00000035
#define INST_OPCODE_GETMANT 0x00000036
#define INST_OPCODE_NAN 0x00000037
#define INST_OPCODE_NEXTAFTER 0x00000038
@@ -159,8 +159,8 @@
#define INST_OPCODE_NORM_DP4 0x00000076
#define INST_OPCODE_NORM_MUL 0x00000077
#define INST_OPCODE_STORE_ATTR 0x00000078
-#define INST_OPCODE_LOAD_ATTR 0x00000079
-#define INST_OPCODE_EMIT 0x0000007a
+#define INST_OPCODE_IMG_LOAD 0x00000079
+#define INST_OPCODE_IMG_STORE 0x0000007a
#define INST_OPCODE_RESTART 0x0000007b
#define INST_OPCODE_NOP7C 0x0000007c
#define INST_OPCODE_NOP7D 0x0000007d
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/state.xml.h
^
|
@@ -8,17 +8,17 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state.xml ( 27198 bytes, from 2022-08-16 16:28:18)
-- common.xml ( 35468 bytes, from 2020-01-04 20:02:31)
-- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53)
-- state_hi.xml ( 34803 bytes, from 2022-08-16 16:28:18)
+- state.xml ( 28218 bytes, from 2023-11-13 11:29:31)
+- common.xml ( 35465 bytes, from 2023-11-13 11:29:31)
+- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31)
+- state_hi.xml ( 34935 bytes, from 2023-11-13 11:29:31)
- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
-- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26)
-- state_3d.xml ( 84326 bytes, from 2022-10-07 06:11:53)
-- state_blt.xml ( 14424 bytes, from 2022-10-07 06:11:53)
+- state_2d.xml ( 52271 bytes, from 2023-09-13 13:37:23)
+- state_3d.xml ( 86123 bytes, from 2023-11-13 12:42:26)
+- state_blt.xml ( 14424 bytes, from 2023-09-13 13:37:23)
- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26)
-Copyright (C) 2012-2022 by the following authors:
+Copyright (C) 2012-2023 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
- Christian Gmeiner <christian.gmeiner@gmail.com>
- Lucas Stach <l.stach@pengutronix.de>
@@ -400,7 +400,16 @@
#define VIVS_GL_FENCE_OUT_DATA_LOW 0x0000386c
-#define VIVS_GL_HALTI5_UNK03884 0x00003884
+#define VIVS_GL_USC_CONTROL 0x00003884
+#define VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__MASK 0x00000007
+#define VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__SHIFT 0
+#define VIVS_GL_USC_CONTROL_L1_CACHE_RATIO(x) (((x) << VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__SHIFT) & VIVS_GL_USC_CONTROL_L1_CACHE_RATIO__MASK)
+#define VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__MASK 0x00000f00
+#define VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__SHIFT 8
+#define VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO(x) (((x) << VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__SHIFT) & VIVS_GL_USC_CONTROL_ATTRIB_CACHE_RATIO__MASK)
+#define VIVS_GL_USC_CONTROL_UNK16__MASK 0x001f0000
+#define VIVS_GL_USC_CONTROL_UNK16__SHIFT 16
+#define VIVS_GL_USC_CONTROL_UNK16(x) (((x) << VIVS_GL_USC_CONTROL_UNK16__SHIFT) & VIVS_GL_USC_CONTROL_UNK16__MASK)
#define VIVS_GL_HALTI5_SH_SPECIALS 0x00003888
#define VIVS_GL_HALTI5_SH_SPECIALS_VS_PSIZE_OUT__MASK 0x0000007f
@@ -434,6 +443,30 @@
#define VIVS_GL_SECURITY_UNK3904 0x00003904
+#define VIVS_GL_NN_CONFIG 0x00003930
+#define VIVS_GL_NN_CONFIG_UNK0__MASK 0x00000003
+#define VIVS_GL_NN_CONFIG_UNK0__SHIFT 0
+#define VIVS_GL_NN_CONFIG_UNK0(x) (((x) << VIVS_GL_NN_CONFIG_UNK0__SHIFT) & VIVS_GL_NN_CONFIG_UNK0__MASK)
+#define VIVS_GL_NN_CONFIG_DISABLE_ZDPN 0x00000004
+#define VIVS_GL_NN_CONFIG_DISABLE_SWTILING 0x00000008
+#define VIVS_GL_NN_CONFIG_SMALL_BATCH 0x00000010
+#define VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__MASK 0x00000060
+#define VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__SHIFT 5
+#define VIVS_GL_NN_CONFIG_DDR_BURST_SIZE(x) (((x) << VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__SHIFT) & VIVS_GL_NN_CONFIG_DDR_BURST_SIZE__MASK)
+#define VIVS_GL_NN_CONFIG_UNK7 0x00000080
+#define VIVS_GL_NN_CONFIG_NN_CORE_COUNT__MASK 0x00000f00
+#define VIVS_GL_NN_CONFIG_NN_CORE_COUNT__SHIFT 8
+#define VIVS_GL_NN_CONFIG_NN_CORE_COUNT(x) (((x) << VIVS_GL_NN_CONFIG_NN_CORE_COUNT__SHIFT) & VIVS_GL_NN_CONFIG_NN_CORE_COUNT__MASK)
+#define VIVS_GL_NN_CONFIG_UNK12 0x00001000
+
+#define VIVS_GL_SRAM_REMAP_ADDRESS 0x00003938
+
+#define VIVS_GL_OCB_REMAP_START 0x0000393c
+
+#define VIVS_GL_OCB_REMAP_END 0x00003940
+
+#define VIVS_GL_TP_CONFIG 0x0000394c
+
#define VIVS_GL_UNK03A00 0x00003a00
#define VIVS_GL_UNK03A04 0x00003a04
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
^
|
@@ -8,17 +8,17 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state.xml ( 27198 bytes, from 2022-04-22 10:35:24)
-- common.xml ( 35468 bytes, from 2020-10-28 12:56:03)
-- common_3d.xml ( 15058 bytes, from 2020-10-28 12:56:03)
-- state_hi.xml ( 34803 bytes, from 2020-10-28 12:56:03)
-- copyright.xml ( 1597 bytes, from 2020-10-28 12:56:03)
-- state_2d.xml ( 51552 bytes, from 2020-10-28 12:56:03)
-- state_3d.xml ( 84445 bytes, from 2022-11-15 15:59:38)
-- state_blt.xml ( 14424 bytes, from 2022-11-07 11:18:41)
-- state_vg.xml ( 5975 bytes, from 2020-10-28 12:56:03)
+- state.xml ( 28218 bytes, from 2023-11-13 11:29:31)
+- common.xml ( 35465 bytes, from 2023-11-13 11:29:31)
+- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31)
+- state_hi.xml ( 34935 bytes, from 2023-11-13 11:29:31)
+- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
+- state_2d.xml ( 52271 bytes, from 2023-09-13 13:37:23)
+- state_3d.xml ( 86123 bytes, from 2023-11-13 12:42:26)
+- state_blt.xml ( 14424 bytes, from 2023-09-13 13:37:23)
+- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26)
-Copyright (C) 2012-2022 by the following authors:
+Copyright (C) 2012-2023 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
- Christian Gmeiner <christian.gmeiner@gmail.com>
- Lucas Stach <l.stach@pengutronix.de>
@@ -159,6 +159,15 @@
#define VIVS_VS_END_PC 0x00000800
#define VIVS_VS_OUTPUT_COUNT 0x00000804
+#define VIVS_VS_OUTPUT_COUNT_COUNT__MASK 0x000000ff
+#define VIVS_VS_OUTPUT_COUNT_COUNT__SHIFT 0
+#define VIVS_VS_OUTPUT_COUNT_COUNT(x) (((x) << VIVS_VS_OUTPUT_COUNT_COUNT__SHIFT) & VIVS_VS_OUTPUT_COUNT_COUNT__MASK)
+#define VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__MASK 0x0000ff00
+#define VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__SHIFT 8
+#define VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG(x) (((x) << VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__SHIFT) & VIVS_VS_OUTPUT_COUNT_OUTPUT16_REG__MASK)
+#define VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__MASK 0x00ff0000
+#define VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__SHIFT 16
+#define VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG(x) (((x) << VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__SHIFT) & VIVS_VS_OUTPUT_COUNT_OUTPUT17_REG__MASK)
#define VIVS_VS_INPUT_COUNT 0x00000808
#define VIVS_VS_INPUT_COUNT_COUNT__MASK 0x0000000f
@@ -270,7 +279,7 @@
#define VIVS_VS_ICACHE_PREFETCH 0x0000088c
-#define VIVS_VS_ICACHE_UNK00890 0x00000890
+#define VIVS_VS_ICACHE_PREFETCH_INSTRUCTIONS 0x00000890
#define VIVS_VS_HALTI5_UNK00898(i0) (0x00000898 + 0x4*(i0))
#define VIVS_VS_HALTI5_UNK00898__ESIZE 0x00000004
@@ -421,17 +430,23 @@
#define VIVS_CL_UNK00924 0x00000924
-#define VIVS_CL_UNK00940 0x00000940
+#define VIVS_CL_GLOBAL_WORK_OFFSET_X 0x0000092c
-#define VIVS_CL_UNK00944 0x00000944
+#define VIVS_CL_GLOBAL_WORK_OFFSET_Y 0x00000934
-#define VIVS_CL_UNK00948 0x00000948
+#define VIVS_CL_GLOBAL_WORK_OFFSET_Z 0x0000093c
-#define VIVS_CL_UNK0094C 0x0000094c
+#define VIVS_CL_WORKGROUP_COUNT_X 0x00000940
-#define VIVS_CL_UNK00950 0x00000950
+#define VIVS_CL_WORKGROUP_COUNT_Y 0x00000944
-#define VIVS_CL_UNK00954 0x00000954
+#define VIVS_CL_WORKGROUP_COUNT_Z 0x00000948
+
+#define VIVS_CL_WORKGROUP_SIZE_X 0x0000094c
+
+#define VIVS_CL_WORKGROUP_SIZE_Y 0x00000950
+
+#define VIVS_CL_WORKGROUP_SIZE_Z 0x00000954
#define VIVS_CL_HALTI5_UNK00958 0x00000958
@@ -504,7 +519,7 @@
#define VIVS_PA_SHADER_ATTRIBUTES(i0) (0x00000a40 + 0x4*(i0))
#define VIVS_PA_SHADER_ATTRIBUTES__ESIZE 0x00000004
-#define VIVS_PA_SHADER_ATTRIBUTES__LEN 0x0000000a
+#define VIVS_PA_SHADER_ATTRIBUTES__LEN 0x00000010
#define VIVS_PA_SHADER_ATTRIBUTES_BYPASS_FLAT 0x00000001
#define VIVS_PA_SHADER_ATTRIBUTES_UNK4__MASK 0x000000f0
#define VIVS_PA_SHADER_ATTRIBUTES_UNK4__SHIFT 4
@@ -593,7 +608,7 @@
#define VIVS_PS_OUTPUT_REG 0x00001004
#define VIVS_PS_INPUT_COUNT 0x00001008
-#define VIVS_PS_INPUT_COUNT_COUNT__MASK 0x0000000f
+#define VIVS_PS_INPUT_COUNT_COUNT__MASK 0x0000001f
#define VIVS_PS_INPUT_COUNT_COUNT__SHIFT 0
#define VIVS_PS_INPUT_COUNT_COUNT(x) (((x) << VIVS_PS_INPUT_COUNT_COUNT__SHIFT) & VIVS_PS_INPUT_COUNT_COUNT__MASK)
#define VIVS_PS_INPUT_COUNT_UNK8__MASK 0x00001f00
@@ -628,6 +643,8 @@
#define VIVS_PS_RANGE_HIGH__SHIFT 16
#define VIVS_PS_RANGE_HIGH(x) (((x) << VIVS_PS_RANGE_HIGH__SHIFT) & VIVS_PS_RANGE_HIGH__MASK)
+#define VIVS_PS_REG_COUNT 0x0000101e
+
#define VIVS_PS_UNIFORM_BASE 0x00001024
#define VIVS_PS_INST_ADDR 0x00001028
@@ -676,7 +693,7 @@
#define VIVS_PS_ICACHE_PREFETCH 0x00001048
-#define VIVS_PS_ICACHE_UNK0104C 0x0000104c
+#define VIVS_PS_ICACHE_PREFETCH_INSTRUCTIONS 0x0000104c
#define VIVS_PS_MSAA_CONFIG 0x00001054
@@ -694,6 +711,12 @@
#define VIVS_PS_HALTI5_UNK01098 0x00001098
+#define VIVS_PS_PSCS_THROTTLE 0x0000109c
+
+#define VIVS_PS_NN_INST_ADDR 0x000010a0
+
+#define VIVS_PS_TP_INST_ADDR 0x000010b8
+
#define VIVS_PS_INST_MEM(i0) (0x00006000 + 0x4*(i0))
#define VIVS_PS_INST_MEM__ESIZE 0x00000004
#define VIVS_PS_INST_MEM__LEN 0x00000400
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/state_blt.xml.h
^
|
@@ -8,17 +8,17 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state.xml ( 27198 bytes, from 2022-08-16 16:28:18)
-- common.xml ( 35468 bytes, from 2020-01-04 20:02:31)
-- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53)
-- state_hi.xml ( 34803 bytes, from 2022-08-16 16:28:18)
+- state.xml ( 28218 bytes, from 2023-11-13 11:29:31)
+- common.xml ( 35465 bytes, from 2023-11-13 11:29:31)
+- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31)
+- state_hi.xml ( 34935 bytes, from 2023-11-13 11:29:31)
- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
-- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26)
-- state_3d.xml ( 84326 bytes, from 2022-10-07 06:11:53)
-- state_blt.xml ( 14424 bytes, from 2022-10-07 06:11:53)
+- state_2d.xml ( 52271 bytes, from 2023-09-13 13:37:23)
+- state_3d.xml ( 86123 bytes, from 2023-11-13 12:42:26)
+- state_blt.xml ( 14424 bytes, from 2023-09-13 13:37:23)
- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26)
-Copyright (C) 2012-2022 by the following authors:
+Copyright (C) 2012-2023 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
- Christian Gmeiner <christian.gmeiner@gmail.com>
- Lucas Stach <l.stach@pengutronix.de>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h
^
|
@@ -10,8 +10,8 @@
The rules-ng-ng source files this header was generated from are:
- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26)
- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26)
-- common.xml ( 35468 bytes, from 2020-01-04 20:02:31)
-- common_3d.xml ( 15058 bytes, from 2022-10-07 06:11:53)
+- common.xml ( 35465 bytes, from 2023-11-13 11:29:31)
+- common_3d.xml ( 15069 bytes, from 2023-11-13 11:29:31)
Copyright (C) 2012-2018 by the following authors:
- Wladimir J. van der Laan <laanwj@gmail.com>
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
^
|
@@ -43,7 +43,6 @@
.lower_all_io_to_temps = true,
.vertex_id_zero_based = true, /* its not implemented anyway */
.lower_bitops = true,
- .lower_rotate = true,
.lower_vector_cmp = true,
.lower_fdph = true,
.has_fsub = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c
^
|
@@ -206,16 +206,6 @@
OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
}
-static uint64_t
-ticks_to_ns(uint32_t ts)
-{
- /* This is based on the 19.2MHz always-on rbbm timer.
- *
- * TODO we should probably query this value from kernel..
- */
- return ts * (1000000000 / 19200000);
-}
-
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/a6xx/fd6_query.cc
^
|
@@ -267,16 +267,6 @@
OUT_RING(ring, 0x00000000);
}
-static uint64_t
-ticks_to_ns(uint64_t ts)
-{
- /* This is based on the 19.2MHz always-on rbbm timer.
- *
- * TODO we should probably query this value from kernel..
- */
- return ts * (1000000000 / 19200000);
-}
-
static void
time_elapsed_accumulate_result(struct fd_acc_query *aq,
struct fd_acc_query_sample *s,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_screen.c
^
|
@@ -141,8 +141,7 @@
if (screen->has_timestamp) {
uint64_t n;
fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n);
- assert(screen->max_freq > 0);
- return n * 1000000000 / screen->max_freq;
+ return ticks_to_ns(n);
} else {
int64_t cpu_time = os_time_get_nano();
return cpu_time + screen->cpu_gpu_time_delta;
@@ -590,6 +589,8 @@
/* only a4xx, requires new enough kernel so we know max_freq: */
return (screen->max_freq > 0) &&
(is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen));
+ case PIPE_CAP_TIMER_RESOLUTION:
+ return ticks_to_ns(1);
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
@@ -1113,10 +1114,11 @@
screen->max_freq = 0;
} else {
screen->max_freq = val;
- if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0)
- screen->has_timestamp = true;
}
+ if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0)
+ screen->has_timestamp = true;
+
screen->dev_id = fd_pipe_dev_id(screen->pipe);
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/freedreno/freedreno_util.h
^
|
@@ -507,6 +507,13 @@
return INDEX4_SIZE_32_BIT;
}
+/* Convert 19.2MHz RBBM always-on timer ticks to ns */
+static inline uint64_t
+ticks_to_ns(uint64_t ts)
+{
+ return ts * (1000000000 / 19200000);
+}
+
#ifdef __cplusplus
}
#endif
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/i915/i915_screen.c
^
|
@@ -117,7 +117,6 @@
.lower_fdph = true,
.lower_flrp32 = true,
.lower_fmod = true,
- .lower_rotate = true,
.lower_sincos = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
@@ -161,7 +160,6 @@
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
- .lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_mul_2x32_64 = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_batch.c
^
|
@@ -278,6 +278,9 @@
{
unsigned index = READ_ONCE(bo->index);
+ if (index == -1)
+ return -1;
+
if (index < batch->exec_count && batch->exec_bos[index] == bo)
return index;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_bufmgr.c
^
|
@@ -1413,6 +1413,7 @@
bo->bufmgr = bufmgr;
bo->gem_handle = open_arg.handle;
bo->name = name;
+ bo->index = -1;
bo->real.global_name = handle;
bo->real.prime_fd = -1;
bo->real.reusable = false;
@@ -1974,6 +1975,7 @@
bo->bufmgr = bufmgr;
bo->name = "prime";
+ bo->index = -1;
bo->real.reusable = false;
bo->real.imported = true;
bo->real.mmap_mode = IRIS_MMAP_NONE;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_resolve.c
^
|
@@ -682,7 +682,7 @@
/* A data cache flush is not suggested by HW docs, but we found it to fix
* a number of failures.
*/
- unsigned wa_flush = intel_device_info_is_dg2(batch->screen->devinfo) &&
+ unsigned wa_flush = devinfo->verx10 >= 125 &&
res->aux.usage == ISL_AUX_USAGE_HIZ_CCS ?
PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/iris/iris_state.c
^
|
@@ -6806,13 +6806,16 @@
bool program_needs_wa_14015055625 = false;
+#if INTEL_WA_14015055625_GFX_VER
/* Check if FS stage will use primitive ID overrides for Wa_14015055625. */
const struct brw_vue_map *last_vue_map =
&brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
if ((wm_prog_data->inputs & VARYING_BIT_PRIMITIVE_ID) &&
- last_vue_map->varying_to_slot[VARYING_SLOT_PRIMITIVE_ID] == -1) {
+ last_vue_map->varying_to_slot[VARYING_SLOT_PRIMITIVE_ID] == -1 &&
+ intel_needs_workaround(batch->screen->devinfo, 14015055625)) {
program_needs_wa_14015055625 = true;
}
+#endif
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
if (!(stage_dirty & (IRIS_STAGE_DIRTY_VS << stage)))
@@ -6828,8 +6831,10 @@
uint32_t scratch_addr =
pin_scratch_space(ice, batch, prog_data, stage);
+#if INTEL_WA_14015055625_GFX_VER
shader_program_needs_wa_14015055625(ice, batch, prog_data, stage,
&program_needs_wa_14015055625);
+#endif
if (stage == MESA_SHADER_FRAGMENT) {
UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast;
@@ -7864,6 +7869,11 @@
#endif
}
+ if (indirect) {
+ struct mi_builder b;
+ uint32_t mocs;
+ mi_builder_init(&b, batch->screen->devinfo, batch);
+
#define _3DPRIM_END_OFFSET 0x2420
#define _3DPRIM_START_VERTEX 0x2430
#define _3DPRIM_VERTEX_COUNT 0x2434
@@ -7871,103 +7881,100 @@
#define _3DPRIM_START_INSTANCE 0x243C
#define _3DPRIM_BASE_VERTEX 0x2440
- struct mi_builder b;
- uint32_t mocs;
- mi_builder_init(&b, batch->screen->devinfo, batch);
+ if (!indirect->count_from_stream_output) {
+ if (indirect->indirect_draw_count) {
+ use_predicate = true;
+
+ struct iris_bo *draw_count_bo =
+ iris_resource_bo(indirect->indirect_draw_count);
+ unsigned draw_count_offset =
+ indirect->indirect_draw_count_offset;
+ mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0);
+ mi_builder_set_mocs(&b, mocs);
+
+ if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+ /* comparison = draw id < draw count */
+ struct mi_value comparison =
+ mi_ult(&b, mi_imm(drawid_offset),
+ mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
+
+ /* predicate = comparison & conditional rendering predicate */
+ mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
+ mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
+ } else {
+ uint32_t mi_predicate;
+
+ /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
+ mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset));
+ /* Upload the current draw count from the draw parameters buffer
+ * to MI_PREDICATE_SRC0. Zero the top 32-bits of
+ * MI_PREDICATE_SRC0.
+ */
+ mi_store(&b, mi_reg64(MI_PREDICATE_SRC0),
+ mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
- if (indirect && !indirect->count_from_stream_output) {
- if (indirect->indirect_draw_count) {
- use_predicate = true;
-
- struct iris_bo *draw_count_bo =
- iris_resource_bo(indirect->indirect_draw_count);
- unsigned draw_count_offset =
- indirect->indirect_draw_count_offset;
- mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0);
+ if (drawid_offset == 0) {
+ mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
+ MI_PREDICATE_COMBINEOP_SET |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+ } else {
+ /* While draw_index < draw_count the predicate's result will be
+ * (draw_index == draw_count) ^ TRUE = TRUE
+ * When draw_index == draw_count the result is
+ * (TRUE) ^ TRUE = FALSE
+ * After this all results will be:
+ * (FALSE) ^ FALSE = FALSE
+ */
+ mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_XOR |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+ }
+ iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
+ }
+ }
+ struct iris_bo *bo = iris_resource_bo(indirect->buffer);
+ assert(bo);
+
+ mocs = iris_mocs(bo, &batch->screen->isl_dev, 0);
mi_builder_set_mocs(&b, mocs);
- if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
- /* comparison = draw id < draw count */
- struct mi_value comparison =
- mi_ult(&b, mi_imm(drawid_offset),
- mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
-
- /* predicate = comparison & conditional rendering predicate */
- mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
- mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
+ mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
+ mi_mem32(ro_bo(bo, indirect->offset + 0)));
+ mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
+ mi_mem32(ro_bo(bo, indirect->offset + 4)));
+ mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX),
+ mi_mem32(ro_bo(bo, indirect->offset + 8)));
+ if (draw->index_size) {
+ mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX),
+ mi_mem32(ro_bo(bo, indirect->offset + 12)));
+ mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
+ mi_mem32(ro_bo(bo, indirect->offset + 16)));
} else {
- uint32_t mi_predicate;
-
- /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
- mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset));
- /* Upload the current draw count from the draw parameters buffer
- * to MI_PREDICATE_SRC0. Zero the top 32-bits of
- * MI_PREDICATE_SRC0.
- */
- mi_store(&b, mi_reg64(MI_PREDICATE_SRC0),
- mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
+ mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
+ mi_mem32(ro_bo(bo, indirect->offset + 12)));
+ mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0));
+ }
+ } else if (indirect->count_from_stream_output) {
+ struct iris_stream_output_target *so =
+ (void *) indirect->count_from_stream_output;
+ struct iris_bo *so_bo = iris_resource_bo(so->offset.res);
- if (drawid_offset == 0) {
- mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
- MI_PREDICATE_COMBINEOP_SET |
- MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
- } else {
- /* While draw_index < draw_count the predicate's result will be
- * (draw_index == draw_count) ^ TRUE = TRUE
- * When draw_index == draw_count the result is
- * (TRUE) ^ TRUE = FALSE
- * After this all results will be:
- * (FALSE) ^ FALSE = FALSE
- */
- mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
- MI_PREDICATE_COMBINEOP_XOR |
- MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
- }
- iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
- }
- }
- struct iris_bo *bo = iris_resource_bo(indirect->buffer);
- assert(bo);
+ mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0);
+ mi_builder_set_mocs(&b, mocs);
- mocs = iris_mocs(bo, &batch->screen->isl_dev, 0);
- mi_builder_set_mocs(&b, mocs);
+ iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ);
- mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
- mi_mem32(ro_bo(bo, indirect->offset + 0)));
- mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
- mi_mem32(ro_bo(bo, indirect->offset + 4)));
- mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX),
- mi_mem32(ro_bo(bo, indirect->offset + 8)));
- if (draw->index_size) {
- mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX),
- mi_mem32(ro_bo(bo, indirect->offset + 12)));
- mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
- mi_mem32(ro_bo(bo, indirect->offset + 16)));
- } else {
- mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
- mi_mem32(ro_bo(bo, indirect->offset + 12)));
+ struct iris_address addr = ro_bo(so_bo, so->offset.offset);
+ struct mi_value offset =
+ mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
+ mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/lima/lima_program.c
^
|
@@ -57,7 +57,6 @@
/* could be implemented by clamp */
.lower_fsat = true,
.lower_bitops = true,
- .lower_rotate = true,
.lower_sincos = true,
.lower_fceil = true,
.lower_insert_byte = true,
@@ -78,7 +77,6 @@
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_fsign = true,
- .lower_rotate = true,
.lower_fdot = true,
.lower_fdph = true,
.lower_insert_byte = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
^
|
@@ -616,7 +616,6 @@
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
- .lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_mul_2x32_64 = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/llvmpipe/lp_texture_handle.c
^
|
@@ -202,6 +202,7 @@
static void *
compile_function(struct llvmpipe_context *ctx, struct gallivm_state *gallivm, LLVMValueRef function,
+ bool needs_caching,
uint8_t cache_key[SHA1_DIGEST_LENGTH])
{
gallivm_verify_function(gallivm, function);
@@ -209,7 +210,7 @@
void *function_ptr = func_to_pointer(gallivm_jit_function(gallivm, function));
- if (!gallivm->cache->data_size)
+ if (needs_caching)
lp_disk_cache_insert_shader(llvmpipe_screen(ctx->pipe.screen), gallivm->cache, cache_key);
gallivm_free_ir(gallivm);
@@ -251,10 +252,12 @@
_mesa_sha1_update(&hash_ctx, image_function_base_hash, strlen(image_function_base_hash));
_mesa_sha1_update(&hash_ctx, texture, sizeof(*texture));
_mesa_sha1_update(&hash_ctx, &op, sizeof(op));
+ _mesa_sha1_update(&hash_ctx, &ms, sizeof(ms));
_mesa_sha1_final(&hash_ctx, cache_key);
struct lp_cached_code cached = { 0 };
lp_disk_cache_find_shader(llvmpipe_screen(ctx->pipe.screen), &cached, cache_key);
+ bool needs_caching = !cached.data_size;
struct gallivm_state *gallivm = gallivm_create("sample_function", ctx->context, &cached);
@@ -333,7 +336,7 @@
free(image_soa);
- return compile_function(ctx, gallivm, function, cache_key);
+ return compile_function(ctx, gallivm, function, needs_caching, cache_key);
}
static void *
@@ -407,6 +410,7 @@
struct lp_cached_code cached = { 0 };
lp_disk_cache_find_shader(llvmpipe_screen(ctx->pipe.screen), &cached, cache_key);
+ bool needs_caching = !cached.data_size;
struct gallivm_state *gallivm = gallivm_create("sample_function", ctx->context, &cached);
@@ -480,7 +484,7 @@
free(sampler_soa);
- return compile_function(ctx, gallivm, function, cache_key);
+ return compile_function(ctx, gallivm, function, needs_caching, cache_key);
}
static void *
@@ -496,6 +500,7 @@
struct lp_cached_code cached = { 0 };
lp_disk_cache_find_shader(llvmpipe_screen(ctx->pipe.screen), &cached, cache_key);
+ bool needs_caching = !cached.data_size;
struct gallivm_state *gallivm = gallivm_create("sample_function", ctx->context, &cached);
@@ -560,7 +565,7 @@
free(sampler_soa);
- return compile_function(ctx, gallivm, function, cache_key);
+ return compile_function(ctx, gallivm, function, needs_caching, cache_key);
}
static void
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/nouveau/nv30/nv30_screen.c
^
|
@@ -477,7 +477,6 @@
.lower_flrp64 = true,
.lower_fmod = true,
.lower_fpow = true, /* In hardware as of nv40 FS */
- .lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.force_indirect_unrolling = nir_var_all,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_blit.c
^
|
@@ -92,7 +92,7 @@
/* Legalize here because it could trigger a recursive blit otherwise */
pan_legalize_afbc_format(ctx, pan_resource(info->dst.resource),
- info->dst.format, true);
+ info->dst.format, true, false);
panfrost_blitter_save(ctx, info->render_condition_enable
? PAN_RENDER_BLIT_COND
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_cmdstream.c
^
|
@@ -4224,7 +4224,7 @@
struct panfrost_sampler_view *so =
rzalloc(pctx, struct panfrost_sampler_view);
- pan_legalize_afbc_format(ctx, pan_resource(texture), template->format,
+ pan_legalize_afbc_format(ctx, pan_resource(texture), template->format, false,
false);
pipe_reference(NULL, &texture->reference);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_context.c
^
|
@@ -283,7 +283,7 @@
*/
if (drm_is_afbc(rsrc->image.layout.modifier)) {
pan_resource_modifier_convert(
- ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
+ ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, true,
"Shader image");
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_job.c
^
|
@@ -68,7 +68,7 @@
{
if (surf) {
struct panfrost_resource *rsrc = pan_resource(surf->texture);
- pan_legalize_afbc_format(batch->ctx, rsrc, surf->format, true);
+ pan_legalize_afbc_format(batch->ctx, rsrc, surf->format, true, false);
panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
}
}
@@ -493,6 +493,19 @@
fb->rts[i].discard = !reserve && !(batch->resolve & mask);
+ /* Clamp the rendering area to the damage extent. The
+ * KHR_partial_update spec states that trying to render outside of
+ * the damage region is "undefined behavior", so we should be safe.
+ */
+ if (!fb->rts[i].discard) {
+ fb->extent.minx = MAX2(fb->extent.minx, prsrc->damage.extent.minx);
+ fb->extent.miny = MAX2(fb->extent.miny, prsrc->damage.extent.miny);
+ fb->extent.maxx = MIN2(fb->extent.maxx, prsrc->damage.extent.maxx - 1);
+ fb->extent.maxy = MIN2(fb->extent.maxy, prsrc->damage.extent.maxy - 1);
+ assert(fb->extent.minx <= fb->extent.maxx);
+ assert(fb->extent.miny <= fb->extent.maxy);
+ }
+
rts[i].format = surf->format;
rts[i].dim = MALI_TEXTURE_DIMENSION_2D;
rts[i].last_level = rts[i].first_level = surf->u.tex.level;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_resource.c
^
|
@@ -1072,6 +1072,19 @@
box->width, box->height, box->depth);
}
+static bool
+panfrost_can_discard(struct pipe_resource *resource, const struct pipe_box *box,
+ unsigned usage)
+{
+ struct panfrost_resource *rsrc = pan_resource(resource);
+
+ return ((usage & PIPE_MAP_DISCARD_RANGE) &&
+ !(usage & PIPE_MAP_UNSYNCHRONIZED) &&
+ !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
+ panfrost_box_covers_resource(resource, box) &&
+ !(rsrc->image.data.bo->flags & PAN_BO_SHARED));
+}
+
static void *
panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource,
unsigned level,
@@ -1155,11 +1168,7 @@
/* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
* being mapped.
*/
- if ((usage & PIPE_MAP_DISCARD_RANGE) && !(usage & PIPE_MAP_UNSYNCHRONIZED) &&
- !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
- panfrost_box_covers_resource(resource, box) &&
- !(rsrc->image.data.bo->flags & PAN_BO_SHARED)) {
-
+ if (panfrost_can_discard(resource, box, usage)) {
usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
}
@@ -1298,13 +1307,10 @@
void
pan_resource_modifier_convert(struct panfrost_context *ctx,
struct panfrost_resource *rsrc, uint64_t modifier,
- const char *reason)
+ bool copy_resource, const char *reason)
{
assert(!rsrc->modifier_constant);
- perf_debug_ctx(ctx, "%s AFBC with a blit. Reason: %s",
- drm_is_afbc(modifier) ? "Unpacking" : "Disabling", reason);
-
struct pipe_resource *tmp_prsrc = panfrost_resource_create_with_modifier(
ctx->base.screen, &rsrc->base, modifier);
struct panfrost_resource *tmp_rsrc = pan_resource(tmp_prsrc);
@@ -1316,31 +1322,33 @@
struct pipe_box box = {0, 0, 0, rsrc->base.width0, rsrc->base.height0,
depth};
- struct pipe_blit_info blit = {
- .dst.resource = &tmp_rsrc->base,
- .dst.format = tmp_rsrc->base.format,
- .dst.box = box,
- .src.resource = &rsrc->base,
- .src.format = rsrc->base.format,
- .src.box = box,
- .mask = util_format_get_mask(tmp_rsrc->base.format),
- .filter = PIPE_TEX_FILTER_NEAREST,
- };
-
- for (int i = 0; i <= rsrc->base.last_level; i++) {
- if (BITSET_TEST(rsrc->valid.data, i)) {
- blit.dst.level = blit.src.level = i;
- panfrost_blit(&ctx->base, &blit);
+ if (copy_resource) {
+ struct pipe_blit_info blit = {
+ .dst.resource = &tmp_rsrc->base,
+ .dst.format = tmp_rsrc->base.format,
+ .dst.box = box,
+ .src.resource = &rsrc->base,
+ .src.format = rsrc->base.format,
+ .src.box = box,
+ .mask = util_format_get_mask(tmp_rsrc->base.format),
+ .filter = PIPE_TEX_FILTER_NEAREST,
+ };
+
+ for (int i = 0; i <= rsrc->base.last_level; i++) {
+ if (BITSET_TEST(rsrc->valid.data, i)) {
+ blit.dst.level = blit.src.level = i;
+ panfrost_blit(&ctx->base, &blit);
+ }
}
- }
- panfrost_bo_unreference(rsrc->image.data.bo);
+ panfrost_bo_unreference(rsrc->image.data.bo);
+ }
rsrc->image.data.bo = tmp_rsrc->image.data.bo;
panfrost_bo_reference(rsrc->image.data.bo);
panfrost_resource_setup(pan_device(ctx->base.screen), rsrc, modifier,
- blit.dst.format);
+ tmp_rsrc->base.format);
/* panfrost_resource_setup will force the modifier to stay constant when
* called with a specific modifier. We don't want that here, we want to
* be able to convert back to another modifier if needed */
@@ -1355,7 +1363,7 @@
void
pan_legalize_afbc_format(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
- enum pipe_format format, bool write)
+ enum pipe_format format, bool write, bool discard)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
@@ -1365,7 +1373,7 @@
if (panfrost_afbc_format(dev->arch, rsrc->base.format) !=
panfrost_afbc_format(dev->arch, format)) {
pan_resource_modifier_convert(
- ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
+ ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, !discard,
"Reinterpreting AFBC surface as incompatible format");
return;
}
@@ -1373,7 +1381,7 @@
if (write && (rsrc->image.layout.modifier & AFBC_FORMAT_MOD_SPARSE) == 0)
pan_resource_modifier_convert(
ctx, rsrc, rsrc->image.layout.modifier | AFBC_FORMAT_MOD_SPARSE,
- "Legalizing resource to allow writing");
+ !discard, "Legalizing resource to allow writing");
}
static bool
@@ -1580,6 +1588,10 @@
pan_resource(trans->staging.rsrc)->image.data.bo;
panfrost_bo_reference(prsrc->image.data.bo);
} else {
+ bool discard = panfrost_can_discard(&prsrc->base, &transfer->box,
+ transfer->usage);
+ pan_legalize_afbc_format(ctx, prsrc, prsrc->image.layout.format,
+ true, discard);
pan_blit_from_staging(pctx, trans);
panfrost_flush_batches_accessing_rsrc(
ctx, pan_resource(trans->staging.rsrc),
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/panfrost/pan_resource.h
^
|
@@ -189,11 +189,13 @@
void pan_resource_modifier_convert(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
- uint64_t modifier, const char *reason);
+ uint64_t modifier, bool copy_resource,
+ const char *reason);
void pan_legalize_afbc_format(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
- enum pipe_format format, bool write);
+ enum pipe_format format, bool write,
+ bool discard);
void pan_dump_resource(struct panfrost_context *ctx,
struct panfrost_resource *rsc);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_fs.c
^
|
@@ -65,11 +65,13 @@
case TGSI_SEMANTIC_TEXCOORD:
assert(index < ATTR_TEXCOORD_COUNT);
fs_inputs->texcoord[index] = i;
+ fs_inputs->num_texcoord++;
break;
case TGSI_SEMANTIC_GENERIC:
assert(index < ATTR_GENERIC_COUNT);
fs_inputs->generic[index] = i;
+ fs_inputs->num_generic++;
break;
case TGSI_SEMANTIC_FOG:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_screen.c
^
|
@@ -503,7 +503,6 @@
.lower_ftrunc = true, \
.lower_insert_byte = true, \
.lower_insert_word = true, \
- .lower_rotate = true, \
.lower_uniforms_to_ubo = true, \
.lower_vector_cmp = true, \
.no_integers = true, \
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r300/r300_state_derived.c
^
|
@@ -541,6 +541,14 @@
}
}
+ for (; i < ATTR_GENERIC_COUNT; i++) {
+ if (fs_inputs->generic[i] != ATTR_UNUSED) {
+ fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
+ "not enough hardware slots (it's not a bug, do not "
+ "report it).\n", i);
+ }
+ }
+
gen_offset = 0;
/* Re-use color varyings for texcoords if possible.
*
@@ -645,6 +653,14 @@
}
}
+ for (; i < ATTR_TEXCOORD_COUNT; i++) {
+ if (fs_inputs->texcoord[i] != ATTR_UNUSED) {
+ fprintf(stderr, "r300: ERROR: FS input texcoord %i unassigned, "
+ "not enough hardware slots (it's not a bug, do not "
+ "report it).\n", i);
+ }
+ }
+
/* Rasterize pointcoord. */
if (fs_inputs->pcoord != ATTR_UNUSED && tex_count < 8) {
@@ -666,14 +682,6 @@
tex_ptr += 2;
}
- for (; i < ATTR_GENERIC_COUNT; i++) {
- if (fs_inputs->generic[i] != ATTR_UNUSED) {
- fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
- "not enough hardware slots (it's not a bug, do not "
- "report it).\n", i);
- }
- }
-
/* Rasterize fog coordinates. */
if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) {
/* Set up the fog coordinates in VAP. */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r600/r600_pipe_common.c
^
|
@@ -1394,7 +1394,6 @@
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_ldexp = true,
- .lower_rotate = true,
/* due to a bug in the shader compiler, some loops hang
* if they are not unrolled, see:
* https://bugs.freedesktop.org/show_bug.cgi?id=86720
@@ -1453,7 +1452,8 @@
nir_lower_dceil |
nir_lower_dmod |
nir_lower_dsub |
- nir_lower_dtrunc;
+ nir_lower_dtrunc |
+ nir_lower_dround_even;
}
rscreen->nir_options_fs = rscreen->nir_options;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/r600/sfn/sfn_shader_cs.cpp
^
|
@@ -52,7 +52,9 @@
for (int i = 0; i < 3; ++i) {
m_local_invocation_id[i] = vf.allocate_pinned_register(thread_id_sel, i);
+ m_local_invocation_id[i]->set_flag(Register::pin_end);
m_workgroup_id[i] = vf.allocate_pinned_register(wg_id_sel, i);
+ m_workgroup_id[i]->set_flag(Register::pin_end);
}
return 2;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c
^
|
@@ -918,7 +918,8 @@
radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5);
/* ref_pic_list_modification() */
- if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) {
+ if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR &&
+ enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) {
radeon_enc_code_fixed_bits(enc, 0x0, 1);
/* long-term reference */
@@ -960,6 +961,7 @@
}
if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) &&
+ (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) &&
(enc->enc_pic.spec_misc.cabac_enable))
radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/radeon_vcn_enc_3_0.c
^
|
@@ -292,7 +292,8 @@
radeon_enc_code_fixed_bits(enc, 0x1, 1); /* direct_spatial_mv_pred_flag */
/* ref_pic_list_modification() */
- if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) {
+ if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR &&
+ enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) {
radeon_enc_code_fixed_bits(enc, 0x0, 1);
/* long-term reference */
@@ -338,6 +339,7 @@
}
if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) &&
+ (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) &&
(enc->enc_pic.spec_misc.cabac_enable))
radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_descriptors.c
^
|
@@ -382,17 +382,33 @@
state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch);
} else {
+ state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
+
+ uint32_t hw_format = G_008F14_DATA_FORMAT(state[1]);
uint16_t epitch = tex->surface.u.gfx9.epitch;
- if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM &&
- block_width == 1) {
- /* epitch is patched in ac_surface for sdma/vcn blocks to get
- * a value expressed in elements unit.
- * But here the texture is used with block_width == 1 so we
- * need epitch in pixel units.
- */
- epitch = (epitch + 1) / tex->surface.blk_w - 1;
+
+ /* epitch is surf_pitch - 1 and are in elements unit.
+ * For some reason I don't understand, when a packed YUV format
+ * like UYUV is used, we have to double epitch (making it a pixel
+ * pitch instead of an element pitch). Note that it's only done
+ * when sampling the texture using its native format; we don't
+ * need to do this when sampling it as UINT32 (as done by
+ * SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT).
+ * This looks broken, so it's possible that surf_pitch / epitch
+ * are computed incorrectly, but that's the only way I found
+ * to get these use cases to work properly:
+ * - yuyv dmabuf import (#6131)
+ * - jpeg vaapi decode
+ * - yuyv texture sampling (!26947)
+ * - jpeg vaapi get image (#10375)
+ */
+ if ((tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM ||
+ tex->buffer.b.b.format == PIPE_FORMAT_G8R8_B8R8_UNORM) &&
+ (hw_format == V_008F14_IMG_DATA_FORMAT_GB_GR ||
+ hw_format == V_008F14_IMG_DATA_FORMAT_BG_RG)) {
+ epitch = (epitch + 1) * 2 - 1;
}
- state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
+
state[4] |= S_008F20_PITCH(epitch);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_get.c
^
|
@@ -749,7 +749,7 @@
else
return 0;
case PIPE_VIDEO_CAP_EFC_SUPPORTED:
- return ((sscreen->info.family >= CHIP_RENOIR) &&
+ return ((sscreen->info.family > CHIP_RENOIR) &&
!(sscreen->debug_flags & DBG(NO_EFC)));
case PIPE_VIDEO_CAP_ENC_MAX_REFERENCES_PER_FRAME:
@@ -1323,7 +1323,6 @@
.lower_hadd = true,
.lower_hadd64 = true,
.lower_fisnormal = true,
- .lower_rotate = true,
.lower_to_scalar = true,
.lower_to_scalar_filter = sscreen->info.has_packed_math_16bit ?
si_alu_to_scalar_packed_math_filter : NULL,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pipe.c
^
|
@@ -967,6 +967,9 @@
si_resource_reference(&sscreen->attribute_ring, NULL);
+ util_queue_destroy(&sscreen->shader_compiler_queue);
+ util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
+
for (unsigned i = 0; i < ARRAY_SIZE(sscreen->aux_contexts); i++) {
if (!sscreen->aux_contexts[i].ctx)
continue;
@@ -989,9 +992,6 @@
sscreen->async_compute_context->destroy(sscreen->async_compute_context);
}
- util_queue_destroy(&sscreen->shader_compiler_queue);
- util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
-
/* Release the reference on glsl types of the compiler threads. */
glsl_type_singleton_decref();
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pm4.c
^
|
@@ -135,7 +135,7 @@
if (strstr(ac_get_register_name(state->screen->info.gfx_level,
state->screen->info.family, reg_offset),
"SPI_SHADER_PGM_LO_")) {
- state->reg_va_low_idx = get_packed_reg_valueN_idx(state, i);
+ state->spi_shader_pgm_lo_reg = reg_offset;
break;
}
}
@@ -162,7 +162,8 @@
if (strstr(ac_get_register_name(state->screen->info.gfx_level,
state->screen->info.family, reg_base_offset + i * 4),
"SPI_SHADER_PGM_LO_")) {
- state->reg_va_low_idx = state->last_pm4 + 2 + i;
+ state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4;
+
break;
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_pm4.h
^
|
@@ -45,7 +45,7 @@
uint16_t max_dw;
/* Used by SQTT to override the shader address */
- uint16_t reg_va_low_idx;
+ uint32_t spi_shader_pgm_lo_reg;
/* This must be the last field because the array can continue after the structure. */
uint32_t pm4[64];
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_sqtt.c
^
|
@@ -677,12 +677,14 @@
list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
&pso_correlation->record, list) {
list_del(&record->list);
+ pso_correlation->record_count--;
free(record);
}
list_for_each_entry_safe(struct rgp_loader_events_record, record,
&loader_events->record, list) {
list_del(&record->list);
+ loader_events->record_count--;
free(record);
}
@@ -698,6 +700,7 @@
}
list_del(&record->list);
free(record);
+ code_object->record_count--;
}
ac_sqtt_finish(sctx->sqtt);
@@ -1028,7 +1031,7 @@
struct rgp_code_object *code_object = &sctx->sqtt->rgp_code_object;
struct rgp_code_object_record *record;
- record = malloc(sizeof(struct rgp_code_object_record));
+ record = calloc(1, sizeof(struct rgp_code_object_record));
if (!record)
return false;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state.c
^
|
@@ -5441,6 +5441,7 @@
sctx->atoms.s.pm4_states[SI_STATE_IDX(rasterizer)].emit = si_pm4_emit_state;
sctx->atoms.s.pm4_states[SI_STATE_IDX(dsa)].emit = si_pm4_emit_state;
sctx->atoms.s.pm4_states[SI_STATE_IDX(poly_offset)].emit = si_pm4_emit_state;
+ sctx->atoms.s.pm4_states[SI_STATE_IDX(sqtt_pipeline)].emit = si_pm4_emit_state;
sctx->atoms.s.pm4_states[SI_STATE_IDX(ls)].emit = si_pm4_emit_shader;
sctx->atoms.s.pm4_states[SI_STATE_IDX(hs)].emit = si_pm4_emit_shader;
sctx->atoms.s.pm4_states[SI_STATE_IDX(es)].emit = si_pm4_emit_shader;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_draw.cpp
^
|
@@ -370,9 +370,8 @@
struct si_pm4_state *pm4 = &shader->pm4;
- uint32_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8;
- assert(PKT3_IT_OPCODE_G(pm4->pm4[pm4->reg_va_low_idx - 2]) == PKT3_SET_SH_REG);
- uint32_t reg = (pm4->pm4[pm4->reg_va_low_idx - 1] << 2) + SI_SH_REG_OFFSET;
+ uint64_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8;
+ uint32_t reg = pm4->spi_shader_pgm_lo_reg;
si_pm4_set_reg(&pipeline->pm4, reg, va_low);
}
}
@@ -883,8 +882,10 @@
sctx->family == CHIP_HAWAII && G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
num_instanced_prims_less_than<IS_DRAW_VERTEX_STATE>(indirect, prim, min_vertex_count,
instance_count, 2, sctx->patch_vertices)) {
- sctx->flags |= SI_CONTEXT_VGT_FLUSH;
- si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+ /* The cache flushes should have been emitted already. */
+ assert(sctx->flags == 0);
+ sctx->flags = SI_CONTEXT_VGT_FLUSH;
+ si_emit_cache_flush_direct(sctx);
}
}
@@ -2238,12 +2239,13 @@
/* Emit states. */
si_emit_rasterizer_prim_state<GFX_VERSION, HAS_GS, NGG>(sctx);
- /* This must be done before si_emit_all_states because it can set cache flush flags. */
+ /* This emits states and flushes caches. */
+ si_emit_all_states(sctx, masked_atoms);
+ /* This can be done after si_emit_all_states because it doesn't set cache flush flags. */
si_emit_draw_registers<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE>
(sctx, indirect, prim, index_size, instance_count, primitive_restart,
info->restart_index, min_direct_count);
- /* This emits states and flushes caches. */
- si_emit_all_states(sctx, masked_atoms);
+
/* <-- CUs are idle here if the cache_flush state waited. */
/* This must be done after si_emit_all_states, which can affect this. */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp
^
|
@@ -1983,7 +1983,7 @@
assert(0);
}
- assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.reg_va_low_idx != 0);
+ assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.spi_shader_pgm_lo_reg != 0);
}
static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c
^
|
@@ -33,8 +33,14 @@
line_width = roundf(line_width);
line_width = MAX2(line_width, 1);
- info.clip_half_line_width[0] = line_width * 0.5 / fabs(info.scale[0]);
- info.clip_half_line_width[1] = line_width * 0.5 / fabs(info.scale[1]);
+ float half_line_width = line_width * 0.5;
+ if (info.scale[0] == 0 || info.scale[1] == 0) {
+ info.clip_half_line_width[0] = 0;
+ info.clip_half_line_width[1] = 0;
+ } else {
+ info.clip_half_line_width[0] = half_line_width / fabs(info.scale[0]);
+ info.clip_half_line_width[1] = half_line_width / fabs(info.scale[1]);
+ }
/* If the Y axis is inverted (OpenGL default framebuffer), reverse it.
* This is because the viewport transformation inverts the clip space
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/softpipe/sp_screen.c
^
|
@@ -85,7 +85,6 @@
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,
- .lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.lower_int64_options = nir_lower_imul_2x32_64,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/svga/svga_screen.c
^
|
@@ -737,7 +737,6 @@
.lower_fdph = true, \
.lower_flrp64 = true, \
.lower_ldexp = true, \
- .lower_rotate = true, \
.lower_uniforms_to_ubo = true, \
.lower_vector_cmp = true, \
.lower_cs_local_index_to_id = true, \
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/v3d/v3d_screen.c
^
|
@@ -729,7 +729,6 @@
.lower_ldexp = true,
.lower_mul_high = true,
.lower_wpos_pntc = true,
- .lower_rotate = true,
.lower_to_scalar = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.lower_fquantize2f16 = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/vc4/vc4_program.c
^
|
@@ -2174,7 +2174,6 @@
.lower_ldexp = true,
.lower_fneg = true,
.lower_ineg = true,
- .lower_rotate = true,
.lower_to_scalar = true,
.lower_umax = true,
.lower_umin = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/virgl/virgl_screen.c
^
|
@@ -97,6 +97,8 @@
return vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_MIRROR_CLAMP_TO_EDGE;
FALLTHROUGH;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ if (vscreen->caps.caps.v2.host_feature_check_version >= 22)
+ return vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_MIRROR_CLAMP;
return vscreen->caps.caps.v1.bset.mirror_clamp &&
!(vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_HOST_IS_GLES);
case PIPE_CAP_TEXTURE_SWIZZLE:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json
^
|
@@ -1,5 +1,5 @@
{
- "$schema": "https://schema.khronos.org/vulkan/profiles-0.8.1-251.json",
+ "$schema": "https://schema.khronos.org/vulkan/profiles-0.8.2-271.json",
"capabilities": {
"vulkan10requirements": {
"features": {
@@ -164,7 +164,7 @@
"VkPhysicalDeviceMaintenance4Features": {
"maintenance4": true
},
- "VkPhysicalDeviceMaintenance5Features": {
+ "VkPhysicalDeviceMaintenance5FeaturesKHR": {
"maintenance5": true
}
},
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_batch.c
^
|
@@ -414,6 +414,18 @@
if (bs == ctx->last_free_batch_state)
ctx->last_free_batch_state = NULL;
}
+ /* try from the ones that are given back to the screen next */
+ if (!bs) {
+ simple_mtx_lock(&screen->free_batch_states_lock);
+ if (screen->free_batch_states) {
+ bs = screen->free_batch_states;
+ bs->ctx = ctx;
+ screen->free_batch_states = bs->next;
+ if (bs == screen->last_free_batch_state)
+ screen->last_free_batch_state = NULL;
+ }
+ simple_mtx_unlock(&screen->free_batch_states_lock);
+ }
if (!bs && ctx->batch_states) {
/* states are stored sequentially, so if the first one doesn't work, none of them will */
if (zink_screen_check_last_finished(screen, ctx->batch_states->fence.batch_id) ||
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_compiler.c
^
|
@@ -1227,7 +1227,6 @@
.lower_ldexp = true,
.lower_mul_high = true,
- .lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_uadd_sat = true,
@@ -4912,7 +4911,7 @@
if (var->data.location == VARYING_SLOT_VAR0)
var->data.driver_location = 0;
else if (var->data.patch)
- var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
+ var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
else
var->data.driver_location = var->data.location;
}
@@ -4939,7 +4938,7 @@
size += glsl_count_vec4_slots(var->type, false, false);
}
if (var->data.patch)
- var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
+ var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
else
var->data.driver_location = slot;
found = true;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_context.c
^
|
@@ -124,7 +124,9 @@
if (util_queue_is_initialized(&screen->flush_queue))
util_queue_finish(&screen->flush_queue);
if (ctx->batch.state && !screen->device_lost) {
+ simple_mtx_lock(&screen->queue_lock);
VkResult result = VKSCR(QueueWaitIdle)(screen->queue);
+ simple_mtx_unlock(&screen->queue_lock);
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(result));
@@ -159,16 +161,42 @@
while (bs) {
struct zink_batch_state *bs_next = bs->next;
zink_clear_batch_state(ctx, bs);
- zink_batch_state_destroy(screen, bs);
+ /* restore link as we insert them into the screens free_batch_states
+ * list below
+ */
+ bs->next = bs_next;
bs = bs_next;
}
bs = ctx->free_batch_states;
while (bs) {
struct zink_batch_state *bs_next = bs->next;
zink_clear_batch_state(ctx, bs);
- zink_batch_state_destroy(screen, bs);
+ bs->ctx = NULL;
+ /* restore link as we insert them into the screens free_batch_states
+ * list below
+ */
+ bs->next = bs_next;
bs = bs_next;
}
+ simple_mtx_lock(&screen->free_batch_states_lock);
+ if (ctx->batch_states) {
+ if (screen->free_batch_states)
+ screen->last_free_batch_state->next = ctx->batch_states;
+ else {
+ screen->free_batch_states = ctx->batch_states;
+ screen->last_free_batch_state = screen->free_batch_states;
+ }
+ while (screen->last_free_batch_state->next)
+ screen->last_free_batch_state = screen->last_free_batch_state->next;
+ }
+ if (ctx->free_batch_states) {
+ if (screen->free_batch_states)
+ screen->last_free_batch_state->next = ctx->free_batch_states;
+ else
+ screen->free_batch_states = ctx->free_batch_states;
+ screen->last_free_batch_state = ctx->last_free_batch_state;
+ }
+ simple_mtx_unlock(&screen->free_batch_states_lock);
if (ctx->batch.state) {
zink_clear_batch_state(ctx, ctx->batch.state);
zink_batch_state_destroy(screen, ctx->batch.state);
@@ -707,7 +735,7 @@
if (res->obj->is_buffer) {
if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
ctx->di.db.tbos[shader][slot].address = res->obj->bda + ctx->sampler_views[shader][slot]->u.buf.offset;
- ctx->di.db.tbos[shader][slot].range = ctx->sampler_views[shader][slot]->u.buf.size;
+ ctx->di.db.tbos[shader][slot].range = zink_sampler_view(ctx->sampler_views[shader][slot])->tbo_size;
ctx->di.db.tbos[shader][slot].format = zink_get_format(screen, ctx->sampler_views[shader][slot]->format);
} else {
struct zink_buffer_view *bv = get_bufferview_for_binding(ctx, shader, type, slot);
@@ -1197,8 +1225,12 @@
}
err = !sampler_view->image_view;
} else {
- if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB)
+ if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
+ /* always enforce limit clamping */
+ unsigned blocksize = util_format_get_blocksize(state->format);
+ sampler_view->tbo_size = MIN2(state->u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) * blocksize;
return &sampler_view->base;
+ }
VkBufferViewCreateInfo bvci = create_bvci(ctx, res, state->format, state->u.buf.offset, state->u.buf.size);
sampler_view->buffer_view = get_buffer_view(ctx, res, &bvci);
err = !sampler_view->buffer_view;
@@ -1236,9 +1268,10 @@
struct pipe_sampler_view *pview)
{
struct zink_sampler_view *view = zink_sampler_view(pview);
- if (pview->texture->target == PIPE_BUFFER)
- zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL);
- else {
+ if (pview->texture->target == PIPE_BUFFER) {
+ if (zink_descriptor_mode != ZINK_DESCRIPTOR_MODE_DB)
+ zink_buffer_view_reference(zink_screen(pctx->screen), &view->buffer_view, NULL);
+ } else {
zink_surface_reference(zink_screen(pctx->screen), &view->image_view, NULL);
zink_surface_reference(zink_screen(pctx->screen), &view->cube_array, NULL);
zink_surface_reference(zink_screen(pctx->screen), &view->zs_view, NULL);
@@ -1919,6 +1952,11 @@
zink_resource_access_is_write(access), false);
}
memcpy(&a->base, images + i, sizeof(struct pipe_image_view));
+ if (b->resource->target == PIPE_BUFFER) {
+ /* always enforce limit clamping */
+ unsigned blocksize = util_format_get_blocksize(a->base.format);
+ a->base.u.buf.size = MIN2(a->base.u.buf.size / blocksize, screen->info.props.limits.maxTexelBufferElements) * blocksize;
+ }
update = true;
res->image_binds[shader_type] |= BITFIELD_BIT(start_slot + i);
} else if (a->base.resource) {
@@ -3812,7 +3850,8 @@
}
}
- if (!batch->has_work) {
+ /* TODO: if swapchains gain timeline semaphore semantics, `flags` can be eliminated and no-op fence can return timeline id */
+ if (!batch->has_work && flags) {
if (pfence) {
/* reuse last fence */
fence = ctx->last_fence;
@@ -4322,7 +4361,7 @@
}
rebind_mask &= ~BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
}
- if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask)
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
if ((rebind_mask & BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER)) || (!rebind_mask && res->vbo_bind_mask)) {
@@ -4337,7 +4376,7 @@
rebind_mask &= ~BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
ctx->vertex_buffers_dirty = true;
}
- if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask)
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const uint32_t ubo_mask = rebind_mask ?
@@ -4353,7 +4392,7 @@
}
}
rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_UBO_VS, MESA_SHADER_STAGES);
- if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask)
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const unsigned ssbo_mask = rebind_mask ?
@@ -4370,7 +4409,7 @@
}
}
rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SSBO_VS, MESA_SHADER_STAGES);
- if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask)
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const unsigned sampler_mask = rebind_mask ?
rebind_mask & BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES) :
@@ -4385,7 +4424,7 @@
}
}
rebind_mask &= ~BITFIELD_RANGE(TC_BINDING_SAMPLERVIEW_VS, MESA_SHADER_STAGES);
- if (num_rebinds && expected_num_rebinds >= num_rebinds && !rebind_mask)
+ if (expected_num_rebinds && num_rebinds >= expected_num_rebinds && !rebind_mask)
goto end;
const unsigned image_mask = rebind_mask ?
@@ -4894,6 +4933,11 @@
zink_resource_copies_reset(d);
/* force counter buffer reset */
d->so_valid = false;
+ /* FIXME: tc buffer sharedness tracking */
+ if (!num_rebinds) {
+ num_rebinds = d->bind_count[0] + d->bind_count[1];
+ rebind_mask = 0;
+ }
if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) < num_rebinds)
ctx->buffer_rebind_counter = p_atomic_inc_return(&screen->buffer_rebind_counter);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_descriptors.c
^
|
@@ -1501,7 +1501,7 @@
}
if (bs->dd.db_xfer)
- pipe_buffer_unmap(&bs->ctx->base, bs->dd.db_xfer);
+ zink_screen_buffer_unmap(&screen->base, bs->dd.db_xfer);
bs->dd.db_xfer = NULL;
if (bs->dd.db)
screen->base.resource_destroy(&screen->base, &bs->dd.db->base.b);
@@ -1593,7 +1593,7 @@
if (!pres)
return false;
bs->dd.db = zink_resource(pres);
- bs->dd.db_map = pipe_buffer_map(&bs->ctx->base, pres, PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT | PIPE_MAP_THREAD_SAFE, &bs->dd.db_xfer);
+ bs->dd.db_map = pipe_buffer_map(&bs->ctx->base, pres, PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT | PIPE_MAP_THREAD_SAFE, &bs->dd.db_xfer);
}
return true;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_draw.cpp
^
|
@@ -435,6 +435,8 @@
/* always rebind all stages */
VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
+ VKCTX(CmdSetTessellationDomainOriginEXT)(bs->cmdbuf, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT);
+ VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled);
}
ctx->shobj_draw = true;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_kopper.c
^
|
@@ -318,7 +318,9 @@
if (error == VK_ERROR_NATIVE_WINDOW_IN_USE_KHR) {
if (util_queue_is_initialized(&screen->flush_queue))
util_queue_finish(&screen->flush_queue);
+ simple_mtx_lock(&screen->queue_lock);
VkResult result = VKSCR(QueueWaitIdle)(screen->queue);
+ simple_mtx_unlock(&screen->queue_lock);
if (result != VK_SUCCESS)
mesa_loge("ZINK: vkQueueWaitIdle failed (%s)", vk_Result_to_str(result));
error = VKSCR(CreateSwapchainKHR)(screen->dev, &cswap->scci, NULL,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_resource.c
^
|
@@ -359,6 +359,7 @@
mod_info.drmFormatModifier = modifier;
mod_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
mod_info.queueFamilyIndexCount = 0;
+ mod_info.pQueueFamilyIndices = NULL;
info.pNext = &mod_info;
}
@@ -697,6 +698,7 @@
ici->flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
ici->usage = 0;
ici->queueFamilyIndexCount = 0;
+ ici->pQueueFamilyIndices = NULL;
/* assume we're going to be doing some CompressedTexSubImage */
if (util_format_is_compressed(templ->format) && (ici->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
@@ -2826,6 +2828,16 @@
unmap_resource(screen, res);
}
+void
+zink_screen_buffer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptrans)
+{
+ struct zink_screen *screen = zink_screen(pscreen);
+ struct zink_transfer *trans = (struct zink_transfer *)ptrans;
+ if (trans->base.b.usage & PIPE_MAP_ONCE && !trans->staging_res)
+ do_transfer_unmap(screen, trans);
+ transfer_unmap(NULL, ptrans);
+}
+
static void
zink_buffer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans)
{
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_resource.h
^
|
@@ -44,7 +44,8 @@
void
zink_context_resource_init(struct pipe_context *pctx);
-
+void
+zink_screen_buffer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptrans);
void
zink_get_depth_stencil_resources(struct pipe_resource *res,
struct zink_resource **out_z,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_screen.c
^
|
@@ -55,8 +55,13 @@
#include <xf86drm.h>
#include <fcntl.h>
#include <sys/stat.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
+#endif
static int num_screens = 0;
bool zink_tracing = false;
@@ -1456,6 +1461,12 @@
zink_destroy_screen(struct pipe_screen *pscreen)
{
struct zink_screen *screen = zink_screen(pscreen);
+ struct zink_batch_state *bs = screen->free_batch_states;
+ while (bs) {
+ struct zink_batch_state *bs_next = bs->next;
+ zink_batch_state_destroy(screen, bs);
+ bs = bs_next;
+ }
#ifdef HAVE_RENDERDOC_APP_H
if (screen->renderdoc_capture_all && p_atomic_dec_zero(&num_screens))
@@ -2444,8 +2455,12 @@
{
struct zink_screen *screen = zink_screen(pscreen);
*count = screen->modifier_props[format].drmFormatModifierCount;
- for (int i = 0; i < MIN2(max, *count); i++)
+ for (int i = 0; i < MIN2(max, *count); i++) {
+ if (external_only)
+ external_only[i] = 0;
+
modifiers[i] = screen->modifier_props[format].pDrmFormatModifierProperties[i].drmFormatModifier;
+ }
}
static bool
@@ -3241,10 +3256,10 @@
{
uint64_t biggest_vis_vram = 0;
for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL_VISIBLE]; i++)
- biggest_vis_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[i].heapIndex].size);
+ biggest_vis_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL_VISIBLE][i]].heapIndex].size);
uint64_t biggest_vram = 0;
for (unsigned i = 0; i < screen->heap_count[ZINK_HEAP_DEVICE_LOCAL]; i++)
- biggest_vram = MAX2(biggest_vis_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[i].heapIndex].size);
+ biggest_vram = MAX2(biggest_vram, screen->info.mem_props.memoryHeaps[screen->info.mem_props.memoryTypes[screen->heap_map[ZINK_HEAP_DEVICE_LOCAL][i]].heapIndex].size);
/* determine if vis vram is roughly equal to total vram */
if (biggest_vis_vram > biggest_vram * 0.9)
screen->resizable_bar = true;
@@ -3491,6 +3506,7 @@
screen->base_descriptor_size = MAX4(screen->db_size[0], screen->db_size[1], screen->db_size[2], screen->db_size[3]);
}
+ simple_mtx_init(&screen->free_batch_states_lock, mtx_plain);
simple_mtx_init(&screen->dt_lock, mtx_plain);
util_idalloc_mt_init_tc(&screen->buffer_ids);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/drivers/zink/zink_types.h
^
|
@@ -1401,6 +1401,10 @@
simple_mtx_t copy_context_lock;
struct zink_context *copy_context;
+ struct zink_batch_state *free_batch_states; //unused batch states
+ struct zink_batch_state *last_free_batch_state; //for appending
+ simple_mtx_t free_batch_states_lock;
+
simple_mtx_t semaphores_lock;
struct util_dynarray semaphores;
struct util_dynarray fd_semaphores;
@@ -1664,6 +1668,7 @@
union {
struct zink_surface *image_view;
struct zink_buffer_view *buffer_view;
+ unsigned tbo_size;
};
struct zink_surface *cube_array;
/* Optional sampler view returning red (depth) in all channels, for shader rewrites. */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/d3d10umd/Draw.cpp
^
|
@@ -59,9 +59,12 @@
if (!pDevice->velems_changed)
return;
- for (unsigned i = 0; i < pDevice->velems.count; i++)
- pDevice->element_layout->velems.velems[i].src_stride = pDevice->vertex_strides[pDevice->element_layout->velems.velems[i].vertex_buffer_index];
- cso_set_vertex_elements(pDevice->cso, &pDevice->element_layout->velems);
+ if(pDevice->element_layout) {
+ struct cso_velems_state *state = &pDevice->element_layout->state;
+ for (unsigned i = 0; i < state->count; i++)
+ state->velems[i].src_stride = pDevice->vertex_strides[state->velems[i].vertex_buffer_index];
+ cso_set_vertex_elements(pDevice->cso, state);
+ }
pDevice->velems_changed = false;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/d3d10umd/InputAssembly.cpp
^
|
@@ -126,7 +126,6 @@
LOG_ENTRYPOINT();
Device *pDevice = CastDevice(hDevice);
- struct pipe_context *pipe = pDevice->pipe;
unsigned i;
for (i = 0; i < NumBuffers; i++) {
@@ -169,7 +168,7 @@
/* XXX this is odd... */
if (!vb->is_user_buffer && !vb->buffer.resource) {
- pDevice->vertex_strides[i]->stride = 0;
+ pDevice->vertex_strides[i] = 0;
vb->buffer_offset = 0;
vb->is_user_buffer = true;
vb->buffer.user = dummy;
@@ -271,9 +270,7 @@
LOG_ENTRYPOINT();
ElementLayout *pElementLayout = CastElementLayout(hElementLayout);
-
- struct cso_velems_state elements;
- memset(elements, 0, sizeof elements);
+ memset(pElementLayout, 0, sizeof *pElementLayout);
unsigned num_elements = pCreateElementLayout->NumElements;
unsigned max_elements = 0;
@@ -281,7 +278,7 @@
const D3D10DDIARG_INPUT_ELEMENT_DESC* pVertexElement =
&pCreateElementLayout->pVertexElements[i];
struct pipe_vertex_element *ve =
- &elements.velems[pVertexElement->InputRegister];
+ &pElementLayout->state.velems[pVertexElement->InputRegister];
ve->src_offset = pVertexElement->AlignedByteOffset;
ve->vertex_buffer_index = pVertexElement->InputSlot;
@@ -312,8 +309,7 @@
DebugPrintf("%s: gap\n", __func__);
}
- elements.count = max_elements;
- pElementLayout->velems = mem_dup(elements, sizeof(elements));
+ pElementLayout->state.count = max_elements;
}
@@ -335,10 +331,6 @@
{
LOG_ENTRYPOINT();
- struct pipe_context *pipe = CastPipeContext(hDevice);
- ElementLayout *pElementLayout = CastElementLayout(hElementLayout);
-
- free(pElementLayout->velems);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/d3d10umd/State.h
^
|
@@ -62,6 +62,7 @@
};
struct Query;
+struct ElementLayout;
struct Device
{
@@ -105,6 +106,7 @@
Query *pPredicate;
BOOL PredicateValue;
+ ElementLayout *element_layout;
BOOL velems_changed;
};
@@ -326,7 +328,7 @@
struct ElementLayout
{
- struct cso_velems_state *velems;
+ struct cso_velems_state state;
};
@@ -336,14 +338,6 @@
return static_cast<ElementLayout *>(hElementLayout.pDrvPrivate);
}
-static inline void *
-CastPipeInputLayout(D3D10DDI_HELEMENTLAYOUT hElementLayout)
-{
- ElementLayout *pElementLayout = CastElementLayout(hElementLayout);
- return pElementLayout ? pElementLayout->handle : NULL;
-}
-
-
struct SamplerState
{
void *handle;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/dri/dri_context.c
^
|
@@ -169,7 +169,7 @@
if (debug_get_bool_option("MESA_NO_ERROR", false) ||
driQueryOptionb(&screen->dev->option_cache, "mesa_no_error"))
#if !defined(_WIN32)
- if (geteuid() == getuid())
+ if (__normal_user())
#endif
attribs.flags |= ST_CONTEXT_FLAG_NO_ERROR;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/dri/dri_util.c
^
|
@@ -326,7 +326,7 @@
* for the X server's sake, and EGL will expect us to handle it because
* it iterates all __DRI_ATTRIBs.
*/
- *value = __DRI_ATTRIB_SWAP_EXCHANGE;
+ *value = __DRI_ATTRIB_SWAP_UNDEFINED;
break;
case __DRI_ATTRIB_MAX_SWAP_INTERVAL:
*value = INT_MAX;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_device.c
^
|
@@ -2509,6 +2509,8 @@
}
}
+ size *= pInfo->maxSequencesCount;
+
pMemoryRequirements->memoryRequirements.memoryTypeBits = 1;
pMemoryRequirements->memoryRequirements.alignment = 4;
pMemoryRequirements->memoryRequirements.size = align(size, pMemoryRequirements->memoryRequirements.alignment);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/lavapipe/lvp_execute.c
^
|
@@ -501,6 +501,7 @@
if (state->vb_strides_dirty) {
for (unsigned i = 0; i < state->velem.count; i++)
state->velem.velems[i].src_stride = state->vb_strides[state->velem.velems[i].vertex_buffer_index];
+ state->ve_dirty = true;
state->vb_strides_dirty = false;
}
@@ -3857,7 +3858,7 @@
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: {
VkBindVertexBufferIndirectCommandNV *data = input;
- cmd_size += sizeof(*cmd->u.bind_vertex_buffers.buffers) + sizeof(*cmd->u.bind_vertex_buffers.offsets);
+ cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets);
cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
if (max_size < size + cmd_size)
abort();
@@ -3866,12 +3867,20 @@
cmd->u.bind_vertex_buffers2.binding_count = 1;
cmd->u.bind_vertex_buffers2.buffers = (void*)cmdptr;
- cmd->u.bind_vertex_buffers2.offsets = (void*)(cmdptr + sizeof(*cmd->u.bind_vertex_buffers2.buffers));
+ uint32_t alloc_offset = sizeof(*cmd->u.bind_vertex_buffers2.buffers);
+
+ cmd->u.bind_vertex_buffers2.offsets = (void*)(cmdptr + alloc_offset);
+ alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.offsets);
+
+ cmd->u.bind_vertex_buffers2.sizes = (void*)(cmdptr + alloc_offset);
+ alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.sizes);
+
cmd->u.bind_vertex_buffers2.offsets[0] = 0;
cmd->u.bind_vertex_buffers2.buffers[0] = data->bufferAddress ? get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_vertex_buffers2.offsets[0]) : VK_NULL_HANDLE;
+ cmd->u.bind_vertex_buffers2.sizes[0] = data->size;
if (token->vertexDynamicStride) {
- cmd->u.bind_vertex_buffers2.strides = (void*)(cmdptr + sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets) + sizeof(*cmd->u.bind_vertex_buffers2.sizes));
+ cmd->u.bind_vertex_buffers2.strides = (void*)(cmdptr + alloc_offset);
cmd->u.bind_vertex_buffers2.strides[0] = data->stride;
} else {
cmd->u.bind_vertex_buffers2.strides = NULL;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/api/kernel.rs
^
|
@@ -535,11 +535,14 @@
let device_bits = q.device.address_bits();
let device_max = u64::MAX >> (u64::BITS - device_bits);
+ let mut threads = 1; // running product of the local sizes; starting at 0 would pin it to 0 and disable the limit check
for i in 0..work_dim as usize {
let lws = local_work_size[i];
let gws = global_work_size[i];
let gwo = global_work_offset[i];
+ threads *= lws;
+
// CL_INVALID_WORK_ITEM_SIZE if the number of work-items specified in any of
// local_work_size[0], … local_work_size[work_dim - 1] is greater than the corresponding
// values specified by
@@ -580,6 +583,14 @@
}
}
+ // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items
+ // in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater
+ // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries
+ // table.
+ if threads != 0 && threads > k.max_threads_per_block(q.device) {
+ return Err(CL_INVALID_WORK_GROUP_SIZE);
+ }
+
// If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel
// command will trivially succeed after its event dependencies are satisfied and subsequently
// update its completion event.
@@ -598,7 +609,6 @@
create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb)
//• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the required number of sub-groups for kernel in the program source.
- //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table.
//• CL_MISALIGNED_SUB_BUFFER_OFFSET if a sub-buffer object is specified as the value for an argument that is a buffer object and the offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue. This error code
//• CL_INVALID_IMAGE_SIZE if an image object is specified as an argument value and the image dimensions (image width, height, specified or compute row and/or slice pitch) are not supported by device associated with queue.
//• CL_IMAGE_FORMAT_NOT_SUPPORTED if an image object is specified as an argument value and the image format (image channel order and data type) is not supported by device associated with queue.
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/core/kernel.rs
^
|
@@ -317,7 +317,7 @@
res
}
-fn opt_nir(nir: &mut NirShader, dev: &Device) {
+fn opt_nir(nir: &mut NirShader, dev: &Device, has_explicit_types: bool) {
let nir_options = unsafe {
&*dev
.screen
@@ -342,7 +342,9 @@
}
progress |= nir_pass!(nir, nir_opt_deref);
- progress |= nir_pass!(nir, nir_opt_memcpy);
+ if has_explicit_types {
+ progress |= nir_pass!(nir, nir_opt_memcpy);
+ }
progress |= nir_pass!(nir, nir_opt_dce);
progress |= nir_pass!(nir, nir_opt_undef);
progress |= nir_pass!(nir, nir_opt_constant_folding);
@@ -452,11 +454,10 @@
printf_opts.max_buffer_size = dev.printf_buffer_size() as u32;
nir_pass!(nir, nir_lower_printf, &printf_opts);
- opt_nir(nir, dev);
+ opt_nir(nir, dev, false);
let mut args = KernelArg::from_spirv_nir(args, nir);
let mut internal_args = Vec::new();
- nir_pass!(nir, nir_lower_memcpy);
let dv_opts = nir_remove_dead_variables_options {
can_remove_var: Some(can_remove_var),
@@ -627,7 +628,8 @@
Some(glsl_get_cl_type_size_align),
);
- opt_nir(nir, dev);
+ opt_nir(nir, dev, true);
+ nir_pass!(nir, nir_lower_memcpy);
nir_pass!(
nir,
@@ -656,7 +658,7 @@
nir_pass!(nir, nir_lower_convert_alu_types, None);
- opt_nir(nir, dev);
+ opt_nir(nir, dev, true);
/* before passing it into drivers, assign locations as drivers might remove nir_variables or
* other things we depend on
@@ -734,6 +736,10 @@
*/
nir.preserve_fp16_denorms();
+ // Set to rtne for now until drivers are able to report their preferred rounding mode, that
+ // also matches what we report via the API.
+ nir.set_fp_rounding_mode_rtne();
+
let (args, internal_args) = lower_and_optimize_nir(dev, &mut nir, args, &dev.lib_clc);
if let Some(cache) = cache {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/core/platform.rs
^
|
@@ -79,6 +79,7 @@
"clc" => debug.clc = true,
"program" => debug.program = true,
"sync" => debug.sync_every_event = true,
+ "" => (),
_ => eprintln!("Unknown RUSTICL_DEBUG flag found: {}", flag),
}
}
@@ -90,6 +91,7 @@
match flag {
"fp16" => features.fp16 = true,
"fp64" => features.fp64 = true,
+ "" => (),
_ => eprintln!("Unknown RUSTICL_FEATURES flag found: {}", flag),
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/mesa/compiler/nir.rs
^
|
@@ -446,6 +446,15 @@
}
}
+ pub fn set_fp_rounding_mode_rtne(&mut self) {
+ unsafe {
+ self.nir.as_mut().info.float_controls_execution_mode |=
+ float_controls::FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 as u32
+ | float_controls::FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 as u32
+ | float_controls::FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64 as u32;
+ }
+ }
+
pub fn reads_sysval(&self, sysval: gl_system_value) -> bool {
let nir = unsafe { self.nir.as_ref() };
bitset::test_bit(&nir.info.system_values_read, sysval as u32)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/frontends/rusticl/meson.build
^
|
@@ -91,6 +91,13 @@
'-Aclippy::type_complexity',
]
+if rustc.version().version_compare('>=1.72')
+ rusticl_args += [
+ # Needs to be fixed
+ '-Aclippy::arc-with-non-send-sync'
+ ]
+endif
+
rusticl_gen_args = [
# can't do anything about it anyway
'-Aclippy::all',
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/gallium/targets/lavapipe/meson.build
^
|
@@ -50,7 +50,7 @@
command : [
prog_python, '@INPUT0@',
'--api-version', '1.1', '--xml', '@INPUT1@',
- '--lib-path', meson.current_build_dir() / 'libvulkan_lvp.so',
+ '--lib-path', meson.current_build_dir() / icd_file_name,
'--out', '@OUTPUT@',
],
build_by_default : true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/glx/glxext.c
^
|
@@ -563,12 +563,11 @@
case GLX_SAMPLES_SGIS:
config->samples = *bp++;
break;
-#ifdef GLX_USE_APPLEGL
case IGNORE_GLX_SWAP_METHOD_OML:
/* We ignore this tag. See the comment above this function. */
++bp;
break;
-#else
+#ifndef GLX_USE_APPLEGL
case GLX_BIND_TO_TEXTURE_RGB_EXT:
config->bindToTextureRgb = *bp++;
break;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/common/i915/intel_engine.c
^
|
@@ -25,7 +25,7 @@
#include <stdlib.h>
-#include "intel_gem.h"
+#include "i915/intel_gem.h"
static enum intel_engine_class
i915_engine_class_to_intel(enum drm_i915_gem_engine_class i915)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/common/intel_measure.c
^
|
@@ -108,7 +108,7 @@
*sep = '\0';
}
- if (filename && !__check_suid()) {
+ if (filename && __normal_user()) {
filename += 5;
config.file = fopen(filename, "w");
if (!config.file) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_compiler.c
^
|
@@ -189,7 +189,8 @@
nir_options->has_bfm = devinfo->ver >= 7;
nir_options->has_bfi = devinfo->ver >= 7;
- nir_options->lower_rotate = devinfo->ver < 11;
+ nir_options->has_rotate16 = devinfo->ver >= 11;
+ nir_options->has_rotate32 = devinfo->ver >= 11;
nir_options->lower_bitfield_reverse = devinfo->ver < 7;
nir_options->lower_find_lsb = devinfo->ver < 7;
nir_options->lower_ifind_msb = devinfo->ver < 7;
@@ -240,6 +241,9 @@
insert_u64_bit(&config, compiler->precise_trig);
bits++;
+ insert_u64_bit(&config, compiler->mesh.mue_compaction);
+ bits++;
+
uint64_t mask = DEBUG_DISK_CACHE_MASK;
bits += util_bitcount64(mask);
while (mask != 0) {
@@ -256,6 +260,12 @@
mask &= ~bit;
}
+ mask = 3;
+ bits += util_bitcount64(mask);
+
+ u_foreach_bit64(bit, mask)
+ insert_u64_bit(&config, (compiler->mesh.mue_header_packing & (1ULL << bit)) != 0);
+
assert(bits <= util_bitcount64(UINT64_MAX));
return config;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_fs.cpp
^
|
@@ -2788,6 +2788,29 @@
if (brw_reg_type_is_floating_point(inst->src[1].type))
break;
+ /* From the BDW PRM, Vol 2a, "mul - Multiply":
+ *
+ * "When multiplying integer datatypes, if src0 is DW and src1
+ * is W, irrespective of the destination datatype, the
+ * accumulator maintains full 48-bit precision."
+ * ...
+ * "When multiplying integer data types, if one of the sources
+ * is a DW, the resulting full precision data is stored in
+ * the accumulator."
+ *
+ * There are also similar notes in earlier PRMs.
+ *
+ * The MOV instruction can copy the bits of the source, but it
+ * does not clear the higher bits of the accumulator. So, because
+ * we might use the full accumulator in the MUL/MACH macro, we
+ * shouldn't replace such MULs with MOVs.
+ */
+ if ((brw_reg_type_to_size(inst->src[0].type) == 4 ||
+ brw_reg_type_to_size(inst->src[1].type) == 4) &&
+ (inst->dst.is_accumulator() ||
+ inst->writes_accumulator_implicitly(devinfo)))
+ break;
+
/* a * 1.0 = a */
if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_MOV;
@@ -6811,7 +6834,7 @@
static void
restore_instruction_order(struct cfg_t *cfg, fs_inst **inst_arr)
{
- int num_insts = cfg->last_block()->end_ip + 1;
+ ASSERTED int num_insts = cfg->last_block()->end_ip + 1;
int ip = 0;
foreach_block (block, cfg) {
@@ -7576,7 +7599,17 @@
case FRAG_DEPTH_LAYOUT_LESS:
return BRW_PSCDEPTH_ON_LE;
case FRAG_DEPTH_LAYOUT_UNCHANGED:
- return BRW_PSCDEPTH_OFF;
+ /* We initially set this to OFF, but having the shader write the
+ * depth means we allocate register space in the SEND message. The
+ * difference between the SEND register count and the OFF state
+ * programming makes the HW hang.
+ *
+ * Removing the depth writes also leads to test failures. So use
+ * LesserThanOrEqual, which fits writing the same value
+ * (unchanged/equal).
+ *
+ */
+ return BRW_PSCDEPTH_ON_LE;
}
}
return BRW_PSCDEPTH_OFF;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_fs_nir.cpp
^
|
@@ -361,6 +361,17 @@
invert = true;
cond_reg = get_nir_src(cond->src[0].src);
cond_reg = offset(cond_reg, bld, cond->src[0].swizzle[0]);
+
+ if (devinfo->ver <= 5 &&
+ (cond->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
+ /* redo boolean resolve on gen5 */
+ fs_reg masked = bld.vgrf(BRW_REGISTER_TYPE_D);
+ bld.AND(masked, cond_reg, brw_imm_d(1));
+ masked.negate = true;
+ fs_reg tmp = bld.vgrf(cond_reg.type);
+ bld.MOV(retype(tmp, BRW_REGISTER_TYPE_D), masked);
+ cond_reg = tmp;
+ }
} else {
invert = false;
cond_reg = get_nir_src(if_stmt->condition);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_nir.c
^
|
@@ -1753,14 +1753,6 @@
if (OPT(nir_opt_rematerialize_compares))
OPT(nir_opt_dce);
- /* This is the last pass we run before we start emitting stuff. It
- * determines when we need to insert boolean resolves on Gen <= 5. We
- * run it last because it stashes data in instr->pass_flags and we don't
- * want that to be squashed by other NIR passes.
- */
- if (devinfo->ver <= 5)
- brw_nir_analyze_boolean_resolves(nir);
-
OPT(nir_opt_dce);
/* The mesh stages require this pass to be called at the last minute,
@@ -1773,6 +1765,15 @@
brw_nir_adjust_payload(nir, compiler);
nir_trivialize_registers(nir);
+
+ /* This is the last pass we run before we start emitting stuff. It
+ * determines when we need to insert boolean resolves on Gen <= 5. We
+ * run it last because it stashes data in instr->pass_flags and we don't
+ * want that to be squashed by other NIR passes.
+ */
+ if (devinfo->ver <= 5)
+ brw_nir_analyze_boolean_resolves(nir);
+
nir_sweep(nir);
if (unlikely(debug_enabled)) {
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/compiler/brw_shader.cpp
^
|
@@ -29,6 +29,7 @@
#include "brw_vec4_tes.h"
#include "dev/intel_debug.h"
#include "util/macros.h"
+#include "util/u_debug.h"
enum brw_reg_type
brw_type_for_base_type(const struct glsl_type *type)
@@ -1243,7 +1244,7 @@
backend_shader::dump_instructions(const char *name) const
{
FILE *file = stderr;
- if (name && geteuid() != 0) {
+ if (name && __normal_user()) {
file = fopen(name, "w");
if (!file)
file = stderr;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/isl/isl.c
^
|
@@ -2548,19 +2548,42 @@
if (tile_info->tiling == ISL_TILING_GFX12_CCS)
base_alignment_B = MAX(base_alignment_B, 4096);
- /* Platforms using an aux map require that images be granularity-aligned
- * if they're going to used with CCS. This is because the Aux
- * translation table maps main surface addresses to aux addresses at a
- * granularity in the main surface. Because we don't know for sure in
- * ISL if a surface will use CCS, we have to guess based on the
- * DISABLE_AUX usage bit. The one thing we do know is that we haven't
- * enable CCS on linear images yet so we can avoid the extra alignment
- * there.
- */
if (dev->info->has_aux_map &&
!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
+ /* Wa_22015614752:
+ *
+ * Due to L3 cache being tagged with (engineID, vaID) and the CCS
+ * block/cacheline being 256 bytes, 2 engines accessing a 64Kb range
+ * with compression will generate 2 different CCS cacheline entries
+ * in L3, this will lead to corruptions. To avoid this, we need to
+ * ensure 2 images do not share a 256 bytes CCS cacheline. With a
+ * ratio of compression of 1/256, this is 64Kb alignment (even for
+ * Tile4...)
+ *
+ * ATS-M PRMS, Vol 2a: Command Reference: Instructions,
+ * XY_CTRL_SURF_COPY_BLT, "Size of Control Surface Copy" field, the
+ * CCS blocks are 256 bytes :
+ *
+ * "This field indicates size of the Control Surface or CCS copy.
+ * It is expressed in terms of number of 256B block of CCS, where
+ * each 256B block of CCS corresponds to 64KB of main surface."
+ */
+ if (intel_needs_workaround(dev->info, 22015614752)) {
+ base_alignment_B = MAX(base_alignment_B,
+ 256 /* cacheline */ * 256 /* AUX ratio */);
+ }
+
+ /* Platforms using an aux map require that images be
+ * granularity-aligned if they're going to be used with CCS. This is
+ * because the Aux translation table maps main surface addresses to
+ * aux addresses at a granularity in the main surface. Because we
+ * don't know for sure in ISL if a surface will use CCS, we have to
+ * guess based on the DISABLE_AUX usage bit. The one thing we do know
+ * is that we haven't enabled CCS on linear images yet so we can avoid
+ * the extra alignment there.
+ */
base_alignment_B = MAX(base_alignment_B, dev->info->verx10 >= 125 ?
- 1024 * 1024 : 64 * 1024);
+ 1024 * 1024 : 64 * 1024);
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_batch_chain.c
^
|
@@ -1016,27 +1016,9 @@
const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
if (cmd_buffer->device->physical->use_call_secondary) {
cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN;
- /* If the secondary command buffer begins & ends in the same BO and
- * its length is less than the length of CS prefetch, add some NOOPs
- * instructions so the last MI_BATCH_BUFFER_START is outside the CS
- * prefetch.
- */
- if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
- const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
- /* Careful to have everything in signed integer. */
- int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
- int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start;
-
- for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
- anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop);
- }
void *jump_addr =
- anv_batch_emitn(&cmd_buffer->batch,
- GFX9_MI_BATCH_BUFFER_START_length,
- GFX9_MI_BATCH_BUFFER_START,
- .AddressSpaceIndicator = ASI_PPGTT,
- .SecondLevelBatchBuffer = Firstlevelbatch) +
+ anv_genX(devinfo, batch_emit_return)(&cmd_buffer->batch) +
(GFX9_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr);
@@ -1156,18 +1138,10 @@
struct anv_batch_bo *first_bbo =
list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
- uint64_t *write_return_addr =
- anv_batch_emitn(&primary->batch,
- GFX9_MI_STORE_DATA_IMM_length + 1 /* QWord write */,
- GFX9_MI_STORE_DATA_IMM,
- .Address = secondary->return_addr)
- + (GFX9_MI_STORE_DATA_IMM_ImmediateData_start / 8);
-
- emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);
-
- *write_return_addr =
- anv_address_physical(anv_batch_address(&primary->batch,
- primary->batch.next));
+ anv_genX(primary->device->info, batch_emit_secondary_call)(
+ &primary->batch,
+ (struct anv_address) { .bo = first_bbo->bo },
+ secondary->return_addr);
anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
break;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_blorp.c
^
|
@@ -1793,7 +1793,7 @@
 * experiment shows that flushing the data cache helps to resolve the
* corruption.
*/
- unsigned wa_flush = intel_device_info_is_dg2(cmd_buffer->device->info) ?
+ unsigned wa_flush = cmd_buffer->device->info->verx10 >= 125 ?
ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_device.c
^
|
@@ -77,7 +77,7 @@
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
DRI_CONF_VK_KHR_PRESENT_WAIT(false)
DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
- DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
+ DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
DRI_CONF_ANV_DISABLE_FCV(false)
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
@@ -1216,7 +1216,7 @@
/* Increase count below when other families are added as a reminder to
* increase the ANV_MAX_QUEUE_FAMILIES value.
*/
- STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 4);
+ STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 5);
} else {
/* Default to a single render queue */
pdevice->queue.families[family_count++] = (struct anv_queue_family) {
@@ -1580,7 +1580,7 @@
instance->vk.app_info.engine_version);
instance->assume_full_subgroups =
- driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
+ driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
instance->limit_trig_input_range =
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
instance->sample_mask_out_opengl_behaviour =
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_genX.h
^
|
@@ -173,6 +173,12 @@
void genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params);
+void genX(batch_emit_secondary_call)(struct anv_batch *batch,
+ struct anv_address secondary_addr,
+ struct anv_address secondary_return_addr);
+
+void *genX(batch_emit_return)(struct anv_batch *batch);
+
void genX(cmd_emit_timestamp)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address addr,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_image.c
^
|
@@ -2301,8 +2301,14 @@
if (!bo || !isl_aux_usage_has_ccs(image->planes[p].aux_usage))
continue;
- /* Do nothing if flat CCS requirements are satisfied. */
- if (device->info->has_flat_ccs && bo->vram_only)
+ /* Do nothing if flat CCS requirements are satisfied.
+ *
+ * Also, assume that imported BOs with a modifier including
+ * CCS live only in local memory. Otherwise the exporter should
+ * have failed the creation of the BO.
+ */
+ if (device->info->has_flat_ccs &&
+ (bo->vram_only || bo->is_external))
continue;
/* Add the plane to the aux map when applicable. */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_pipeline.c
^
|
@@ -771,7 +771,7 @@
}
if (stages[MESA_SHADER_MESH].info || stages[MESA_SHADER_TASK].info) {
- const bool afs = device->physical->instance->assume_full_subgroups;
+ const uint8_t afs = device->physical->instance->assume_full_subgroups;
_mesa_sha1_update(&ctx, &afs, sizeof(afs));
}
@@ -789,7 +789,7 @@
anv_pipeline_hash_common(&ctx, &pipeline->base);
- const bool afs = device->physical->instance->assume_full_subgroups;
+ const uint8_t afs = device->physical->instance->assume_full_subgroups;
_mesa_sha1_update(&ctx, &afs, sizeof(afs));
_mesa_sha1_update(&ctx, stage->shader_sha1,
@@ -1628,8 +1628,7 @@
static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline,
- struct anv_pipeline_stage *stage,
- struct anv_shader_bin *bin)
+ struct anv_pipeline_stage *stage)
{
if (stage->stage == MESA_SHADER_FRAGMENT) {
/* We pull the prog data and stats out of the anv_shader_bin because
@@ -1637,8 +1636,8 @@
* looked up the shader in a cache.
*/
const struct brw_wm_prog_data *wm_prog_data =
- (const struct brw_wm_prog_data *)bin->prog_data;
- struct brw_compile_stats *stats = bin->stats;
+ (const struct brw_wm_prog_data *)stage->bin->prog_data;
+ struct brw_compile_stats *stats = stage->bin->stats;
if (wm_prog_data->dispatch_8) {
anv_pipeline_add_executable(pipeline, stage, stats++, 0);
@@ -1654,18 +1653,27 @@
wm_prog_data->prog_offset_32);
}
} else {
- anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
+ anv_pipeline_add_executable(pipeline, stage, stage->bin->stats, 0);
}
+}
+
+static void
+anv_pipeline_account_shader(struct anv_pipeline *pipeline,
+ struct anv_shader_bin *shader)
+{
+ pipeline->scratch_size = MAX2(pipeline->scratch_size,
+ shader->prog_data->total_scratch);
- pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
+ pipeline->ray_queries = MAX2(pipeline->ray_queries,
+ shader->prog_data->ray_queries);
- if (bin->push_desc_info.used_set_buffer) {
+ if (shader->push_desc_info.used_set_buffer) {
pipeline->use_push_descriptor_buffer |=
- BITFIELD_BIT(mesa_to_vk_shader_stage(stage->stage));
+ BITFIELD_BIT(mesa_to_vk_shader_stage(shader->stage));
}
- if (bin->push_desc_info.used_descriptors &
- ~bin->push_desc_info.fully_promoted_ubo_descriptors)
- pipeline->use_push_descriptor |= mesa_to_vk_shader_stage(stage->stage);
+ if (shader->push_desc_info.used_descriptors &
+ ~shader->push_desc_info.fully_promoted_ubo_descriptors)
+ pipeline->use_push_descriptor |= mesa_to_vk_shader_stage(shader->stage);
}
/* This function return true if a shader should not be looked at because of
@@ -1823,12 +1831,12 @@
int64_t stage_start = os_time_get_nano();
bool cache_hit;
- struct anv_shader_bin *bin =
+ stages[s].bin =
anv_device_search_for_kernel(device, cache, &stages[s].cache_key,
sizeof(stages[s].cache_key), &cache_hit);
- if (bin) {
+ if (stages[s].bin) {
found++;
- pipeline->shaders[s] = bin;
+ pipeline->shaders[s] = stages[s].bin;
}
if (cache_hit) {
@@ -1853,6 +1861,7 @@
if (stages[s].imported.bin == NULL)
continue;
+ stages[s].bin = stages[s].imported.bin;
pipeline->shaders[s] = anv_shader_bin_ref(stages[s].imported.bin);
imported++;
}
@@ -1868,8 +1877,12 @@
if (pipeline->shaders[s] == NULL)
continue;
- anv_pipeline_add_executables(&pipeline->base, &stages[s],
- pipeline->shaders[s]);
+ /* Only add the executables when we're not importing or doing link
+ * optimizations. The imported executables are added earlier. Link
+ * optimization can produce different binaries.
+ */
+ if (stages[s].imported.bin == NULL || link_optimize)
+ anv_pipeline_add_executables(&pipeline->base, &stages[s]);
pipeline->source_hashes[s] = stages[s].source_hash;
}
return true;
@@ -1991,7 +2004,9 @@
* a size.
*/
if (info->subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
- info->subgroup_size = BRW_SUBGROUP_SIZE;
+ info->subgroup_size =
+ device->physical->instance->assume_full_subgroups != 0 ?
+ device->physical->instance->assume_full_subgroups : BRW_SUBGROUP_SIZE;
}
static void
@@ -2331,7 +2346,6 @@
cur_info->patch_inputs_read |= prev_info->patch_outputs_written;
}
-
anv_fixup_subgroup_size(device, cur_info);
stage->feedback.duration += os_time_get_nano() - stage_start;
@@ -2436,7 +2450,7 @@
anv_nir_validate_push_layout(&stage->prog_data.base,
&stage->bind_map);
- struct anv_shader_bin *bin =
+ stage->bin =
anv_device_upload_kernel(device, cache, s,
&stage->cache_key,
sizeof(stage->cache_key),
@@ -2449,15 +2463,15 @@
&stage->bind_map,
&stage->push_desc_info,
stage->dynamic_push_values);
- if (!bin) {
+ if (!stage->bin) {
ralloc_free(stage_ctx);
result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
- anv_pipeline_add_executables(&pipeline->base, stage, bin);
+ anv_pipeline_add_executables(&pipeline->base, stage);
pipeline->source_hashes[s] = stage->source_hash;
- pipeline->shaders[s] = bin;
+ pipeline->shaders[s] = stage->bin;
ralloc_free(stage_ctx);
@@ -2481,7 +2495,6 @@
struct anv_pipeline_stage *stage = &stages[s];
- anv_pipeline_add_executables(&pipeline->base, stage, stage->imported.bin);
pipeline->source_hashes[s] = stage->source_hash;
pipeline->shaders[s] = anv_shader_bin_ref(stage->imported.bin);
}
@@ -2498,6 +2511,8 @@
struct anv_pipeline_stage *stage = &stages[s];
pipeline->feedback_index[s] = stage->feedback_idx;
pipeline->robust_flags[s] = stage->robust_flags;
+
+ anv_pipeline_account_shader(&pipeline->base, pipeline->shaders[s]);
}
pipeline_feedback->duration = os_time_get_nano() - pipeline_start;
@@ -2550,8 +2565,6 @@
};
anv_stage_write_shader_hash(&stage, device);
- struct anv_shader_bin *bin = NULL;
-
populate_cs_prog_key(&stage, device);
const bool skip_cache_lookup =
@@ -2561,18 +2574,18 @@
bool cache_hit = false;
if (!skip_cache_lookup) {
- bin = anv_device_search_for_kernel(device, cache,
- &stage.cache_key,
- sizeof(stage.cache_key),
- &cache_hit);
+ stage.bin = anv_device_search_for_kernel(device, cache,
+ &stage.cache_key,
+ sizeof(stage.cache_key),
+ &cache_hit);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/anv_private.h
^
|
@@ -805,7 +805,7 @@
enum intel_engine_class engine_class;
};
-#define ANV_MAX_QUEUE_FAMILIES 4
+#define ANV_MAX_QUEUE_FAMILIES 5
struct anv_memory_type {
/* Standard bits passed on to the client */
@@ -1022,7 +1022,7 @@
/**
* Workarounds for game bugs.
*/
- bool assume_full_subgroups;
+ uint8_t assume_full_subgroups;
bool limit_trig_input_range;
bool sample_mask_out_opengl_behaviour;
bool fp64_workaround_enabled;
@@ -1342,6 +1342,7 @@
bool RenderingDisable;
uint32_t RenderStreamSelect;
uint32_t ReorderMode;
+ uint32_t ForceRendering;
} so;
/* 3DSTATE_SAMPLE_MASK */
@@ -2825,215 +2826,43 @@
/* PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode.
* For more details see genX(emit_apply_pipe_flushes).
+ *
+ * Documentation says that untyped L1 dataport cache flush is controlled by
+ * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register:
+ *
+ * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
+ *
+ * "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to
+ * "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1
+ * cache based on the programming of HDC_Chicken0 register bits 13:11."
+ *
+ * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1
+ * cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the
+ * PIPE_CONTROL command."
+ *
+ * As part of Wa_22010960976 & Wa_14013347512, i915 is programming
+ * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol
+ * Dataport flush, and UAV coherency barrier event"). So there is no need
+ * to set "Untyped Data-Port Cache" in 3D mode.
+ *
+ * On MTL the HDC_CHICKEN0 default values changed to match what was programmed
+ * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the
+ * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0
+ * register to force L1 untyped flush with HDC pipeline flush has no effect on
+ * MTL.
+ *
+ * It seems like the HW change completely disconnected L1 untyped flush from
+ * HDC pipeline flush with no way to bring that behavior back. So leave the L1
+ * untyped flush active in 3D mode on all platforms since it doesn't seem to
+ * cause issues there either.
+ *
+ * Maybe we'll have some GPGPU only bits here at some point.
*/
-#define ANV_PIPE_GPGPU_BITS ( \
- (GFX_VERx10 >= 125 ? ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT : 0))
+#define ANV_PIPE_GPGPU_BITS (0)
enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
-static inline enum anv_pipe_bits
-anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
- VkAccessFlags2 flags)
-{
- enum anv_pipe_bits pipe_bits = 0;
-
- u_foreach_bit64(b, flags) {
- switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
- case VK_ACCESS_2_SHADER_WRITE_BIT:
- case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
- case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
- /* We're transitioning a buffer that was previously used as write
- * destination through the data port. To make its content available
- * to future operations, flush the hdc pipeline.
- */
- pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
- pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
- break;
- case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
- /* We're transitioning a buffer that was previously used as render
- * target. To make its content available to future operations, flush
- * the render target cache.
- */
- pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
- break;
- case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- /* We're transitioning a buffer that was previously used as depth
- * buffer. To make its content available to future operations, flush
- * the depth cache.
- */
- pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
- break;
- case VK_ACCESS_2_TRANSFER_WRITE_BIT:
- /* We're transitioning a buffer that was previously used as a
- * transfer write destination. Generic write operations include color
- * & depth operations as well as buffer operations like :
- * - vkCmdClearColorImage()
- * - vkCmdClearDepthStencilImage()
- * - vkCmdBlitImage()
- * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
- *
- * Most of these operations are implemented using Blorp which writes
- * through the render target, so flush that cache to make it visible
- * to future operations. And for depth related operations we also
- * need to flush the depth cache.
- */
- pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
- pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
- break;
- case VK_ACCESS_2_MEMORY_WRITE_BIT:
- /* We're transitioning a buffer for generic write operations. Flush
- * all the caches.
- */
- pipe_bits |= ANV_PIPE_FLUSH_BITS;
- break;
- case VK_ACCESS_2_HOST_WRITE_BIT:
- /* We're transitioning a buffer for access by CPU. Invalidate
- * all the caches. Since data and tile caches don't have invalidate,
- * we are forced to flush those as well.
- */
- pipe_bits |= ANV_PIPE_FLUSH_BITS;
- pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
- break;
- case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
- case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
- /* We're transitioning a buffer written either from VS stage or from
- * the command streamer (see CmdEndTransformFeedbackEXT), we just
- * need to stall the CS.
- *
- * Streamout writes apparently bypassing L3, in order to make them
- * visible to the destination, we need to invalidate the other
- * caches.
- */
- pipe_bits |= ANV_PIPE_CS_STALL_BIT | ANV_PIPE_INVALIDATE_BITS;
- break;
- default:
- break; /* Nothing to do */
- }
- }
-
- return pipe_bits;
-}
-
-static inline enum anv_pipe_bits
-anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
- VkAccessFlags2 flags)
-{
- enum anv_pipe_bits pipe_bits = 0;
-
- u_foreach_bit64(b, flags) {
- switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
- case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
- /* Indirect draw commands take a buffer as input that we're going to
- * read from the command streamer to load some of the HW registers
- * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
- * command streamer stall so that all the cache flushes have
- * completed before the command streamer loads from memory.
- */
- pipe_bits |= ANV_PIPE_CS_STALL_BIT;
- /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
- * through a vertex buffer, so invalidate that cache.
- */
- pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
- /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a
- * UBO from the buffer, so we need to invalidate constant cache.
- */
- pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
- pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
- /* Tile cache flush needed For CmdDipatchIndirect since command
- * streamer and vertex fetch aren't L3 coherent.
- */
- pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
- break;
- case VK_ACCESS_2_INDEX_READ_BIT:
- case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
- /* We transitioning a buffer to be used for as input for vkCmdDraw*
- * commands, so we invalidate the VF cache to make sure there is no
- * stale data when we start rendering.
- */
- pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
- break;
- case VK_ACCESS_2_UNIFORM_READ_BIT:
- case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR:
- /* We transitioning a buffer to be used as uniform data. Because
- * uniform is accessed through the data port & sampler, we need to
- * invalidate the texture cache (sampler) & constant cache (data
- * port) to avoid stale data.
- */
- pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
- if (device->physical->compiler->indirect_ubos_use_sampler) {
- pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_cmd_buffer.c
^
|
@@ -399,6 +399,23 @@
anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
0, base_layer, layer_count, ISL_AUX_OP_AMBIGUATE);
}
+
+#if GFX_VER == 12
+ /* Depth/Stencil writes by the render pipeline to D16 & S8 formats use a
+ * different pairing bit for the compression cache line. This means that
+ * there is potential for aliasing with the wrong cache if you use another
+ * format OR a piece of HW that does not use the same pairing. To avoid
+ * this, flush the tile cache as the compression data does not live in the
+ * color/depth cache.
+ */
+ if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_HIZ_CCS &&
+ final_needs_depth && !initial_depth_valid &&
+ anv_image_format_is_d16_or_s8(image)) {
+ anv_add_pending_pipe_bits(cmd_buffer,
+ ANV_PIPE_TILE_CACHE_FLUSH_BIT,
+ "D16 or S8 HIZ-CCS flush");
+ }
+#endif
}
/* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
@@ -454,6 +471,19 @@
clear_rect, 0 /* Stencil clear value */);
}
}
+
+ /* Depth/Stencil writes by the render pipeline to D16 & S8 formats use a
+ * different pairing bit for the compression cache line. This means that
+ * there is potential for aliasing with the wrong cache if you use another
+ * format OR a piece of HW that does not use the same pairing. To avoid
+ * this, flush the tile cache as the compression data does not live in the
+ * color/depth cache.
+ */
+ if (anv_image_format_is_d16_or_s8(image)) {
+ anv_add_pending_pipe_bits(cmd_buffer,
+ ANV_PIPE_TILE_CACHE_FLUSH_BIT,
+ "D16 or S8 HIZ-CCS flush");
+ }
#endif
}
@@ -868,7 +898,7 @@
0 : src_queue_family].queueFlags;
const VkQueueFlagBits dst_queue_flags =
device->physical->queue.families[
- (dst_queue_external || src_queue_family == VK_QUEUE_FAMILY_IGNORED) ?
+ (dst_queue_external || dst_queue_family == VK_QUEUE_FAMILY_IGNORED) ?
0 : dst_queue_family].queueFlags;
/* Simultaneous acquire and release on external queues is illegal. */
@@ -915,7 +945,7 @@
dst_queue_family != VK_QUEUE_FAMILY_IGNORED &&
src_queue_family != dst_queue_family) {
enum intel_engine_class src_engine =
- cmd_buffer->queue_family[src_queue_family].engine_class;
+ cmd_buffer->queue_family->engine_class;
if (src_engine != INTEL_ENGINE_CLASS_RENDER)
return;
}
@@ -1514,36 +1544,20 @@
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
#if GFX_VERx10 >= 125
- /* BSpec 47112: PIPE_CONTROL::Untyped Data-Port Cache Flush:
- *
- * "'HDC Pipeline Flush' bit must be set for this bit to take
- * effect."
- *
- * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
- *
- * "When the "Pipeline Select" mode in PIPELINE_SELECT command is
- * set to "3D", HDC Pipeline Flush can also flush/invalidate the
- * LSC Untyped L1 cache based on the programming of HDC_Chicken0
- * register bits 13:11."
- *
- * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC
- * Untyped L1 cache flush is controlled by 'Untyped Data-Port
- * Cache Flush' bit in the PIPE_CONTROL command."
- *
- * As part of Wa_1608949956 & Wa_14010198302, i915 is programming
- * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D
- * Pipecontrol Dataport flush, and UAV coherency barrier event").
- * So there is no need to set "Untyped Data-Port Cache" in 3D
- * mode.
- */
if (current_pipeline != GPGPU) {
- flush_bits &= ~ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
+ if (flush_bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT)
+ flush_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
} else {
if (flush_bits & (ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
ANV_PIPE_DATA_CACHE_FLUSH_BIT))
flush_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
}
+ /* BSpec 47112: PIPE_CONTROL::Untyped Data-Port Cache Flush:
+ *
+ * "'HDC Pipeline Flush' bit must be set for this bit to take
+ * effect."
+ */
if (flush_bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT)
flush_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
#endif
@@ -3295,6 +3309,19 @@
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
+#if GFX_VER >= 12
+ /* Reenable prefetching at the beginning of secondary command buffers. We
+ * do this so that the return instruction is not prefetched before its
+ * edit has completed.
+ */
+ if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
+ arb.PreParserDisableMask = true;
+ arb.PreParserDisable = false;
+ }
+ }
+#endif
+
trace_intel_begin_cmd_buffer(&cmd_buffer->trace);
if (anv_cmd_buffer_is_video_queue(cmd_buffer) ||
@@ -3722,6 +3749,232 @@
}
}
+static inline enum anv_pipe_bits
+anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
+ VkAccessFlags2 flags)
+{
+ enum anv_pipe_bits pipe_bits = 0;
+
+ u_foreach_bit64(b, flags) {
+ switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
+ case VK_ACCESS_2_SHADER_WRITE_BIT:
+ case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
+ case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
+ /* We're transitioning a buffer that was previously used as write
+ * destination through the data port. To make its content available
+ * to future operations, flush the hdc pipeline.
+ */
+ pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
+ pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
+ break;
+ case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as render
+ * target. To make its content available to future operations, flush
+ * the render target cache.
+ */
+ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+ break;
+ case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as depth
+ * buffer. To make its content available to future operations, flush
+ * the depth cache.
+ */
+ pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+ break;
+ case VK_ACCESS_2_TRANSFER_WRITE_BIT:
+ /* We're transitioning a buffer that was previously used as a
+ * transfer write destination. Generic write operations include color
+ * & depth operations as well as buffer operations like :
+ * - vkCmdClearColorImage()
+ * - vkCmdClearDepthStencilImage()
+ * - vkCmdBlitImage()
+ * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
+ *
+ * Most of these operations are implemented using Blorp which writes
+ * through the render target cache or the depth cache on the graphics
+ * queue. On the compute queue, the writes are done through the data
+ * port.
+ */
+ if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
+ pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
+ pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
+ } else {
+ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+ pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+ }
+ break;
+ case VK_ACCESS_2_MEMORY_WRITE_BIT:
+ /* We're transitioning a buffer for generic write operations. Flush
+ * all the caches.
+ */
+ pipe_bits |= ANV_PIPE_FLUSH_BITS;
+ break;
+ case VK_ACCESS_2_HOST_WRITE_BIT:
+ /* We're transitioning a buffer for access by CPU. Invalidate
+ * all the caches. Since data and tile caches don't have invalidate,
+ * we are forced to flush those as well.
+ */
+ pipe_bits |= ANV_PIPE_FLUSH_BITS;
+ pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
+ break;
+ case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+ case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_gfx_state.c
^
|
@@ -354,8 +354,8 @@
SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);
-#if INTEL_NEEDS_WA_14017076903
- /* Wa_14017076903 :
+#if INTEL_NEEDS_WA_18022508906
+ /* Wa_18022508906 :
*
* SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
*
@@ -383,8 +383,9 @@
* Here we force rendering to get SOL_INT::Render_Enable when occlusion
* queries are active.
*/
- if (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0)
- SET(STREAMOUT, so.ForceRendering, Force_on);
+ SET(STREAMOUT, so.ForceRendering,
+ (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
+ Force_on : 0);
#endif
switch (dyn->rs.provoking_vertex) {
@@ -1304,6 +1305,7 @@
SET(so, so, RenderingDisable);
SET(so, so, RenderStreamSelect);
SET(so, so, ReorderMode);
+ SET(so, so, ForceRendering);
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_gpu_memcpy.c
^
|
@@ -272,7 +272,7 @@
void
genX(emit_so_memcpy_end)(struct anv_memcpy_state *state)
{
- if (intel_device_info_is_dg2(state->device->info))
+ if (intel_needs_workaround(state->device->info, 16013994831))
genX(batch_set_preemption)(state->batch, state->device->info, true);
anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan/genX_video.c
^
|
@@ -551,7 +551,8 @@
cum += pps->column_width_minus1[4 * i + 2] + 1;
tile.ColumnPosition[i].CtbPos3i = cum;
- if ((4 * i + 3) == pps->num_tile_columns_minus1)
+ if ((4 * i + 3) >= MIN2(pps->num_tile_columns_minus1,
+ ARRAY_SIZE(pps->column_width_minus1)))
break;
cum += pps->column_width_minus1[4 * i + 3] + 1;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan_hasvk/anv_device.c
^
|
@@ -67,7 +67,7 @@
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
- DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
+ DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
DRI_CONF_NO_16BIT(false)
DRI_CONF_SECTION_END
@@ -1324,7 +1324,7 @@
instance->vk.app_info.engine_version);
instance->assume_full_subgroups =
- driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
+ driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
instance->limit_trig_input_range =
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
instance->sample_mask_out_opengl_behaviour =
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan_hasvk/anv_pipeline.c
^
|
@@ -472,7 +472,7 @@
const bool rba = device->vk.enabled_features.robustBufferAccess;
_mesa_sha1_update(&ctx, &rba, sizeof(rba));
- const bool afs = device->physical->instance->assume_full_subgroups;
+ const uint8_t afs = device->physical->instance->assume_full_subgroups;
_mesa_sha1_update(&ctx, &afs, sizeof(afs));
_mesa_sha1_update(&ctx, stage->shader_sha1,
@@ -1581,7 +1581,9 @@
* a size.
*/
if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
- stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE;
+ stage.nir->info.subgroup_size =
+ device->physical->instance->assume_full_subgroups != 0 ?
+ device->physical->instance->assume_full_subgroups : BRW_SUBGROUP_SIZE;
stage.num_stats = 1;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/intel/vulkan_hasvk/anv_private.h
^
|
@@ -944,7 +944,7 @@
/**
* Workarounds for game bugs.
*/
- bool assume_full_subgroups;
+ uint8_t assume_full_subgroups;
bool limit_trig_input_range;
bool sample_mask_out_opengl_behaviour;
float lower_depth_range_rate;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/loader/loader.c
^
|
@@ -675,7 +675,7 @@
* user's problem, but this allows vc4 simulator to run on an i965 host,
* and may be useful for some touch testing of i915 on an i965 host.
*/
- if (geteuid() == getuid()) {
+ if (__normal_user()) {
const char *override = os_get_option("MESA_LOADER_DRIVER_OVERRIDE");
if (override)
return strdup(override);
@@ -780,7 +780,7 @@
const char *search_paths, *next, *end;
search_paths = NULL;
- if (geteuid() == getuid() && search_path_vars) {
+ if (__normal_user() && search_path_vars) {
for (int i = 0; search_path_vars[i] != NULL; i++) {
search_paths = getenv(search_path_vars[i]);
if (search_paths)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/get_hash_params.py
^
|
@@ -494,6 +494,10 @@
# GL_EXT_framebuffer_EXT / GLES 3.0 + EXT_sRGB_write_control
[ "FRAMEBUFFER_SRGB_EXT", "CONTEXT_BOOL(Color.sRGBEnabled), extra_EXT_framebuffer_sRGB" ],
+
+# GL_ARB_cull_distance, GL_EXT_clip_cull_distance
+ [ "MAX_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ],
+ [ "MAX_COMBINED_CLIP_AND_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ],
]},
{ "apis": ["GLES", "GLES2"], "params": [
@@ -1009,10 +1013,6 @@
[ "GPU_MEMORY_INFO_EVICTION_COUNT_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
[ "GPU_MEMORY_INFO_EVICTED_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
-# GL_ARB_cull_distance
- [ "MAX_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ],
- [ "MAX_COMBINED_CLIP_AND_CULL_DISTANCES", "CONTEXT_INT(Const.MaxClipPlanes), extra_ARB_cull_distance" ],
-
# GL_ARB_compute_variable_group_size
[ "MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB", "CONTEXT_INT(Const.MaxComputeVariableGroupInvocations), extra_ARB_compute_variable_group_size" ],
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/glthread_draw.c
^
|
@@ -813,7 +813,8 @@
* Others prevent syncing, such as disallowing buffer objects because we
* can't map them without syncing.
*/
- return util_is_vbo_upload_ratio_too_large(count, num_upload_vertices) &&
+ return ctx->API == API_OPENGL_COMPAT &&
+ util_is_vbo_upload_ratio_too_large(count, num_upload_vertices) &&
instance_count == 1 && /* no instancing */
vao->CurrentElementBufferName == 0 && /* only user indices */
!ctx->GLThread._PrimitiveRestart && /* no primitive restart */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/shaderapi.c
^
|
@@ -177,7 +177,7 @@
static const char *path = NULL;
if (!read_env_var) {
- path = getenv("MESA_SHADER_CAPTURE_PATH");
+ path = secure_getenv("MESA_SHADER_CAPTURE_PATH");
read_env_var = true;
#if ANDROID_SHADER_CAPTURE
@@ -1971,7 +1971,7 @@
if (!path_exists)
return;
- dump_path = getenv("MESA_SHADER_DUMP_PATH");
+ dump_path = secure_getenv("MESA_SHADER_DUMP_PATH");
if (!dump_path) {
path_exists = false;
return;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/mesa/main/texobj.c
^
|
@@ -817,7 +817,8 @@
return;
}
if (t->Image[face][baseLevel]->InternalFormat !=
- baseImage->InternalFormat) {
+ baseImage->InternalFormat ||
+ t->Image[face][baseLevel]->TexFormat != baseImage->TexFormat) {
incomplete(t, BASE, "Cube face format mismatch");
return;
}
@@ -876,7 +877,8 @@
incomplete(t, MIPMAP, "TexImage[%d] is missing", i);
return;
}
- if (img->InternalFormat != baseImage->InternalFormat) {
+ if (img->InternalFormat != baseImage->InternalFormat ||
+ img->TexFormat != baseImage->TexFormat) {
incomplete(t, MIPMAP, "Format[i] != Format[baseLevel]");
return;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/microsoft/compiler/nir_to_dxil.c
^
|
@@ -119,7 +119,6 @@
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_mul_high = true,
- .lower_rotate = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/microsoft/vulkan/dzn_device.c
^
|
@@ -2806,7 +2806,7 @@
if (!device->dev13)
goto cleanup;
- if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, host_pointer, heap_desc.SizeInBytes, &IID_ID3D12Heap, &mem->heap)))
+ if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, host_pointer, heap_desc.SizeInBytes, &IID_ID3D12Heap, (void**)&mem->heap)))
goto cleanup;
D3D12_HEAP_DESC desc = dzn_ID3D12Heap_GetDesc(mem->heap);
@@ -3920,7 +3920,7 @@
return VK_ERROR_FEATURE_NOT_PRESENT;
ID3D12Heap *heap;
- if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, pHostPointer, 1, &IID_ID3D12Heap, &heap)))
+ if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, pHostPointer, 1, &IID_ID3D12Heap, (void **)&heap)))
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/nouveau/codegen/nv50_ir_from_nir.cpp
^
|
@@ -3465,7 +3465,7 @@
op.unify_interfaces = false;
op.use_interpolated_input_intrinsics = true;
op.lower_mul_2x32_64 = true; // TODO
- op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
+ op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET);
op.has_imul24 = false;
op.has_fmulz = (chipset > NVISA_G80_CHIPSET);
op.intel_vec4 = false;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/nouveau/vulkan/nvk_image.c
^
|
@@ -187,7 +187,7 @@
{
VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
- const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
+ const VkPhysicalDeviceExternalImageFormatInfo *external_info =
vk_find_struct_const(pImageFormatInfo->pNext,
PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/ci/panfrost-g52-fails.txt
^
|
@@ -526,17 +526,6 @@
dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.int32,Crash
dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.uint32,Crash
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_uint_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_unorm_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_sint_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_uint_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_unorm_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sfloat,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_uint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_sint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_uint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_unorm,Fail
dEQP-VK.api.command_buffers.record_many_draws_secondary_2,Fail
dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.b5g6r5_unorm_pack16.r16_snorm.general_general,Fail
dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8_snorm.r8_uint.general_general,Fail
@@ -550,24 +539,6 @@
dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8g8_unorm.r16_sint.general_general,Fail
dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8g8_unorm.r5g6b5_unorm_pack16.general_general,Fail
dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.2d.r8g8_unorm.r8g8_unorm.general_optimal,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.mix_1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.mix_array0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_2,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_4,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_5,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array2,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_2,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_4,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_5,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array2,Fail
dEQP-VK.glsl.operator.sequence.no_side_effects.highp_bool_vec2_fragment,Fail
dEQP-VK.glsl.operator.sequence.no_side_effects.highp_float_uint_fragment,Fail
dEQP-VK.glsl.operator.sequence.no_side_effects.highp_vec4_ivec4_bvec4_fragment,Fail
@@ -580,27 +551,6 @@
dEQP-VK.glsl.operator.sequence.side_effects.mediump_bool_vec2_fragment,Fail
dEQP-VK.glsl.operator.sequence.side_effects.mediump_float_uint_fragment,Fail
dEQP-VK.glsl.operator.sequence.side_effects.mediump_vec4_fragment,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_storage_read,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_uniform_read,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_write.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_write.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_write.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_3_texels,Fail
dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_linear,Fail
dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_nearest,Fail
dEQP-VK.texture.explicit_lod.2d.derivatives.linear_nearest_mipmap_linear,Fail
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/compiler/bifrost_compile.h
^
|
@@ -55,7 +55,6 @@
.lower_bitfield_insert = true, \
.lower_bitfield_extract = true, \
.lower_insert_byte = true, \
- .lower_rotate = true, \
\
/* Vertex ID is zero based in the traditional geometry flows, but not in \
* the memory-allocated IDVS flow introduced and used exclusively in \
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/midgard/midgard_compile.h
^
|
@@ -73,7 +73,6 @@
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_ldexp = true,
- .lower_rotate = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_private.h
^
|
@@ -397,7 +397,6 @@
unsigned num_dyn_ubos;
unsigned num_dyn_ssbos;
uint32_t num_imgs;
- uint32_t num_sets;
struct {
uint32_t size;
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
^
|
@@ -514,7 +514,7 @@
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
- for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
+ for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
const struct panvk_descriptor_set *set = desc_state->sets[s];
if (!set)
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/panfrost/vulkan/panvk_vX_device.c
^
|
@@ -117,7 +117,7 @@
}
if (debug & PANVK_DEBUG_TRACE)
- pandecode_next_frame(0);
+ pandecode_next_frame(pdev->decode_ctx);
batch->issued = true;
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/00-mesa-defaults.conf
^
|
@@ -960,6 +960,10 @@
<application name="Waterfox" executable="waterfox">
<option name="no_fp16" value="true" />
</application>
+ <!-- Game does not consider larger image count in non-vsynced modes. -->
+ <application name="Detroit Become Human" application_name_match="DetroitBecomeHuman">
+ <option name="vk_x11_strict_image_count" value="true" />
+ </application>
</device>
<!-- vmwgfx doesn't like full buffer swaps and can't sync to vertical retraces.-->
<device driver="vmwgfx">
@@ -1114,10 +1118,10 @@
</device>
<device driver="anv">
<application name="Aperture Desk Job" executable="deskjob">
- <option name="anv_assume_full_subgroups" value="true" />
+ <option name="anv_assume_full_subgroups" value="32" />
</application>
<application name="DOOMEternal" executable="DOOMEternalx64vk.exe">
- <option name="anv_assume_full_subgroups" value="true" />
+ <option name="anv_assume_full_subgroups" value="32" />
<option name="fp64_workaround_enabled" value="true" />
</application>
<application name="Wolfenstein: Youngblood(x64vk)" executable="Youngblood_x64vk.exe">
@@ -1166,6 +1170,12 @@
<application name="DEATH STRANDING" executable="ds.exe">
<option name="force_vk_vendor" value="-1" />
</application>
+ <application name="Baldur's Gate 3" executable="bg3.exe">
+ <option name="anv_disable_fcv" value="true" />
+ </application>
+ <application name="The Finals" executable="Discovery.exe">
+ <option name="force_vk_vendor" value="-1" />
+ </application>
<!--
Disable 16-bit feature on zink and angle so that GLES mediump doesn't
lower to our inefficient 16-bit shader support. No need to do so for
@@ -1178,6 +1188,7 @@
<!-- Disable FCV optimization for Unreal Engine 5.1 workloads. -->
<engine engine_name_match="UnrealEngine5.1">
<option name="anv_disable_fcv" value="true" />
+ <option name="anv_assume_full_subgroups" value="16" />
</engine>
</device>
<device driver="dzn">
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/00-radv-defaults.conf
^
|
@@ -104,6 +104,7 @@
<application name="DOOM Eternal" application_name_match="DOOMEternal">
<option name="radv_zero_vram" value="true" />
+ <option name="radv_force_active_accel_struct_leaves" value="true" />
</application>
<application name="No Man's Sky" application_name_match="No Man's Sky">
@@ -148,6 +149,14 @@
<option name="radv_invariant_geom" value="true"/>
</application>
+ <application name="Crysis 2 Remastered" executable="Crysis2Remastered.exe">
+ <option name="radv_ssbo_non_uniform" value="true" />
+ </application>
+
+ <application name="Crysis 3 Remastered" executable="Crysis3Remastered.exe">
+ <option name="radv_ssbo_non_uniform" value="true" />
+ </application>
+
<!-- OpenGL Game workarounds (zink) -->
<application name="Black Geyser: Couriers of Darkness" executable="BlackGeyser.x86_64">
<option name="radv_zero_vram" value="true" />
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/detect_arch.h
^
|
@@ -97,6 +97,10 @@
#define DETECT_ARCH_MIPS 1
#endif
+#if defined(__hppa__)
+#define DETECT_ARCH_HPPA 1
+#endif
+
#ifndef DETECT_ARCH_X86
#define DETECT_ARCH_X86 0
#endif
@@ -137,4 +141,8 @@
#define DETECT_ARCH_MIPS 0
#endif
+#ifndef DETECT_ARCH_HPPA
+#define DETECT_ARCH_HPPA 0
+#endif
+
#endif /* UTIL_DETECT_ARCH_H_ */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/disk_cache_os.c
^
|
@@ -33,6 +33,7 @@
#include "util/compress.h"
#include "util/crc32.h"
+#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/disk_cache_os.h"
@@ -850,10 +851,10 @@
else if (cache_type == DISK_CACHE_DATABASE)
cache_dir_name = CACHE_DIR_NAME_DB;
- char *path = getenv("MESA_SHADER_CACHE_DIR");
+ char *path = secure_getenv("MESA_SHADER_CACHE_DIR");
if (!path) {
- path = getenv("MESA_GLSL_CACHE_DIR");
+ path = secure_getenv("MESA_GLSL_CACHE_DIR");
if (path)
fprintf(stderr,
"*** MESA_GLSL_CACHE_DIR is deprecated; "
@@ -870,7 +871,7 @@
}
if (path == NULL) {
- char *xdg_cache_home = getenv("XDG_CACHE_HOME");
+ char *xdg_cache_home = secure_getenv("XDG_CACHE_HOME");
if (xdg_cache_home) {
if (mkdir_if_needed(xdg_cache_home) == -1)
@@ -940,7 +941,7 @@
return false;
/* If running as a user other than the real user, disable the cache */
- if (geteuid() != getuid())
+ if (!__normal_user())
return false;
/* At user request, disable shader cache entirely. */
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/driconf.h
^
|
@@ -674,6 +674,10 @@
DRI_CONF_OPT_B(radv_tex_non_uniform, def, \
"Always mark texture sample operations as non-uniform.")
+#define DRI_CONF_RADV_SSBO_NON_UNIFORM(def) \
+ DRI_CONF_OPT_B(radv_ssbo_non_uniform, def, \
+ "Always mark SSBO operations as non-uniform.")
+
#define DRI_CONF_RADV_FLUSH_BEFORE_TIMESTAMP_WRITE(def) \
DRI_CONF_OPT_B(radv_flush_before_timestamp_write, def, \
"Wait for previous commands to finish before writing timestamps")
@@ -684,13 +688,17 @@
#define DRI_CONF_RADV_APP_LAYER() DRI_CONF_OPT_S_NODEF(radv_app_layer, "Select an application layer.")
+#define DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(def) \
+ DRI_CONF_OPT_B(radv_force_active_accel_struct_leaves, def, \
+ "Force leaf nodes of acceleration structures to be marked active.")
+
/**
* \brief ANV specific configuration options
*/
#define DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(def) \
- DRI_CONF_OPT_B(anv_assume_full_subgroups, def, \
- "Allow assuming full subgroups requirement even when it's not specified explicitly")
+ DRI_CONF_OPT_I(anv_assume_full_subgroups, def, 0, 32, \
+ "Allow assuming full subgroups requirement even when it's not specified explicitly and set the given size")
#define DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(def) \
DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/hash_table.c
^
|
@@ -427,8 +427,7 @@
}
static struct hash_entry *
-hash_table_insert(struct hash_table *ht, uint32_t hash,
- const void *key, void *data)
+hash_table_get_entry(struct hash_table *ht, uint32_t hash, const void *key)
{
struct hash_entry *available_entry = NULL;
@@ -469,11 +468,8 @@
*/
if (!entry_is_deleted(ht, entry) &&
entry->hash == hash &&
- ht->key_equals_function(key, entry->key)) {
- entry->key = key;
- entry->data = data;
+ ht->key_equals_function(key, entry->key))
return entry;
- }
hash_address += double_hash;
if (hash_address >= size)
@@ -484,8 +480,6 @@
if (entry_is_deleted(ht, available_entry))
ht->deleted_entries--;
available_entry->hash = hash;
- available_entry->key = key;
- available_entry->data = data;
ht->entries++;
return available_entry;
}
@@ -496,6 +490,20 @@
return NULL;
}
+static struct hash_entry *
+hash_table_insert(struct hash_table *ht, uint32_t hash,
+ const void *key, void *data)
+{
+ struct hash_entry *entry = hash_table_get_entry(ht, hash, key);
+
+ if (entry) {
+ entry->key = key;
+ entry->data = data;
+ }
+
+ return entry;
+}
+
/**
* Inserts the key with the given hash into the table.
*
@@ -769,6 +777,13 @@
#define FREED_KEY_VALUE 0
+static void _mesa_hash_table_u64_delete_keys(void *data)
+{
+ struct hash_table_u64 *ht = ralloc_parent(data);
+
+ _mesa_hash_table_u64_clear(ht);
+}
+
struct hash_table_u64 *
_mesa_hash_table_u64_create(void *mem_ctx)
{
@@ -785,6 +800,31 @@
} else {
ht->table = _mesa_hash_table_create(ht, key_u64_hash,
key_u64_equals);
+
+ /* Allocate a ralloc sub-context which takes the u64 hash table
+ * as a parent and attach a destructor to it so we can free the
+ * hash_key_u64 objects that were allocated by
+ * _mesa_hash_table_u64_insert().
+ *
+ * The order of creation of this sub-context is crucial: it needs
+ * to happen after the _mesa_hash_table_create() call to guarantee
+ * that the destructor is called before ht->table and its children
+ * are freed, otherwise the _mesa_hash_table_u64_clear() call in the
+ * destructor leads to a use-after-free situation.
+ */
+ if (ht->table) {
+ void *dummy_ctx = ralloc_context(ht);
+
+ /* If we can't allocate a sub-context, free the hash table
+ * immediately and return NULL to avoid future leaks.
+ */
+ if (!dummy_ctx) {
+ ralloc_free(ht);
+ return NULL;
+ }
+
+ ralloc_set_destructor(dummy_ctx, _mesa_hash_table_u64_delete_keys);
+ }
}
if (ht->table)
@@ -802,7 +842,7 @@
struct hash_key_u64 *_key = (struct hash_key_u64 *)entry->key;
if (_key)
- free(_key);
+ FREE(_key);
}
void
@@ -847,7 +887,19 @@
return;
_key->value = key;
- _mesa_hash_table_insert(ht->table, _key, data);
+ struct hash_entry *entry =
+ hash_table_get_entry(ht->table, key_u64_hash(_key), _key);
+
+ if (!entry) {
+ FREE(_key);
+ return;
+ }
+
+ entry->data = data;
+ if (!entry_is_present(ht->table, entry))
+ entry->key = _key;
+ else
+ FREE(_key);
}
}
@@ -905,6 +957,6 @@
struct hash_key *_key = (struct hash_key *)entry->key;
_mesa_hash_table_remove(ht->table, entry);
- free(_key);
+ FREE(_key);
}
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/log.c
^
|
@@ -94,7 +94,7 @@
mesa_log_file = stderr;
#if !DETECT_OS_WINDOWS
- if (geteuid() == getuid()) {
+ if (__normal_user()) {
const char *log_file = os_get_option("MESA_LOG_FILE");
if (log_file) {
FILE *fp = fopen(log_file, "w");
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/perf/u_trace.c
^
|
@@ -385,7 +385,7 @@
u_trace_state.enabled_traces =
debug_get_flags_option("MESA_GPU_TRACES", config_control, 0);
const char *tracefile_name = debug_get_option_trace_file();
- if (tracefile_name && !__check_suid()) {
+ if (tracefile_name && __normal_user()) {
u_trace_state.trace_file = fopen(tracefile_name, "w");
if (u_trace_state.trace_file != NULL) {
atexit(trace_file_fini);
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/tests/half_float_test.cpp
^
|
@@ -46,18 +46,35 @@
}
#endif
-/* Sanity test our test values */
-TEST(half_to_float_test, nan_test)
+/* The sign of the bit for signaling is different on some old processors
+ * (PA-RISC, old MIPS without IEEE-754-2008 support).
+ *
+ * Disable the tests on those platforms, because it's not clear how to
+ * correctly handle NaNs when the CPU and GPU differ in their convention.
+ */
+#if DETECT_ARCH_HPPA || ((DETECT_ARCH_MIPS || DETECT_ARCH_MIPS64) && !defined __mips_nan2008)
+#define IEEE754_2008_NAN 0
+#else
+#define IEEE754_2008_NAN 1
+#endif
+
+/* Sanity test our inf test values */
+TEST(half_to_float_test, inf_test)
{
EXPECT_TRUE(isinf(TEST_POS_INF));
EXPECT_TRUE(isinf(TEST_NEG_INF));
+}
+/* Make sure that our 32-bit float nan test value we're using is a
+ * non-signaling NaN.
+ */
+#if IEEE754_2008_NAN
+TEST(half_to_float_test, nan_test)
+#else
+TEST(half_to_float_test, DISABLED_nan_test)
+#endif
+{
EXPECT_TRUE(isnan(TEST_NAN));
- /* Make sure that our 32-bit float nan test value we're using is a
- * non-signaling NaN. The sign of the bit for signaling was apparently
- * different on some old processors (PA-RISC, MIPS?). This test value should
- * cover Intel, ARM, and PPC, for sure.
- */
EXPECT_FALSE(issignaling(TEST_NAN));
}
@@ -82,12 +99,20 @@
}
/* Test the optionally HW instruction-using path. */
+#if IEEE754_2008_NAN
TEST(half_to_float_test, half_to_float_test)
+#else
+TEST(half_to_float_test, DISABLED_half_to_float_test)
+#endif
{
test_half_to_float_limits(_mesa_half_to_float);
}
+#if IEEE754_2008_NAN
TEST(half_to_float_test, half_to_float_slow_test)
+#else
+TEST(half_to_float_test, DISABLED_half_to_float_slow_test)
+#endif
{
test_half_to_float_limits(_mesa_half_to_float_slow);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/util/u_debug.h
^
|
@@ -39,6 +39,7 @@
#define U_DEBUG_H_
#include <stdarg.h>
+#include <stdlib.h>
#include <string.h>
#if !defined(_WIN32)
#include <sys/types.h>
@@ -394,15 +395,22 @@
}
static inline bool
-__check_suid(void)
+__normal_user(void)
{
-#if !defined(_WIN32)
- if (geteuid() != getuid())
- return true;
+#if defined(_WIN32)
+ return true;
+#else
+ return geteuid() == getuid() && getegid() == getgid();
#endif
- return false;
}
+#ifndef HAVE_SECURE_GETENV
+static inline char *secure_getenv(const char *name)
+{
+ return getenv(name);
+}
+#endif
+
#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
static bool \
debug_get_option_ ## sufix (void) \
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/virtio/virtio-gpu/virgl_hw.h
^
|
@@ -586,6 +586,7 @@
#define VIRGL_CAP_V2_DRAW_PARAMETERS (1 << 14)
#define VIRGL_CAP_V2_GROUP_VOTE (1 << 15)
#define VIRGL_CAP_V2_MIRROR_CLAMP_TO_EDGE (1 << 16)
+#define VIRGL_CAP_V2_MIRROR_CLAMP (1 << 17)
/* virgl bind flags - these are compatible with mesa 10.5 gallium.
* but are fixed, no other should be passed to virgl either.
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/virtio/vulkan/vn_pipeline.c
^
|
@@ -320,7 +320,7 @@
}
}
- layout->has_push_constant_ranges = pCreateInfo->pPushConstantRanges > 0;
+ layout->has_push_constant_ranges = pCreateInfo->pushConstantRangeCount > 0;
VkPipelineLayout layout_handle = vn_pipeline_layout_to_handle(layout);
vn_async_vkCreatePipelineLayout(dev->instance, device, pCreateInfo, NULL,
@@ -1119,6 +1119,31 @@
state->gpl.fragment_output = true;
}
+ /* After direct_gpl states collection, check the final state to validate
+ * VkPipelineLayout in case of being the final layout in linked pipeline.
+ *
+ * From the Vulkan 1.3.275 spec:
+ * VUID-VkGraphicsPipelineCreateInfo-layout-06602
+ *
+ * If the pipeline requires fragment shader state or pre-rasterization
+ * shader state, layout must be a valid VkPipelineLayout handle
+ */
+ if ((state->gpl.fragment_shader && !is_raster_statically_disabled) ||
+ state->gpl.pre_raster_shaders)
+ valid.pipeline_layout = true;
+
+ /* Pipeline Derivatives
+ *
+ * VUID-VkGraphicsPipelineCreateInfo-flags-07984
+ *
+ * If flags contains the VK_PIPELINE_CREATE_DERIVATIVE_BIT flag, and
+ * basePipelineIndex is -1, basePipelineHandle must be a valid graphics
+ * VkPipeline handle
+ */
+ if ((info->flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT) &&
+ info->basePipelineIndex == -1)
+ valid.base_pipeline_handle = true;
+
*out_fix_desc = (struct vn_graphics_pipeline_fix_desc) {
.erase = {
/* clang-format off
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/meson.build
^
|
@@ -92,6 +92,7 @@
idep_vulkan_wsi_defines = declare_dependency(
compile_args : vulkan_wsi_list,
+ dependencies : vulkan_wsi_deps,
)
vulkan_wsi_deps += idep_vulkan_wsi_defines
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/runtime/vk_graphics_state.c
^
|
@@ -2338,7 +2338,7 @@
VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer);
struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state;
- SET_DYN_VALUE(dyn, RS_PROVOKING_VERTEX,
+ SET_DYN_VALUE(dyn, RS_RASTERIZATION_STREAM,
rs.rasterization_stream, rasterizationStream);
}
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/runtime/vk_instance.c
^
|
@@ -199,7 +199,7 @@
instance->trace_mode = parse_debug_string(getenv("MESA_VK_TRACE"), trace_options);
instance->trace_frame = (uint32_t)debug_get_num_option("MESA_VK_TRACE_FRAME", 0xFFFFFFFF);
- instance->trace_trigger_file = getenv("MESA_VK_TRACE_TRIGGER");
+ instance->trace_trigger_file = secure_getenv("MESA_VK_TRACE_TRIGGER");
glsl_type_singleton_init_or_ref();
|
[-]
[+]
|
Changed |
_service:tar_git:mesa-23.3.5+git1.tar.bz2/mesa/src/vulkan/wsi/wsi_common_wayland.c
^
|
@@ -2215,6 +2215,8 @@
pthread_mutex_destroy(&chain->present_ids.lock);
}
+ vk_free(pAllocator, (void *)chain->drm_modifiers);
+
wsi_swapchain_finish(&chain->base);
}
@@ -2260,7 +2262,8 @@
*/
if (wsi_wl_surface->chain &&
wsi_swapchain_to_handle(&wsi_wl_surface->chain->base) != pCreateInfo->oldSwapchain) {
- return VK_ERROR_NATIVE_WINDOW_IN_USE_KHR;
+ result = VK_ERROR_NATIVE_WINDOW_IN_USE_KHR;
+ goto fail;
}
if (pCreateInfo->oldSwapchain) {
VK_FROM_HANDLE(wsi_wl_swapchain, old_chain, pCreateInfo->oldSwapchain);
@@ -2370,11 +2373,24 @@
chain->shm_format = wl_shm_format_for_vk_format(chain->vk_format, alpha);
}
chain->num_drm_modifiers = num_drm_modifiers;
- chain->drm_modifiers = drm_modifiers;
+ if (num_drm_modifiers) {
+ uint64_t *drm_modifiers_copy =
+ vk_alloc(pAllocator, sizeof(*drm_modifiers) * num_drm_modifiers, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!drm_modifiers_copy) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail_free_wl_chain;
+ }
+
+ typed_memcpy(drm_modifiers_copy, drm_modifiers, num_drm_modifiers);
+ chain->drm_modifiers = drm_modifiers_copy;
+ }
if (chain->wsi_wl_surface->display->wp_presentation_notwrapped) {
- if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced))
- goto fail;
+ if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced)) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail_free_wl_chain;
+ }
pthread_mutex_init(&chain->present_ids.lock, NULL);
wl_list_init(&chain->present_ids.outstanding_list);
@@ -2392,7 +2408,7 @@
result = wsi_wl_image_init(chain, &chain->images[i],
pCreateInfo, pAllocator);
if (result != VK_SUCCESS)
- goto fail_image_init;
+ goto fail_free_wl_images;
chain->images[i].busy = false;
}
@@ -2400,14 +2416,15 @@
return VK_SUCCESS;
-fail_image_init:
+fail_free_wl_images:
wsi_wl_swapchain_images_free(chain);
-
+fail_free_wl_chain:
wsi_wl_swapchain_chain_free(chain, pAllocator);
fail:
vk_free(pAllocator, chain);
wsi_wl_surface->chain = NULL;
+ assert(result != VK_SUCCESS);
return result;
}
|