From 2b31de32388eb61aedcec24292539826caba0369 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 18 Jan 2021 00:31:15 +0100 Subject: [PATCH] early-access version 1335 --- README.md | 2 +- src/audio_core/voice_context.h | 36 ++--- src/common/uuid.h | 4 +- src/core/CMakeLists.txt | 1 + src/core/file_sys/savedata_factory.h | 4 +- src/core/hle/ipc.h | 4 +- src/core/hle/service/acc/acc.cpp | 2 +- src/core/hle/service/acc/profile_manager.cpp | 10 +- src/core/hle/service/acc/profile_manager.h | 18 +-- src/core/hle/service/audio/audout_u.cpp | 2 +- src/core/hle/service/hid/controllers/npad.cpp | 6 +- src/core/hle/service/hid/controllers/npad.h | 23 ++- src/core/hle/service/hid/hid.cpp | 148 +++++++++--------- src/core/hle/service/mii/manager.cpp | 1 + src/core/hle/service/mii/manager.h | 106 ++++++------- src/core/hle/service/time/clock_types.h | 26 +-- src/core/hle/service/time/time_zone_types.h | 22 +-- src/video_core/buffer_cache/buffer_cache.h | 45 ++++-- .../renderer_opengl/gl_texture_cache.cpp | 21 ++- .../renderer_opengl/gl_texture_cache.h | 11 +- .../renderer_opengl/util_shaders.cpp | 18 +-- src/video_core/renderer_opengl/util_shaders.h | 6 +- .../renderer_vulkan/vk_buffer_cache.cpp | 17 +- .../renderer_vulkan/vk_buffer_cache.h | 7 + .../renderer_vulkan/vk_compute_pass.cpp | 61 +++----- .../renderer_vulkan/vk_compute_pass.h | 9 +- .../vk_staging_buffer_pool.cpp | 143 ++++++++++++++++- .../renderer_vulkan/vk_staging_buffer_pool.h | 20 +++ .../renderer_vulkan/vk_texture_cache.cpp | 14 +- .../renderer_vulkan/vk_texture_cache.h | 9 +- src/video_core/texture_cache/texture_cache.h | 38 ++--- .../vulkan_common/vulkan_memory_allocator.h | 5 +- .../vulkan_common/vulkan_wrapper.cpp | 20 ++- src/video_core/vulkan_common/vulkan_wrapper.h | 5 +- src/yuzu/applets/profile_select.cpp | 2 +- .../configure_profile_manager.cpp | 6 +- 36 files changed, 531 insertions(+), 341 deletions(-) diff --git a/README.md b/README.md index 8f1eb159b..cf004b4b8 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1334. +This is the source code for early-access 1335. ## Legal Notice diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h index 863248761..70359cadb 100755 --- a/src/audio_core/voice_context.h +++ b/src/audio_core/voice_context.h @@ -86,28 +86,28 @@ struct BehaviorFlags { static_assert(sizeof(BehaviorFlags) == 0x4, "BehaviorFlags is an invalid size"); struct ADPCMContext { - u16 header{}; - s16 yn1{}; - s16 yn2{}; + u16 header; + s16 yn1; + s16 yn2; }; static_assert(sizeof(ADPCMContext) == 0x6, "ADPCMContext is an invalid size"); struct VoiceState { - s64 played_sample_count{}; - s32 offset{}; - s32 wave_buffer_index{}; - std::array is_wave_buffer_valid{}; - s32 wave_buffer_consumed{}; - std::array sample_history{}; - s32 fraction{}; - VAddr context_address{}; - Codec::ADPCM_Coeff coeff{}; - ADPCMContext context{}; - std::array biquad_filter_state{}; - std::array previous_samples{}; - u32 external_context_size{}; - bool is_external_context_used{}; - bool voice_dropped{}; + s64 played_sample_count; + s32 offset; + s32 wave_buffer_index; + std::array is_wave_buffer_valid; + s32 wave_buffer_consumed; + std::array sample_history; + s32 fraction; + VAddr context_address; + Codec::ADPCM_Coeff coeff; + ADPCMContext context; + std::array biquad_filter_state; + std::array previous_samples; + u32 external_context_size; + bool is_external_context_used; + bool voice_dropped; }; class VoiceChannelResource { diff --git a/src/common/uuid.h b/src/common/uuid.h index 4ab9a25f0..2e7a18405 100755 --- a/src/common/uuid.h +++ b/src/common/uuid.h @@ -14,8 +14,8 @@ constexpr u128 INVALID_UUID{{0, 0}}; struct UUID { // UUIDs which are 0 are considered invalid! - u128 uuid = INVALID_UUID; - constexpr UUID() = default; + u128 uuid; + UUID() = default; constexpr explicit UUID(const u128& id) : uuid{id} {} constexpr explicit UUID(const u64 lo, const u64 hi) : uuid{{lo, hi}} {} diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 3906aaf9b..02439b6bb 100755 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -644,6 +644,7 @@ else() -Werror=implicit-fallthrough -Werror=sign-compare + $<$:-Werror=class-memaccess> $<$:-Werror=unused-but-set-parameter> $<$:-Werror=unused-but-set-variable> diff --git a/src/core/file_sys/savedata_factory.h b/src/core/file_sys/savedata_factory.h index 17f774baa..86c9f5350 100755 --- a/src/core/file_sys/savedata_factory.h +++ b/src/core/file_sys/savedata_factory.h @@ -58,7 +58,7 @@ struct SaveDataAttribute { SaveDataType type; SaveDataRank rank; u16 index; - INSERT_PADDING_BYTES(4); + INSERT_PADDING_BYTES_NOINIT(4); u64 zero_1; u64 zero_2; u64 zero_3; @@ -72,7 +72,7 @@ struct SaveDataExtraData { u64 owner_id; s64 timestamp; SaveDataFlags flags; - INSERT_PADDING_BYTES(4); + INSERT_PADDING_BYTES_NOINIT(4); s64 available_size; s64 journal_size; s64 commit_id; diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h index 79bcf5762..55b1716e4 100755 --- a/src/core/hle/ipc.h +++ b/src/core/hle/ipc.h @@ -146,7 +146,7 @@ static_assert(sizeof(BufferDescriptorC) == 8, "BufferDescriptorC size is incorre struct DataPayloadHeader { u32_le magic; - INSERT_PADDING_WORDS(1); + INSERT_PADDING_WORDS_NOINIT(1); }; static_assert(sizeof(DataPayloadHeader) == 8, "DataPayloadHeader size is incorrect"); @@ -174,7 +174,7 @@ struct DomainMessageHeader { INSERT_PADDING_WORDS_NOINIT(2); }; - std::array raw{}; + std::array raw; }; }; static_assert(sizeof(DomainMessageHeader) == 16, "DomainMessageHeader size is incorrect"); diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 6981f8ee7..bb07f6ccc 100755 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -534,7 +534,7 @@ private: rb.Push(RESULT_SUCCESS); } - Common::UUID user_id; + Common::UUID user_id{Common::INVALID_UUID}; }; // 6.0.0+ diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp index 9b829e957..d9865d56f 100755 --- a/src/core/hle/service/acc/profile_manager.cpp +++ b/src/core/hle/service/acc/profile_manager.cpp @@ -227,17 +227,17 @@ void ProfileManager::CloseUser(UUID uuid) { /// Gets all valid user ids on the system UserIDArray ProfileManager::GetAllUsers() const { - UserIDArray output; - std::transform(profiles.begin(), profiles.end(), output.begin(), - [](const ProfileInfo& p) { return p.user_uuid; }); + UserIDArray output{}; + std::ranges::transform(profiles, output.begin(), + [](const ProfileInfo& p) { return p.user_uuid; }); return output; } /// Get all the open users on the system and zero out the rest of the data. This is specifically /// needed for GetOpenUsers and we need to ensure the rest of the output buffer is zero'd out UserIDArray ProfileManager::GetOpenUsers() const { - UserIDArray output; - std::transform(profiles.begin(), profiles.end(), output.begin(), [](const ProfileInfo& p) { + UserIDArray output{}; + std::ranges::transform(profiles, output.begin(), [](const ProfileInfo& p) { if (p.is_open) return p.user_uuid; return UUID{Common::INVALID_UUID}; diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h index 5310637a6..71b9d5518 100755 --- a/src/core/hle/service/acc/profile_manager.h +++ b/src/core/hle/service/acc/profile_manager.h @@ -23,12 +23,12 @@ using UserIDArray = std::array; /// Contains extra data related to a user. /// TODO: RE this structure struct ProfileData { - INSERT_PADDING_WORDS(1); - u32 icon_id{}; - u8 bg_color_id{}; - INSERT_PADDING_BYTES(0x7); - INSERT_PADDING_BYTES(0x10); - INSERT_PADDING_BYTES(0x60); + INSERT_PADDING_WORDS_NOINIT(1); + u32 icon_id; + u8 bg_color_id; + INSERT_PADDING_BYTES_NOINIT(0x7); + INSERT_PADDING_BYTES_NOINIT(0x10); + INSERT_PADDING_BYTES_NOINIT(0x60); }; static_assert(sizeof(ProfileData) == 0x80, "ProfileData structure has incorrect size"); @@ -43,9 +43,9 @@ struct ProfileInfo { }; struct ProfileBase { - Common::UUID user_uuid{Common::INVALID_UUID}; - u64_le timestamp{}; - ProfileUsername username{}; + Common::UUID user_uuid; + u64_le timestamp; + ProfileUsername username; // Zero out all the fields to make the profile slot considered "Empty" void Invalidate() { diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 0cd797109..02ca711fb 100755 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -29,7 +29,7 @@ constexpr int DefaultSampleRate{48000}; struct AudoutParams { s32_le sample_rate; u16_le channel_count; - INSERT_PADDING_BYTES(2); + INSERT_PADDING_BYTES_NOINIT(2); }; static_assert(sizeof(AudoutParams) == 0x8, "AudoutParams is an invalid size"); diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index d280e7caf..1082be489 100755 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -141,7 +141,9 @@ bool Controller_NPad::IsDeviceHandleValid(const DeviceHandle& device_handle) { device_handle.device_index < DeviceIndex::MaxDeviceIndex; } -Controller_NPad::Controller_NPad(Core::System& system) : ControllerBase(system), system(system) {} +Controller_NPad::Controller_NPad(Core::System& system) : ControllerBase(system), system(system) { + latest_vibration_values.fill({DEFAULT_VIBRATION_VALUE, DEFAULT_VIBRATION_VALUE}); +} Controller_NPad::~Controller_NPad() { OnRelease(); @@ -732,7 +734,7 @@ bool Controller_NPad::VibrateControllerAtIndex(std::size_t npad_index, std::size // Send an empty vibration to stop any vibrations. vibrations[npad_index][device_index]->SetRumblePlay(0.0f, 160.0f, 0.0f, 320.0f); // Then reset the vibration value to its default value. - latest_vibration_values[npad_index][device_index] = {}; + latest_vibration_values[npad_index][device_index] = DEFAULT_VIBRATION_VALUE; } return false; diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index e2e826623..bc85ca4df 100755 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -97,10 +97,10 @@ public: }; struct DeviceHandle { - NpadType npad_type{}; - u8 npad_id{}; - DeviceIndex device_index{}; - INSERT_PADDING_BYTES(1); + NpadType npad_type; + u8 npad_id; + DeviceIndex device_index; + INSERT_PADDING_BYTES_NOINIT(1); }; static_assert(sizeof(DeviceHandle) == 4, "DeviceHandle is an invalid size"); @@ -120,13 +120,20 @@ public: static_assert(sizeof(NpadStyleSet) == 4, "NpadStyleSet is an invalid size"); struct VibrationValue { - f32 amp_low{0.0f}; - f32 freq_low{160.0f}; - f32 amp_high{0.0f}; - f32 freq_high{320.0f}; + f32 amp_low; + f32 freq_low; + f32 amp_high; + f32 freq_high; }; static_assert(sizeof(VibrationValue) == 0x10, "Vibration is an invalid size"); + static constexpr VibrationValue DEFAULT_VIBRATION_VALUE{ + .amp_low = 0.0f, + .freq_low = 160.0f, + .amp_high = 0.0f, + .freq_high = 320.0f, + }; + struct LedPattern { explicit LedPattern(u64 light1, u64 light2, u64 light3, u64 light4) { position1.Assign(light1); diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 8d95f74e6..2b13d6fe6 100755 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -401,9 +401,9 @@ void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - u32 basic_xpad_id{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + u32 basic_xpad_id; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -431,9 +431,9 @@ void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) { void Hid::ActivateSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -452,9 +452,9 @@ void Hid::ActivateSixAxisSensor(Kernel::HLERequestContext& ctx) { void Hid::DeactivateSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -473,9 +473,9 @@ void Hid::DeactivateSixAxisSensor(Kernel::HLERequestContext& ctx) { void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -494,9 +494,9 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) { void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -515,10 +515,10 @@ void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { void Hid::EnableSixAxisSensorFusion(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - bool enable_sixaxis_sensor_fusion{}; - INSERT_PADDING_BYTES(3); - Controller_NPad::DeviceHandle sixaxis_handle{}; - u64 applet_resource_user_id{}; + bool enable_sixaxis_sensor_fusion; + INSERT_PADDING_BYTES_NOINIT(3); + Controller_NPad::DeviceHandle sixaxis_handle; + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -556,9 +556,9 @@ void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -577,9 +577,9 @@ void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -599,9 +599,9 @@ void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { void Hid::IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -620,9 +620,9 @@ void Hid::IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx) { void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - u32 unknown{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + u32 unknown; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -702,10 +702,10 @@ void Hid::DeactivateNpad(Kernel::HLERequestContext& ctx) { void Hid::AcquireNpadStyleSetUpdateEventHandle(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - u32 npad_id{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; - u64 unknown{}; + u32 npad_id; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; + u64 unknown; }; const auto parameters{rp.PopRaw()}; @@ -722,9 +722,9 @@ void Hid::AcquireNpadStyleSetUpdateEventHandle(Kernel::HLERequestContext& ctx) { void Hid::DisconnectNpad(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - u32 npad_id{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + u32 npad_id; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -756,9 +756,9 @@ void Hid::ActivateNpadWithRevision(Kernel::HLERequestContext& ctx) { // Should have no effect with how our npad sets up the data IPC::RequestParser rp{ctx}; struct Parameters { - u32 unknown{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + u32 unknown; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -800,9 +800,9 @@ void Hid::GetNpadJoyHoldType(Kernel::HLERequestContext& ctx) { void Hid::SetNpadJoyAssignmentModeSingleByDefault(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - u32 npad_id{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + u32 npad_id; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -821,10 +821,10 @@ void Hid::SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx) { // TODO: Check the differences between this and SetNpadJoyAssignmentModeSingleByDefault IPC::RequestParser rp{ctx}; struct Parameters { - u32 npad_id{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; - u64 npad_joy_device_type{}; + u32 npad_id; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; + u64 npad_joy_device_type; }; const auto parameters{rp.PopRaw()}; @@ -844,9 +844,9 @@ void Hid::SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx) { void Hid::SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - u32 npad_id{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + u32 npad_id; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -952,9 +952,9 @@ void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) { void Hid::IsUnintendedHomeButtonInputProtectionEnabled(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - u32 npad_id{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + u32 npad_id; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -971,10 +971,10 @@ void Hid::IsUnintendedHomeButtonInputProtectionEnabled(Kernel::HLERequestContext void Hid::EnableUnintendedHomeButtonInputProtection(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - bool unintended_home_button_input_protection{}; - INSERT_PADDING_BYTES(3); - u32 npad_id{}; - u64 applet_resource_user_id{}; + bool unintended_home_button_input_protection; + INSERT_PADDING_BYTES_NOINIT(3); + u32 npad_id; + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -1026,10 +1026,10 @@ void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) { void Hid::SendVibrationValue(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle vibration_device_handle{}; - Controller_NPad::VibrationValue vibration_value{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle vibration_device_handle; + Controller_NPad::VibrationValue vibration_value; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -1050,9 +1050,9 @@ void Hid::SendVibrationValue(Kernel::HLERequestContext& ctx) { void Hid::GetActualVibrationValue(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle vibration_device_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle vibration_device_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -1147,9 +1147,9 @@ void Hid::EndPermitVibrationSession(Kernel::HLERequestContext& ctx) { void Hid::IsVibrationDeviceMounted(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle vibration_device_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle vibration_device_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -1180,9 +1180,9 @@ void Hid::ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; @@ -1200,9 +1200,9 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { - Controller_NPad::DeviceHandle sixaxis_handle{}; - INSERT_PADDING_WORDS(1); - u64 applet_resource_user_id{}; + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; }; const auto parameters{rp.PopRaw()}; diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp index d73b90015..567a4e345 100755 --- a/src/core/hle/service/mii/manager.cpp +++ b/src/core/hle/service/mii/manager.cpp @@ -100,6 +100,7 @@ MiiInfo ConvertStoreDataToInfo(const MiiStoreData& data) { .mole_scale = static_cast(bf.mole_scale.Value()), .mole_x = static_cast(bf.mole_x.Value()), .mole_y = static_cast(bf.mole_y.Value()), + .padding = 0, }; } diff --git a/src/core/hle/service/mii/manager.h b/src/core/hle/service/mii/manager.h index 927451dea..32c27ee65 100755 --- a/src/core/hle/service/mii/manager.h +++ b/src/core/hle/service/mii/manager.h @@ -27,58 +27,58 @@ enum class SourceFlag : u32 { DECLARE_ENUM_FLAG_OPERATORS(SourceFlag); struct MiiInfo { - Common::UUID uuid{Common::INVALID_UUID}; - std::array name{}; - u8 font_region{}; - u8 favorite_color{}; - u8 gender{}; - u8 height{}; - u8 build{}; - u8 type{}; - u8 region_move{}; - u8 faceline_type{}; - u8 faceline_color{}; - u8 faceline_wrinkle{}; - u8 faceline_make{}; - u8 hair_type{}; - u8 hair_color{}; - u8 hair_flip{}; - u8 eye_type{}; - u8 eye_color{}; - u8 eye_scale{}; - u8 eye_aspect{}; - u8 eye_rotate{}; - u8 eye_x{}; - u8 eye_y{}; - u8 eyebrow_type{}; - u8 eyebrow_color{}; - u8 eyebrow_scale{}; - u8 eyebrow_aspect{}; - u8 eyebrow_rotate{}; - u8 eyebrow_x{}; - u8 eyebrow_y{}; - u8 nose_type{}; - u8 nose_scale{}; - u8 nose_y{}; - u8 mouth_type{}; - u8 mouth_color{}; - u8 mouth_scale{}; - u8 mouth_aspect{}; - u8 mouth_y{}; - u8 beard_color{}; - u8 beard_type{}; - u8 mustache_type{}; - u8 mustache_scale{}; - u8 mustache_y{}; - u8 glasses_type{}; - u8 glasses_color{}; - u8 glasses_scale{}; - u8 glasses_y{}; - u8 mole_type{}; - u8 mole_scale{}; - u8 mole_x{}; - u8 mole_y{}; - INSERT_PADDING_BYTES(1); + Common::UUID uuid; + std::array name; + u8 font_region; + u8 favorite_color; + u8 gender; + u8 height; + u8 build; + u8 type; + u8 region_move; + u8 faceline_type; + u8 faceline_color; + u8 faceline_wrinkle; + u8 faceline_make; + u8 hair_type; + u8 hair_color; + u8 hair_flip; + u8 eye_type; + u8 eye_color; + u8 eye_scale; + u8 eye_aspect; + u8 eye_rotate; + u8 eye_x; + u8 eye_y; + u8 eyebrow_type; + u8 eyebrow_color; + u8 eyebrow_scale; + u8 eyebrow_aspect; + u8 eyebrow_rotate; + u8 eyebrow_x; + u8 eyebrow_y; + u8 nose_type; + u8 nose_scale; + u8 nose_y; + u8 mouth_type; + u8 mouth_color; + u8 mouth_scale; + u8 mouth_aspect; + u8 mouth_y; + u8 beard_color; + u8 beard_type; + u8 mustache_type; + u8 mustache_scale; + u8 mustache_y; + u8 glasses_type; + u8 glasses_color; + u8 glasses_scale; + u8 glasses_y; + u8 mole_type; + u8 mole_scale; + u8 mole_x; + u8 mole_y; + u8 padding; std::u16string Name() const; }; @@ -324,7 +324,7 @@ public: ResultCode GetIndex(const MiiInfo& info, u32& index); private: - const Common::UUID user_id; + const Common::UUID user_id{Common::INVALID_UUID}; u64 update_counter{}; }; diff --git a/src/core/hle/service/time/clock_types.h b/src/core/hle/service/time/clock_types.h index 72e1921ec..b78892223 100755 --- a/src/core/hle/service/time/clock_types.h +++ b/src/core/hle/service/time/clock_types.h @@ -73,19 +73,19 @@ struct TimeSpanType { static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size"); struct ClockSnapshot { - SystemClockContext user_context{}; - SystemClockContext network_context{}; - s64 user_time{}; - s64 network_time{}; - TimeZone::CalendarTime user_calendar_time{}; - TimeZone::CalendarTime network_calendar_time{}; - TimeZone::CalendarAdditionalInfo user_calendar_additional_time{}; - TimeZone::CalendarAdditionalInfo network_calendar_additional_time{}; - SteadyClockTimePoint steady_clock_time_point{}; - TimeZone::LocationName location_name{}; - u8 is_automatic_correction_enabled{}; - u8 type{}; - INSERT_PADDING_BYTES(0x2); + SystemClockContext user_context; + SystemClockContext network_context; + s64 user_time; + s64 network_time; + TimeZone::CalendarTime user_calendar_time; + TimeZone::CalendarTime network_calendar_time; + TimeZone::CalendarAdditionalInfo user_calendar_additional_time; + TimeZone::CalendarAdditionalInfo network_calendar_additional_time; + SteadyClockTimePoint steady_clock_time_point; + TimeZone::LocationName location_name; + u8 is_automatic_correction_enabled; + u8 type; + INSERT_PADDING_BYTES_NOINIT(0x2); static ResultCode GetCurrentTime(s64& current_time, const SteadyClockTimePoint& steady_clock_time_point, diff --git a/src/core/hle/service/time/time_zone_types.h b/src/core/hle/service/time/time_zone_types.h index 9be15b53e..4a57e036d 100755 --- a/src/core/hle/service/time/time_zone_types.h +++ b/src/core/hle/service/time/time_zone_types.h @@ -45,23 +45,23 @@ static_assert(sizeof(TimeZoneRule) == 0x4000, "TimeZoneRule is incorrect size"); /// https://switchbrew.org/wiki/Glue_services#CalendarAdditionalInfo struct CalendarAdditionalInfo { - u32 day_of_week{}; - u32 day_of_year{}; + u32 day_of_week; + u32 day_of_year; std::array timezone_name; - u32 is_dst{}; - s32 gmt_offset{}; + u32 is_dst; + s32 gmt_offset; }; static_assert(sizeof(CalendarAdditionalInfo) == 0x18, "CalendarAdditionalInfo is incorrect size"); /// https://switchbrew.org/wiki/Glue_services#CalendarTime struct CalendarTime { - s16 year{}; - s8 month{}; - s8 day{}; - s8 hour{}; - s8 minute{}; - s8 second{}; - INSERT_PADDING_BYTES(1); + s16 year; + s8 month; + s8 day; + s8 hour; + s8 minute; + s8 second; + INSERT_PADDING_BYTES_NOINIT(1); }; static_assert(sizeof(CalendarTime) == 0x8, "CalendarTime is incorrect size"); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e4f3c8e35..bd8507610 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -239,8 +239,7 @@ private: void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, std::span copies); - void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, - std::span copies); + void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span copies); void DeleteBuffer(BufferId buffer_id); @@ -362,11 +361,17 @@ void BufferCache

::DownloadMemory(VAddr cpu_addr, u64 size) { auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); const u8* const mapped_memory = download_staging.mapped_span.data(); const std::span copies_span(copies.data(), copies.data() + copies.size()); + for (BufferCopy& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dst_offset += download_staging.offset; + } runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); runtime.Finish(); for (const BufferCopy& copy : copies) { const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; - const u8* copy_mapped_memory = mapped_memory + copy.dst_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* copy_mapped_memory = mapped_memory + dst_offset; cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); } } else { @@ -554,7 +559,9 @@ void BufferCache

::PopAsyncFlushes() { } if constexpr (USE_MEMORY_MAPS) { auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); - for (const auto [copy, buffer_id] : downloads) { + for (auto& [copy, buffer_id] : downloads) { + // Have in mind the staging buffer offset for the copy + copy.dst_offset += download_staging.offset; const std::array copies{copy}; runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); } @@ -562,7 +569,9 @@ void BufferCache

::PopAsyncFlushes() { for (const auto [copy, buffer_id] : downloads) { const Buffer& buffer = slot_buffers[buffer_id]; const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; - const u8* read_mapped_memory = download_staging.mapped_span.data() + copy.dst_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); } } else { @@ -651,24 +660,25 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = binding.size; Buffer& buffer = slot_buffers[binding.buffer_id]; - if constexpr (IS_OPENGL) { - if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { + if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { + if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia if (!HasFastUniformBufferBound(stage, binding_index)) { // We only have to bind when the currently bound buffer is not the fast version - fast_bound_uniform_buffers[stage] |= 1U << binding_index; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); runtime.PushFastUniformBuffer(stage, binding_index, span); - } else { - // Stream buffer path to avoid stalling on non-Nvidia drivers - const auto span = runtime.BindMappedUniformBuffer(stage, binding_index, size); - cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); + return; } - return; } + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + + // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan + const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); + cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); + return; } // Classic cached path SynchronizeBuffer(buffer, cpu_addr, size); @@ -1117,13 +1127,16 @@ void BufferCache

::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, template void BufferCache

::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, - std::span copies) { + std::span copies) { auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); const std::span staging_pointer = upload_staging.mapped_span; - for (const BufferCopy& copy : copies) { - const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; + for (BufferCopy& copy : copies) { u8* const src_pointer = staging_pointer.data() + copy.src_offset; + const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); + + // Apply the staging offset + copy.src_offset += upload_staging.offset; } runtime.CopyBuffer(buffer, upload_staging.buffer, copies); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 37572ab28..31eb54123 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -550,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, } void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, - size_t buffer_offset, std::span swizzles) { switch (image.info.type) { case ImageType::e2D: - return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); + return util_shaders.BlockLinearUpload2D(image, map, swizzles); case ImageType::e3D: - return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); + return util_shaders.BlockLinearUpload3D(image, map, swizzles); case ImageType::Linear: - return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); + return util_shaders.PitchUpload(image, map, swizzles); default: UNREACHABLE(); break; @@ -710,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, } } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +void Image::UploadMemory(const ImageBufferMap& map, std::span copies) { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); - glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); @@ -729,19 +728,19 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, current_image_height = copy.buffer_image_height; glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); } - CopyBufferToImage(copy, buffer_offset); + CopyBufferToImage(copy, map.offset); } } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +void Image::UploadMemory(const ImageBufferMap& map, std::span copies) { for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset, + glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, copy.dst_offset, copy.size); } } -void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, +void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -760,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, current_image_height = copy.buffer_image_height; glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); } - CopyImageToBuffer(copy, buffer_offset); + CopyImageToBuffer(copy, map.offset); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 60d08d6d6..874cf54f4 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -35,6 +35,7 @@ struct ImageBufferMap { ~ImageBufferMap(); std::span mapped_span; + size_t offset = 0; OGLSync* sync; GLuint buffer; }; @@ -78,7 +79,7 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void AccelerateImageUpload(Image& image, const ImageBufferMap& map, std::span swizzles); void InsertUploadMemoryBarrier(); @@ -137,14 +138,12 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + void UploadMemory(const ImageBufferMap& map, std::span copies); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, - std::span copies); + void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, - std::span copies); + void DownloadMemory(ImageBufferMap& map, std::span copies); GLuint Handle() const noexcept { return texture.handle; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index aeb36551c..1b58e8617 100755 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) UtilShaders::~UtilShaders() = default; -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, std::span swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); - glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); @@ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, std::span swizzles) { static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); @@ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s program_manager.RestoreGuestCompute(); } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, std::span swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_INPUT_BUFFER = 0; @@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu "Non-power of two images are not implemented"); program_manager.BindHostCompute(pitch_unswizzle_program.handle); - glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); @@ -167,7 +167,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index bec026bc3..7b1d16b09 100755 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -24,13 +24,13 @@ public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, std::span swizzles); - void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, std::span swizzles); - void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void PitchUpload(Image& image, const ImageBufferMap& map, std::span swizzles); void CopyBC4(Image& dst_image, Image& src_image, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 48fc5d966..4561c72e4 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -138,17 +138,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 base_vertex, u32 num_indices, VkBuffer buffer, u32 offset, [[maybe_unused]] u32 size) { - VkIndexType index_type = MaxwellToVK::IndexFormat(index_format); + VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); + VkDeviceSize vk_offset = offset; if (topology == PrimitiveTopology::Quads) { - index_type = VK_INDEX_TYPE_UINT32; - std::tie(buffer, offset) = + vk_index_type = VK_INDEX_TYPE_UINT32; + std::tie(buffer, vk_offset) = quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); - } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { - index_type = VK_INDEX_TYPE_UINT16; - std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset); + } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { + vk_index_type = VK_INDEX_TYPE_UINT16; + std::tie(buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); } - scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(buffer, offset, index_type); + scheduler.Record([buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer, vk_offset, vk_index_type); }); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index d232e1f2d..7ff7e0d55 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -69,6 +69,13 @@ public: void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + std::span BindMappedUniformBuffer([[maybe_unused]] size_t stage, + [[maybe_unused]] u32 binding_index, u32 size) { + const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload); + BindBuffer(ref.buffer, static_cast(ref.offset), size); + return ref.mapped_span; + } + void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) { BindBuffer(buffer, offset, size); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a4fdcdf81..2f9a7b028 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,6 +10,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/div_ceil.h" #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -148,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, Uint8Pass::~Uint8Pass() = default; -std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, - u32 src_offset) { +std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, + u32 src_offset) { const u32 staging_size = static_cast(num_vertices * sizeof(u16)); const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); + update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { - constexpr u32 dispatch_size = 1024; + static constexpr u32 DISPATCH_SIZE = 1024; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + }; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = static_cast(num_vertices * sizeof(u16)); + cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); }); - return {staging.buffer, 0}; + return {staging.buffer, staging.offset}; } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, @@ -194,7 +190,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, QuadIndexedPass::~QuadIndexedPass() = default; -std::pair QuadIndexedPass::Assemble( +std::pair QuadIndexedPass::Assemble( Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, u32 src_offset) { const u32 index_shift = [index_format] { @@ -217,34 +213,29 @@ std::pair QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); - update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); + update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { - static constexpr u32 dispatch_size = 1024; + static constexpr u32 DISPATCH_SIZE = 1024; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + }; const std::array push_constants = {base_vertex, index_shift}; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); - cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = static_cast(num_tri_vertices * sizeof(u32)); + cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); }); - return {staging.buffer, 0}; + return {staging.buffer, staging.offset}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 4904019f5..17d781d99 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -50,7 +50,8 @@ public: /// Assemble uint8 indices into an uint16 index buffer /// Returns a pair with the staging buffer, and the offset where the assembled data is - std::pair Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset); + std::pair Assemble(u32 num_vertices, VkBuffer src_buffer, + u32 src_offset); private: VKScheduler& scheduler; @@ -66,9 +67,9 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); - std::pair Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, - u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, - u32 src_offset); + std::pair Assemble( + Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, + u32 base_vertex, VkBuffer src_buffer, u32 src_offset); private: VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 97fd41cc1..bc71202e2 100755 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -8,6 +8,7 @@ #include +#include "common/alignment.h" #include "common/assert.h" #include "common/bit_util.h" #include "common/common_types.h" @@ -17,14 +18,118 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +// Maximum potential alignment of a Vulkan buffer +constexpr VkDeviceSize MAX_ALIGNMENT = 256; +// Maximum size to put elements in the stream buffer +constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024; +// Stream buffer size in bytes +constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; + +constexpr VkMemoryPropertyFlags HOST_FLAGS = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; + +bool IsStreamHeap(VkMemoryHeap heap) noexcept { + return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; +} + +std::optional FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, + VkMemoryPropertyFlags flags) noexcept { + for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { + if (((type_mask >> type_index) & 1) == 0) { + // Memory type is incompatible + continue; + } + const VkMemoryType& memory_type = props.memoryTypes[type_index]; + if ((memory_type.propertyFlags & flags) != flags) { + // Memory type doesn't have the flags we want + continue; + } + if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { + // Memory heap is not suitable for streaming + continue; + } + // Success! + return type_index; + } + return std::nullopt; +} + +u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) { + // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this + std::optional type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); + if (type) { + return *type; + } + // Otherwise try without the DEVICE_LOCAL_BIT + type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); + if (type) { + return *type; + } + // This should never happen, and in case it does, signal it as an out of memory situation + throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); +} + +size_t Region(size_t iterator) noexcept { + return iterator / REGION_SIZE; +} +} // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_) - : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + const vk::Device& dev = device.GetLogical(); + stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = STREAM_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + if (device.HasDebuggingToolAttached()) { + stream_buffer.SetObjectNameEXT("Stream Buffer"); + } + VkMemoryDedicatedRequirements dedicated_reqs{ + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + .pNext = nullptr, + .prefersDedicatedAllocation = VK_FALSE, + .requiresDedicatedAllocation = VK_FALSE, + }; + const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); + const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || + dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; + const VkMemoryDedicatedAllocateInfo dedicated_info{ + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = nullptr, + .image = nullptr, + .buffer = *stream_buffer, + }; + const auto memory_properties = device.GetPhysical().GetMemoryProperties(); + stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = make_dedicated ? &dedicated_info : nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits), + }); + if (device.HasDebuggingToolAttached()) { + stream_memory.SetObjectNameEXT("Stream Buffer Memory"); + } + stream_buffer.BindMemory(*stream_memory, 0); + stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); +} StagingBufferPool::~StagingBufferPool() = default; StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { + if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { + return GetStreamBuffer(size); + } if (const std::optional ref = TryGetReservedBuffer(size, usage)) { return *ref; } @@ -39,6 +144,42 @@ void StagingBufferPool::TickFrame() { ReleaseCache(MemoryUsage::Download); } +StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { + for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; + ++region) { + sync_ticks[region] = scheduler.CurrentTick(); + } + used_iterator = iterator; + + for (size_t region = Region(free_iterator) + 1, + region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region < region_end; ++region) { + scheduler.Wait(sync_ticks[region]); + } + if (iterator + size > free_iterator) { + free_iterator = iterator + size; + } + if (iterator + size > STREAM_BUFFER_SIZE) { + for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { + sync_ticks[region] = scheduler.CurrentTick(); + } + used_iterator = 0; + iterator = 0; + free_iterator = size; + + for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { + scheduler.Wait(sync_ticks[region]); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return StagingBufferRef{ + .buffer = *stream_buffer, + .offset = static_cast(offset), + .mapped_span = std::span(stream_pointer + offset, size), + }; +} + std::optional StagingBufferPool::TryGetReservedBuffer(size_t size, MemoryUsage usage) { StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d42918a47..4ed99c0df 100755 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -19,11 +19,14 @@ class VKScheduler; struct StagingBufferRef { VkBuffer buffer; + VkDeviceSize offset; std::span mapped_span; }; class StagingBufferPool { public: + static constexpr size_t NUM_SYNCS = 16; + explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler); ~StagingBufferPool(); @@ -33,6 +36,11 @@ public: void TickFrame(); private: + struct StreamBufferCommit { + size_t upper_bound; + u64 tick; + }; + struct StagingBuffer { vk::Buffer buffer; MemoryCommit commit; @@ -42,6 +50,7 @@ private: StagingBufferRef Ref() const noexcept { return { .buffer = *buffer, + .offset = 0, .mapped_span = mapped_span, }; } @@ -56,6 +65,8 @@ private: static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; using StagingBuffersCache = std::array; + StagingBufferRef GetStreamBuffer(size_t size); + std::optional TryGetReservedBuffer(size_t size, MemoryUsage usage); StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); @@ -70,6 +81,15 @@ private: MemoryAllocator& memory_allocator; VKScheduler& scheduler; + vk::Buffer stream_buffer; + vk::DeviceMemory stream_memory; + u8* stream_pointer = nullptr; + + size_t iterator = 0; + size_t used_iterator = 0; + size_t free_iterator = 0; + std::array sync_ticks{}; + StagingBuffersCache device_local_cache; StagingBuffersCache upload_cache; StagingBuffersCache download_cache; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 35b988d9a..977c80a46 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -802,11 +802,10 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } -void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, - std::span copies) { +void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { // TODO: Move this to another API scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); const VkBuffer src_buffer = map.buffer; const VkImage vk_image = *image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; @@ -817,11 +816,11 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, }); } -void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, +void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { // TODO: Move this to another API scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); + std::vector vk_copies = TransformBufferCopies(copies, map.offset); const VkBuffer src_buffer = map.buffer; const VkBuffer dst_buffer = *buffer; scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { @@ -830,9 +829,8 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, }); } -void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, - std::span copies) { - std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); +void Image::DownloadMemory(const StagingBufferRef& map, std::span copies) { + std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier read_barrier{ diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 405021d30..6f336b08a 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -82,7 +82,7 @@ struct TextureCacheRuntime { return false; } - void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t, + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span) { UNREACHABLE(); } @@ -100,13 +100,12 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, + void UploadMemory(const StagingBufferRef& map, std::span copies); - void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, - std::span copies); + void UploadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, + void DownloadMemory(const StagingBufferRef& map, std::span copies); [[nodiscard]] VkImage Handle() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f336b705f..b1da69971 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -212,7 +212,7 @@ private: /// Upload data from guest to an image template - void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset); + void UploadImageContents(Image& image, StagingBuffer& staging_buffer); /// Find or create an image view from a guest descriptor [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); @@ -592,7 +592,7 @@ void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { Image& image = slot_images[image_id]; auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, 0, copies); + image.DownloadMemory(map, copies); runtime.Finish(); SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } @@ -750,24 +750,24 @@ void TextureCache

::PopAsyncFlushes() { total_size_bytes += slot_images[image_id].unswizzled_size_bytes; } auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - size_t buffer_offset = 0; + const size_t original_offset = download_map.offset; for (const ImageId image_id : download_ids) { Image& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, buffer_offset, copies); - buffer_offset += image.unswizzled_size_bytes; + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; } // Wait for downloads to finish runtime.Finish(); - buffer_offset = 0; - const std::span download_span = download_map.mapped_span; + download_map.offset = original_offset; + std::span download_span = download_map.mapped_span; for (const ImageId image_id : download_ids) { const ImageBase& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - const std::span image_download_span = download_span.subspan(buffer_offset); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); - buffer_offset += image.unswizzled_size_bytes; + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); } committed_downloads.pop(); } @@ -798,32 +798,32 @@ void TextureCache

::RefreshContents(Image& image) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); return; } - auto map = runtime.UploadStagingBuffer(MapSizeBytes(image)); - UploadImageContents(image, map, 0); + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); runtime.InsertUploadMemoryBarrier(); } template -template -void TextureCache

::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { - const std::span mapped_span = map.mapped_span.subspan(buffer_offset); +template +void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) { + const std::span mapped_span = staging.mapped_span; const GPUVAddr gpu_addr = image.gpu_addr; if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); + runtime.AccelerateImageUpload(image, staging, uploads); } else if (True(image.flags & ImageFlagBits::Converted)) { std::vector unswizzled_data(image.unswizzled_size_bytes); auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } else if (image.info.type == ImageType::Buffer) { const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } } diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index 85cd7ea86..d1ce29450 100755 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -85,11 +85,10 @@ public: MemoryAllocator(const MemoryAllocator&) = delete; /** - * Commits a memory with the specified requeriments. + * Commits a memory with the specified requirements. * * @param requirements Requirements returned from a Vulkan call. - * @param host_visible Signals the allocator that it *must* use host visible and coherent - * memory. When passing false, it will try to allocate device local memory. + * @param usage Indicates how the memory will be used. * * @returns A memory commit. */ diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index d39bbdc70..311d3d22d 100755 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -168,7 +168,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkFreeCommandBuffers); X(vkFreeDescriptorSets); X(vkFreeMemory); - X(vkGetBufferMemoryRequirements); + X(vkGetBufferMemoryRequirements2); X(vkGetDeviceQueue); X(vkGetEventStatus); X(vkGetFenceStatus); @@ -786,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { return DeviceMemory(memory, handle, *dld); } -VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { - VkMemoryRequirements requirements; - dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); - return requirements; +VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer, + void* pnext) const noexcept { + const VkBufferMemoryRequirementsInfo2 info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + .pNext = nullptr, + .buffer = buffer, + }; + VkMemoryRequirements2 requirements{ + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = pnext, + .memoryRequirements{}, + }; + dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements); + return requirements.memoryRequirements; } VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 7f781b081..3e36d356a 100755 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -283,7 +283,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; PFN_vkFreeMemory vkFreeMemory{}; - PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; + PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{}; PFN_vkGetDeviceQueue vkGetDeviceQueue{}; PFN_vkGetEventStatus vkGetEventStatus{}; PFN_vkGetFenceStatus vkGetFenceStatus{}; @@ -871,7 +871,8 @@ public: DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; - VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; + VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer, + void* pnext = nullptr) const noexcept; VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp index 4bf2bfd40..0a4c48b3d 100755 --- a/src/yuzu/applets/profile_select.cpp +++ b/src/yuzu/applets/profile_select.cpp @@ -93,7 +93,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent) const auto& profiles = profile_manager->GetAllUsers(); for (const auto& user : profiles) { - Service::Account::ProfileBase profile; + Service::Account::ProfileBase profile{}; if (!profile_manager->GetProfileBase(user, profile)) continue; diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp index 13d9a4757..d102a43af 100755 --- a/src/yuzu/configuration/configure_profile_manager.cpp +++ b/src/yuzu/configuration/configure_profile_manager.cpp @@ -40,7 +40,7 @@ QString GetImagePath(Common::UUID uuid) { } QString GetAccountUsername(const Service::Account::ProfileManager& manager, Common::UUID uuid) { - Service::Account::ProfileBase profile; + Service::Account::ProfileBase profile{}; if (!manager.GetProfileBase(uuid, profile)) { return {}; } @@ -147,7 +147,7 @@ void ConfigureProfileManager::SetConfiguration() { void ConfigureProfileManager::PopulateUserList() { const auto& profiles = profile_manager->GetAllUsers(); for (const auto& user : profiles) { - Service::Account::ProfileBase profile; + Service::Account::ProfileBase profile{}; if (!profile_manager->GetProfileBase(user, profile)) continue; @@ -212,7 +212,7 @@ void ConfigureProfileManager::RenameUser() { const auto uuid = profile_manager->GetUser(user); ASSERT(uuid); - Service::Account::ProfileBase profile; + Service::Account::ProfileBase profile{}; if (!profile_manager->GetProfileBase(*uuid, profile)) return;