From 61ffc1086878ab664018f1f8e9b38d01e3827dce Mon Sep 17 00:00:00 2001
From: pineappleEA <pineaea@gmail.com>
Date: Sat, 11 Feb 2023 20:06:54 +0100
Subject: [PATCH] early-access version 3385

---
 README.md                                     |  2 +-
 .../renderer/command/effect/aux_.cpp          | 76 ++++++++++++++++---
 src/video_core/gpu_thread.cpp                 |  3 +-
 src/video_core/gpu_thread.h                   |  4 +-
 src/video_core/host1x/vic.cpp                 | 14 ++--
 5 files changed, 74 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 48cdd1564..a507b0303 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 3384.
+This is the source code for early-access 3385.
 
 ## Legal Notice
 
diff --git a/src/audio_core/renderer/command/effect/aux_.cpp b/src/audio_core/renderer/command/effect/aux_.cpp
index 32873ec49..e487feae0 100755
--- a/src/audio_core/renderer/command/effect/aux_.cpp
+++ b/src/audio_core/renderer/command/effect/aux_.cpp
@@ -20,10 +20,24 @@ static void ResetAuxBufferDsp(Core::Memory::Memory& memory, const CpuAddr aux_in
         return;
     }
 
-    auto info{reinterpret_cast<AuxInfo::AuxInfoDsp*>(memory.GetPointer(aux_info))};
-    info->read_offset = 0;
-    info->write_offset = 0;
-    info->total_sample_count = 0;
+    AuxInfo::AuxInfoDsp info{};
+    auto info_ptr{&info};
+    bool host_safe{(aux_info & Core::Memory::YUZU_PAGEMASK) <=
+                   (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp))};
+
+    if (host_safe) [[likely]] {
+        info_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(aux_info);
+    } else {
+        memory.ReadBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp));
+    }
+
+    info_ptr->read_offset = 0;
+    info_ptr->write_offset = 0;
+    info_ptr->total_sample_count = 0;
+
+    if (!host_safe) [[unlikely]] {
+        memory.WriteBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp));
+    }
 }
 
 /**
@@ -71,9 +85,18 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
         return 0;
     }
 
-    auto send_info{reinterpret_cast<AuxInfo::AuxInfoDsp*>(memory.GetPointer(send_info_))};
+    AuxInfo::AuxInfoDsp send_info{};
+    auto send_ptr = &send_info;
+    bool host_safe = (send_info_ & Core::Memory::YUZU_PAGEMASK) <=
+                     (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp));
 
-    u32 target_write_offset{send_info->write_offset + write_offset};
+    if (host_safe) [[likely]] {
+        send_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(send_info_);
+    } else {
+        memory.ReadBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp));
+    }
+
+    u32 target_write_offset{send_ptr->write_offset + write_offset};
     if (target_write_offset > count_max) {
         return 0;
     }
@@ -82,7 +105,13 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
     u32 read_pos{0};
     while (write_count > 0) {
         u32 to_write{std::min(count_max - target_write_offset, write_count)};
-        if (to_write) {
+        const auto write_addr = send_buffer + target_write_offset * sizeof(s32);
+        // Use the host pointer only when the whole span stays inside one guest page.
+        bool write_safe{(write_addr & Core::Memory::YUZU_PAGEMASK) + to_write * sizeof(s32) <=
+                        Core::Memory::YUZU_PAGESIZE};
+        if (write_safe) [[likely]] {
+            std::memcpy(memory.GetPointer(write_addr), &input[read_pos], to_write * sizeof(s32));
+        } else {
             memory.WriteBlockUnsafe(send_buffer + target_write_offset * sizeof(s32),
                                     &input[read_pos], to_write * sizeof(s32));
         }
@@ -92,7 +121,11 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
     }
 
     if (update_count) {
-        send_info->write_offset = (send_info->write_offset + update_count) % count_max;
+        send_ptr->write_offset = (send_ptr->write_offset + update_count) % count_max;
+    }
+
+    if (!host_safe) [[unlikely]] {
+        memory.WriteBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp));
     }
 
     return write_count_;
@@ -140,9 +173,18 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
         return 0;
     }
 
-    auto return_info{reinterpret_cast<AuxInfo::AuxInfoDsp*>(memory.GetPointer(return_info_))};
+    AuxInfo::AuxInfoDsp return_info{};
+    auto return_ptr = &return_info;
+    bool host_safe = (return_info_ & Core::Memory::YUZU_PAGEMASK) <=
+                     (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp));
 
-    u32 target_read_offset{return_info->read_offset + read_offset};
+    if (host_safe) [[likely]] {
+        return_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(return_info_);
+    } else {
+        memory.ReadBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp));
+    }
+
+    u32 target_read_offset{return_ptr->read_offset + read_offset};
     if (target_read_offset > count_max) {
         return 0;
     }
@@ -151,7 +193,13 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
     u32 write_pos{0};
     while (read_count > 0) {
         u32 to_read{std::min(count_max - target_read_offset, read_count)};
-        if (to_read) {
+        const auto read_addr = return_buffer + target_read_offset * sizeof(s32);
+        // Use the host pointer only when the whole span stays inside one guest page.
+        bool read_safe{(read_addr & Core::Memory::YUZU_PAGEMASK) + to_read * sizeof(s32) <=
+                       Core::Memory::YUZU_PAGESIZE};
+        if (read_safe) [[likely]] {
+            std::memcpy(&output[write_pos], memory.GetPointer(read_addr), to_read * sizeof(s32));
+        } else {
             memory.ReadBlockUnsafe(return_buffer + target_read_offset * sizeof(s32),
                                    &output[write_pos], to_read * sizeof(s32));
         }
@@ -161,7 +209,11 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
     }
 
     if (update_count) {
-        return_info->read_offset = (return_info->read_offset + update_count) % count_max;
+        return_ptr->read_offset = (return_ptr->read_offset + update_count) % count_max;
+    }
+
+    if (!host_safe) [[unlikely]] {
+        memory.WriteBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp));
     }
 
     return read_count_;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 33f9acf6c..036aebe0c 100755
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -32,7 +32,8 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
     VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
 
     while (!stop_token.stop_requested()) {
-        CommandDataContainer next = state.queue.PopWait(stop_token);
+        CommandDataContainer next;
+        state.queue.Pop(next, stop_token);
         if (stop_token.stop_requested()) {
             break;
         }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index f27debf31..ceaed6abc 100755
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,8 +10,8 @@
 #include <thread>
 #include <variant>
 
+#include "common/bounded_threadsafe_queue.h"
 #include "common/polyfill_thread.h"
-#include "common/threadsafe_queue.h"
 #include "video_core/framebuffer_config.h"
 
 namespace Tegra {
@@ -97,7 +97,7 @@ struct CommandDataContainer {
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    using CommandQueue = Common::MPSCQueue<CommandDataContainer, true>;
+    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
     std::mutex write_lock;
     CommandQueue queue;
     u64 last_fence{};
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index cfa129743..281d7d276 100755
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -189,9 +189,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
     for (std::size_t y = 0; y < frame_height; ++y) {
         const std::size_t src = y * stride;
         const std::size_t dst = y * aligned_width;
-        for (std::size_t x = 0; x < frame_width; ++x) {
-            luma_buffer[dst + x] = luma_src[src + x];
-        }
+        std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width);
     }
     host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
                                       luma_buffer.size());
@@ -205,15 +203,15 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
         // Frame from FFmpeg software
         // Populate chroma buffer from both channels with interleaving.
         const std::size_t half_width = frame_width / 2;
+        u8* chroma_buffer_data = chroma_buffer.data();
         const u8* chroma_b_src = frame->data[1];
         const u8* chroma_r_src = frame->data[2];
         for (std::size_t y = 0; y < half_height; ++y) {
             const std::size_t src = y * half_stride;
             const std::size_t dst = y * aligned_width;
-
             for (std::size_t x = 0; x < half_width; ++x) {
-                chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
-                chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
+                chroma_buffer_data[dst + x * 2] = chroma_b_src[src + x];
+                chroma_buffer_data[dst + x * 2 + 1] = chroma_r_src[src + x];
             }
         }
         break;
@@ -225,9 +223,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
         for (std::size_t y = 0; y < half_height; ++y) {
             const std::size_t src = y * stride;
             const std::size_t dst = y * aligned_width;
-            for (std::size_t x = 0; x < frame_width; ++x) {
-                chroma_buffer[dst + x] = chroma_src[src + x];
-            }
+            std::memcpy(chroma_buffer.data() + dst, chroma_src + src, frame_width);
         }
         break;
     }