early-access version 1959

This commit is contained in:
pineappleEA 2021-08-02 19:12:49 +02:00
parent c1ab8428b1
commit ec76444e96
15 changed files with 54 additions and 293 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 1958.
This is the source code for early-access 1959.
## Legal Notice

View File

@ -68,7 +68,6 @@ void nvhost_nvdec::OnOpen(DeviceFD fd) {}
void nvhost_nvdec::OnClose(DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
system.GPU().ClearCdmaInstance();
system.GPU().MemoryManager().InvalidateQueuedCaches();
}
} // namespace Service::Nvidia::Devices

View File

@ -166,8 +166,6 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
} else {
cmd_buffer.map_address = object->dma_map_addr;
AddBufferMap(object->dma_map_addr, object->size, object->addr,
object->status == nvmap::Object::Status::Allocated);
}
}
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
@ -178,35 +176,10 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
}
NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
auto& gpu = system.GPU();
for (auto& cmd_buffer : cmd_buffer_handles) {
const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
if (!object) {
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
}
if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) {
if (vic_device) {
// UnmapVicFrame defers texture_cache invalidation of the frame address until
// the stream is over
gpu.MemoryManager().UnmapVicFrame(object->dma_map_addr, *size);
} else {
gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
}
} else {
// This occurs quite frequently, however does not seem to impact functionality
LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr,
object->dma_map_addr);
}
object->dma_map_addr = 0;
}
// This is intentionally left stubbed.
// By unmapping GPU buffers here, we break the continuity of the VP9 reference frame addresses,
// and risk invalidating data before the async GPU thread is done with it
LOG_DEBUG(Service_NVDRV, "(STUBBED) called");
std::memset(output.data(), 0, output.size());
return NvResult::Success;
}
@ -218,33 +191,4 @@ NvResult nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input,
return NvResult::Success;
}
std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap(
GPUVAddr gpu_addr) const {
const auto it = std::find_if(
buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) {
return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr());
});
ASSERT(it != buffer_mappings.end());
return it->second;
}
void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
bool is_allocated) {
buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated});
}
std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) {
const auto iter{buffer_mappings.find(gpu_addr)};
if (iter == buffer_mappings.end()) {
return std::nullopt;
}
std::size_t size = 0;
if (iter->second.IsAllocated()) {
size = iter->second.Size();
}
buffer_mappings.erase(iter);
return size;
}
} // namespace Service::Nvidia::Devices

View File

@ -23,45 +23,6 @@ public:
~nvhost_nvdec_common() override;
protected:
class BufferMap final {
public:
constexpr BufferMap() = default;
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
bool is_allocated_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
is_allocated{is_allocated_} {}
constexpr VAddr StartAddr() const {
return start_addr;
}
constexpr VAddr EndAddr() const {
return end_addr;
}
constexpr std::size_t Size() const {
return end_addr - start_addr;
}
constexpr VAddr CpuAddr() const {
return cpu_addr;
}
constexpr bool IsAllocated() const {
return is_allocated;
}
private:
GPUVAddr start_addr{};
GPUVAddr end_addr{};
VAddr cpu_addr{};
bool is_allocated{};
};
struct IoctlSetNvmapFD {
s32_le nvmap_fd{};
};
@ -154,18 +115,11 @@ protected:
NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
s32_le nvmap_fd{};
u32_le submit_timeout{};
bool vic_device{};
std::shared_ptr<nvmap> nvmap_dev;
SyncpointManager& syncpoint_manager;
std::array<u32, MaxSyncPoints> device_syncpoints{};
// This is expected to be ordered, therefore we must use a map, not unordered_map
std::map<GPUVAddr, BufferMap> buffer_mappings;
};
}; // namespace Devices
} // namespace Service::Nvidia

View File

@ -12,9 +12,8 @@
namespace Service::Nvidia::Devices {
nvhost_vic::nvhost_vic(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_)
: nvhost_nvdec_common(system_, std::move(nvmap_dev_), syncpoint_manager_) {
vic_device = true;
}
: nvhost_nvdec_common{system_, std::move(nvmap_dev_), syncpoint_manager_} {}
nvhost_vic::~nvhost_vic() = default;
NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,

View File

@ -11,6 +11,9 @@
namespace Tegra::Decoder {
namespace {
constexpr u32 diff_update_probability = 252;
constexpr u32 frame_sync_code = 0x498342;
// Default compressed header probabilities once frame context resets
constexpr Vp9EntropyProbs default_probs{
.y_mode_prob{
@ -361,8 +364,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
// order: last, golden, altref, current. It may be worthwhile to track the updates done here
// to avoid buffering frame data needed for reference frame updating in the header composition.
// order: last, golden, altref, current.
std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
vp9_info.frame_offsets.begin());
@ -384,33 +386,18 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
current_frame.info.bitstream_size);
}
// Buffer two frames, saving the last show frame info
if (!next_next_frame.bit_stream.empty()) {
if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp{
.info = current_frame.info,
.bit_stream = std::move(current_frame.bit_stream),
};
next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
current_frame.info = next_next_frame.info;
current_frame.bit_stream = std::move(next_next_frame.bit_stream);
next_next_frame = std::move(temp);
if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp2{
.info = current_frame.info,
.bit_stream = std::move(current_frame.bit_stream),
};
next_frame.info.show_frame = current_frame.info.last_frame_shown;
current_frame.info = next_frame.info;
current_frame.bit_stream = std::move(next_frame.bit_stream);
next_frame = std::move(temp2);
} else {
next_frame.info = current_frame.info;
next_frame.bit_stream = std::move(current_frame.bit_stream);
}
next_frame.info.show_frame = current_frame.info.last_frame_shown;
current_frame.info = next_frame.info;
current_frame.bit_stream = std::move(next_frame.bit_stream);
next_frame = std::move(temp);
} else {
next_next_frame.info = current_frame.info;
next_next_frame.bit_stream = std::move(current_frame.bit_stream);
next_frame.info = current_frame.info;
next_frame.bit_stream = std::move(current_frame.bit_stream);
}
return current_frame;
}
@ -613,86 +600,64 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
// Reset context
prev_frame_probs = default_probs;
swap_next_golden = false;
swap_ref_indices = false;
loop_filter_ref_deltas.fill(0);
loop_filter_mode_deltas.fill(0);
// allow frames offsets to stabilize before checking for golden frames
grace_period = 4;
// On key frames, all frame slots are set to the current frame,
// so the value of the selected slot doesn't really matter.
frame_ctxs.fill({current_frame_number, false, default_probs});
frame_ctxs.fill(default_probs);
// intra only, meaning the frame can be recreated with no other references
current_frame_info.intra_only = true;
} else {
if (!current_frame_info.show_frame) {
uncomp_writer.WriteBit(current_frame_info.intra_only);
if (!current_frame_info.last_frame_was_key) {
swap_next_golden = !swap_next_golden;
}
} else {
current_frame_info.intra_only = false;
}
if (!current_frame_info.error_resilient_mode) {
uncomp_writer.WriteU(0, 2); // Reset frame context.
}
// Last, Golden, Altref frames
std::array<s32, 3> ref_frame_index{0, 1, 2};
// Set when next frame is hidden
// altref and golden references are swapped
if (swap_next_golden) {
ref_frame_index = std::array<s32, 3>{0, 2, 1};
const auto& curr_offsets = current_frame_info.frame_offsets;
const auto& next_offsets = next_frame.info.frame_offsets;
const bool ref_frames_different = curr_offsets[1] != curr_offsets[2];
const bool next_references_swap =
(next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]);
const bool needs_ref_swap = ref_frames_different && next_references_swap;
if (needs_ref_swap) {
swap_ref_indices = !swap_ref_indices;
}
union {
u32 raw;
BitField<0, 1, u32> refresh_last;
BitField<1, 2, u32> refresh_golden;
BitField<2, 1, u32> refresh_alt;
} refresh_frame_flags;
// update Last Frame
u64 refresh_frame_flags = 1;
// golden frame may refresh, determined if the next golden frame offset is changed
bool golden_refresh = false;
if (grace_period <= 0) {
for (s32 index = 1; index < 3; ++index) {
if (current_frame_info.frame_offsets[index] !=
next_frame.info.frame_offsets[index]) {
current_frame_info.refresh_frame[index] = true;
golden_refresh = true;
grace_period = 3;
}
refresh_frame_flags.raw = 0;
for (u32 index = 0; index < 3; ++index) {
// Refresh indices that use the current frame as an index
if (curr_offsets[3] == next_offsets[index]) {
refresh_frame_flags.raw |= 1u << index;
}
}
if (current_frame_info.show_frame &&
(!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
// Update golden frame
refresh_frame_flags = swap_next_golden ? 2 : 4;
if (swap_ref_indices) {
const u32 temp = refresh_frame_flags.refresh_golden;
refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value());
refresh_frame_flags.refresh_alt.Assign(temp);
}
if (!current_frame_info.show_frame) {
// Update altref
refresh_frame_flags = swap_next_golden ? 2 : 4;
} else if (golden_refresh) {
refresh_frame_flags = 3;
}
if (current_frame_info.intra_only) {
uncomp_writer.WriteU(frame_sync_code, 24);
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
uncomp_writer.WriteBit(false); // Render and frame size different.
} else {
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
for (s32 index = 1; index < 4; index++) {
const bool swap_indices = needs_ref_swap ^ swap_ref_indices;
const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2};
uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
for (size_t index = 1; index < 4; index++) {
uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
}
uncomp_writer.WriteBit(true); // Frame size with refs.
uncomp_writer.WriteBit(false); // Render and frame size different.
uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
@ -714,10 +679,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
frame_ctx_idx = 1;
}
uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
prev_frame_probs =
frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header
frame_ctxs[frame_ctx_idx] = current_frame_info.entropy;
uncomp_writer.WriteU(current_frame_info.first_level, 6);
uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
@ -812,7 +776,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
current_frame_info = curr_frame.info;
bitstream = std::move(curr_frame.bit_stream);
}
// The uncompressed header routine sets PrevProb parameters needed for the compressed header
auto uncomp_writer = ComposeUncompressedHeader();
std::vector<u8> compressed_header = ComposeCompressedHeader();
@ -828,13 +791,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
frame.begin() + uncompressed_header.size());
std::copy(bitstream.begin(), bitstream.end(),
frame.begin() + uncompressed_header.size() + compressed_header.size());
// keep track of frame number
current_frame_number++;
grace_period--;
// don't display hidden frames
hidden = !current_frame_info.show_frame;
return frame;
}

View File

@ -14,7 +14,6 @@
namespace Tegra {
class GPU;
enum class FrameType { KeyFrame = 0, InterFrame = 1 };
namespace Decoder {
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
@ -124,7 +123,7 @@ public:
/// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const {
return hidden;
return !current_frame_info.show_frame;
}
private:
@ -178,19 +177,12 @@ private:
std::array<s8, 4> loop_filter_ref_deltas{};
std::array<s8, 2> loop_filter_mode_deltas{};
bool hidden = false;
s64 current_frame_number = -2; // since we buffer 2 frames
s32 grace_period = 6; // frame offsets need to stabilize
std::array<FrameContexts, 4> frame_ctxs{};
Vp9FrameContainer next_frame{};
Vp9FrameContainer next_next_frame{};
bool swap_next_golden{};
std::array<Vp9EntropyProbs, 4> frame_ctxs{};
bool swap_ref_indices{};
Vp9PictureInfo current_frame_info{};
Vp9EntropyProbs prev_frame_probs{};
s32 diff_update_probability = 252;
s32 frame_sync_code = 0x498342;
};
} // namespace Decoder

View File

@ -296,12 +296,6 @@ struct RefPoolElement {
bool refresh{};
};
struct FrameContexts {
s64 from;
bool adapted;
Vp9EntropyProbs probs;
};
#define ASSERT_POSITION(field_name, position) \
static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
"Field " #field_name " has invalid position")

View File

@ -118,25 +118,6 @@ void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) {
.IsSuccess());
}
void MemoryManager::UnmapVicFrame(GPUVAddr gpu_addr, std::size_t size) {
if (!size) {
return;
}
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
rasterizer->InvalidateExceptTextureCache(*cpu_addr, size);
cache_invalidate_queue.push_back({*cpu_addr, size});
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
void MemoryManager::InvalidateQueuedCaches() {
for (const auto& entry : cache_invalidate_queue) {
rasterizer->InvalidateTextureCache(entry.first, entry.second);
}
cache_invalidate_queue.clear();
}
PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const {
return page_table[PageEntryIndex(gpu_addr)];
}

View File

@ -143,14 +143,6 @@ public:
[[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
/**
* Some Decoded NVDEC frames require that texture cache does not get invalidated.
* UnmapVicFrame defers the texture cache invalidation until the stream ends
* by invoking InvalidateQueuedCaches to invalidate all frame texture caches.
*/
void UnmapVicFrame(GPUVAddr gpu_addr, std::size_t size);
void InvalidateQueuedCaches();
private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);

View File

@ -77,12 +77,6 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer to flush the texture cache to Switch memory
virtual void InvalidateExceptTextureCache(VAddr addr, u64 size) = 0;
/// Notify rasterizer to invalidate the texture cache
virtual void InvalidateTextureCache(VAddr addr, u64 size) = 0;
/// Check if the the specified memory area requires flushing to CPU Memory.
virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;

View File

@ -322,26 +322,6 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateExceptTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
shader_cache.InvalidateRegion(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::InvalidateTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {

View File

@ -86,8 +86,6 @@ public:
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;

View File

@ -316,26 +316,6 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}
void Vulkan::RasterizerVulkan::InvalidateExceptTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
pipeline_cache.InvalidateRegion(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
query_cache.InvalidateRegion(addr, size);
}
void Vulkan::RasterizerVulkan::InvalidateTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
if (!Settings::IsGPULevelHigh()) {

View File

@ -79,8 +79,6 @@ public:
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;