// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <filesystem>
#include <fstream>
#include <limits>
#include <memory>
#include <optional>
#include <span>
#include <utility>
#include <vector>

#include "common/assert.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "shader_recompiler/environment.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"
#include "video_core/shader_environment.h"
#include "video_core/textures/texture.h"

namespace VideoCommon {

constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};

constexpr size_t INST_SIZE = sizeof(u64);

using Maxwell = Tegra::Engines::Maxwell3D::Regs;

static u64 MakeCbufKey(u32 index, u32 offset) {
    return (static_cast<u64>(index) << 32) | offset;
}

static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
    switch (entry.texture_type) {
    case Tegra::Texture::TextureType::Texture1D:
        return Shader::TextureType::Color1D;
    case Tegra::Texture::TextureType::Texture2D:
    case Tegra::Texture::TextureType::Texture2DNoMipmap:
        return Shader::TextureType::Color2D;
    case Tegra::Texture::TextureType::Texture3D:
        return Shader::TextureType::Color3D;
    case Tegra::Texture::TextureType::TextureCubemap:
        return Shader::TextureType::ColorCube;
    case Tegra::Texture::TextureType::Texture1DArray:
        return Shader::TextureType::ColorArray1D;
    case Tegra::Texture::TextureType::Texture2DArray:
        return Shader::TextureType::ColorArray2D;
    case Tegra::Texture::TextureType::Texture1DBuffer:
        return Shader::TextureType::Buffer;
    case Tegra::Texture::TextureType::TextureCubeArray:
        return Shader::TextureType::ColorArrayCube;
    default:
        throw Shader::NotImplementedException("Unknown texture type");
    }
}

static std::string_view StageToPrefix(Shader::Stage stage) {
    switch (stage) {
    case Shader::Stage::VertexB:
        return "VB";
    case Shader::Stage::TessellationControl:
        return "TC";
    case Shader::Stage::TessellationEval:
        return "TE";
    case Shader::Stage::Geometry:
        return "GS";
    case Shader::Stage::Fragment:
        return "FS";
    case Shader::Stage::Compute:
        return "CS";
    case Shader::Stage::VertexA:
        return "VA";
    default:
        return "UK";
    }
}

static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest,
                     u32 initial_offset, Shader::Stage stage) {
    const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
    const auto base_dir{shader_dir / "shaders"};
    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
        LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories");
        return;
    }
    const auto prefix = StageToPrefix(stage);
    const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)};
    const size_t real_size = read_highest - read_lowest + initial_offset;
    const size_t padding_needed = ((32 - (real_size % 32)) % 32);
    std::fstream shader_file(name, std::ios::out | std::ios::binary);
    const size_t jump_index = initial_offset / sizeof(u64);
    shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size);
    for (size_t i = 0; i < padding_needed; i++) {
        shader_file.put(0);
    }
}

GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
                                       u32 start_address_)
    : gpu_memory{&gpu_memory_}, program_base{program_base_} {
    start_address = start_address_;
}

GenericEnvironment::~GenericEnvironment() = default;

u32 GenericEnvironment::TextureBoundBuffer() const {
    return texture_bound;
}

u32 GenericEnvironment::LocalMemorySize() const {
    return local_memory_size;
}

u32 GenericEnvironment::SharedMemorySize() const {
    return shared_memory_size;
}

std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
    return workgroup_size;
}
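// Instruction reads track the lowest and highest addresses touched. Reads outside the cached
// range fall back to GPU memory and mark the environment as having unbound instructions, which
// keeps it from being serialized into the pipeline cache.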
u64 GenericEnvironment::ReadInstruction(u32 address) {
    read_lowest = std::min(read_lowest, address);
    read_highest = std::max(read_highest, address);

    if (address >= cached_lowest && address < cached_highest) {
        return code[(address - cached_lowest) / INST_SIZE];
    }
    has_unbound_instructions = true;
    return gpu_memory->Read<u64>(program_base + address);
}

std::optional<u64> GenericEnvironment::Analyze() {
    const std::optional<u64> size{TryFindSize()};
    if (!size) {
        return std::nullopt;
    }
    cached_lowest = start_address;
    cached_highest = start_address + static_cast<u32>(*size);
    return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
}

void GenericEnvironment::SetCachedSize(size_t size_bytes) {
    cached_lowest = start_address;
    cached_highest = start_address + static_cast<u32>(size_bytes);
    code.resize(CachedSize());
    gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
}

size_t GenericEnvironment::CachedSize() const noexcept {
    return cached_highest - cached_lowest + INST_SIZE;
}

size_t GenericEnvironment::ReadSize() const noexcept {
    return read_highest - read_lowest + INST_SIZE;
}

bool GenericEnvironment::CanBeSerialized() const noexcept {
    return !has_unbound_instructions;
}

u64 GenericEnvironment::CalculateHash() const {
    const size_t size{ReadSize()};
    const auto data{std::make_unique<char[]>(size)};
    gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
    return Common::CityHash64(data.get(), size);
}

void GenericEnvironment::Dump(u64 hash) {
    DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage);
}

void GenericEnvironment::Serialize(std::ofstream& file) const {
    const u64 code_size{static_cast<u64>(CachedSize())};
    const u64 num_texture_types{static_cast<u64>(texture_types.size())};
    const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};

    file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
        .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
        .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
        .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
        .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
        .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
        .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
        .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
        .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
        .write(reinterpret_cast<const char*>(code.data()), code_size);
    for (const auto [key, type] : texture_types) {
        file.write(reinterpret_cast<const char*>(&key), sizeof(key))
            .write(reinterpret_cast<const char*>(&type), sizeof(type));
    }
    for (const auto [key, value] : cbuf_values) {
        file.write(reinterpret_cast<const char*>(&key), sizeof(key))
            .write(reinterpret_cast<const char*>(&value), sizeof(value));
    }
    if (stage == Shader::Stage::Compute) {
        file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
            .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
    } else {
        file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
        if (stage == Shader::Stage::Geometry) {
            file.write(reinterpret_cast<const char*>(&gp_passthrough_mask),
                       sizeof(gp_passthrough_mask));
        }
    }
}
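// Walks the program in BLOCK_SIZE chunks and stops at the first instruction matching one of the
// two known self-branch encodings, reporting its offset as the program size. Programs larger
// than MAXIMUM_SIZE (or missing a self-branch) cannot be sized this way.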
std::optional<u64> GenericEnvironment::TryFindSize() {
    static constexpr size_t BLOCK_SIZE = 0x1000;
    static constexpr size_t MAXIMUM_SIZE = 0x100000;

    static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
    static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;

    GPUVAddr guest_addr{program_base + start_address};
    size_t offset{0};
    size_t size{BLOCK_SIZE};
    while (size <= MAXIMUM_SIZE) {
        code.resize(size / INST_SIZE);
        u64* const data = code.data() + offset / INST_SIZE;
        gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
        for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
            const u64 inst = data[index / INST_SIZE];
            if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
                return offset + index;
            }
        }
        guest_addr += BLOCK_SIZE;
        size += BLOCK_SIZE;
        offset += BLOCK_SIZE;
    }
    return std::nullopt;
}

Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
                                                            bool via_header_index, u32 raw) {
    const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
    const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
    Tegra::Texture::TICEntry entry;
    gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
    const Shader::TextureType result{ConvertType(entry)};
    texture_types.emplace(raw, result);
    return result;
}

GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
                                         Tegra::MemoryManager& gpu_memory_,
                                         Maxwell::ShaderProgram program, GPUVAddr program_base_,
                                         u32 start_address_)
    : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
    gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
    initial_offset = sizeof(sph);
    gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
    switch (program) {
    case Maxwell::ShaderProgram::VertexA:
        stage = Shader::Stage::VertexA;
        stage_index = 0;
        break;
    case Maxwell::ShaderProgram::VertexB:
        stage = Shader::Stage::VertexB;
        stage_index = 0;
        break;
    case Maxwell::ShaderProgram::TesselationControl:
        stage = Shader::Stage::TessellationControl;
        stage_index = 1;
        break;
    case Maxwell::ShaderProgram::TesselationEval:
        stage = Shader::Stage::TessellationEval;
        stage_index = 2;
        break;
    case Maxwell::ShaderProgram::Geometry:
        stage = Shader::Stage::Geometry;
        stage_index = 3;
        break;
    case Maxwell::ShaderProgram::Fragment:
        stage = Shader::Stage::Fragment;
        stage_index = 4;
        break;
    default:
        UNREACHABLE_MSG("Invalid program={}", program);
        break;
    }
    const u64 local_size{sph.LocalMemorySize()};
    ASSERT(local_size <= std::numeric_limits<u32>::max());
    local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
    texture_bound = maxwell3d->regs.tex_cb_index;
}

u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
    const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
    ASSERT(cbuf.enabled);
    u32 value{};
    if (cbuf_offset < cbuf.size) {
        value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
    }
    cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
    return value;
}

Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
    const auto& regs{maxwell3d->regs};
    const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
    return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
}

ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
                                       Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
                                       u32 start_address_)
    : GenericEnvironment{gpu_memory_, program_base_, start_address_},
      kepler_compute{&kepler_compute_} {
    const auto& qmd{kepler_compute->launch_description};
    stage = Shader::Stage::Compute;
    local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
    texture_bound = kepler_compute->regs.tex_cb_index;
    shared_memory_size = qmd.shared_alloc;
    workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
}

u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
    const auto& qmd{kepler_compute->launch_description};
    ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
    const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
    u32 value{};
    if (cbuf_offset < cbuf.size) {
        value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
    }
    cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
    return value;
}

Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
    const auto& regs{kepler_compute->regs};
    const auto& qmd{kepler_compute->launch_description};
    return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
}
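// Mirrors GenericEnvironment::Serialize: fields are read back in the exact order they were
// written, followed by the code, the cached texture types and constant buffer values, and the
// stage-specific data (workgroup/shared memory for compute, SPH and passthrough mask otherwise).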
void FileEnvironment::Deserialize(std::ifstream& file) {
    u64 code_size{};
    u64 num_texture_types{};
    u64 num_cbuf_values{};
    file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
        .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
        .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
        .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
        .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
        .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
        .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
        .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
        .read(reinterpret_cast<char*>(&stage), sizeof(stage));
    code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
    file.read(reinterpret_cast<char*>(code.get()), code_size);
    for (size_t i = 0; i < num_texture_types; ++i) {
        u32 key;
        Shader::TextureType type;
        file.read(reinterpret_cast<char*>(&key), sizeof(key))
            .read(reinterpret_cast<char*>(&type), sizeof(type));
        texture_types.emplace(key, type);
    }
    for (size_t i = 0; i < num_cbuf_values; ++i) {
        u64 key;
        u32 value;
        file.read(reinterpret_cast<char*>(&key), sizeof(key))
            .read(reinterpret_cast<char*>(&value), sizeof(value));
        cbuf_values.emplace(key, value);
    }
    if (stage == Shader::Stage::Compute) {
        file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
            .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
        initial_offset = 0;
    } else {
        file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
        initial_offset = sizeof(sph);
        if (stage == Shader::Stage::Geometry) {
            file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
        }
    }
}

void FileEnvironment::Dump(u64 hash) {
    DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage);
}

u64 FileEnvironment::ReadInstruction(u32 address) {
    if (address < read_lowest || address > read_highest) {
        throw Shader::LogicError("Out of bounds address {}", address);
    }
    return code[(address - read_lowest) / sizeof(u64)];
}

u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
    const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
    if (it == cbuf_values.end()) {
        throw Shader::LogicError("Uncached constant buffer read");
    }
    return it->second;
}

Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
    const auto it{texture_types.find(handle)};
    if (it == texture_types.end()) {
        throw Shader::LogicError("Uncached read texture type");
    }
    return it->second;
}

u32 FileEnvironment::LocalMemorySize() const {
    return local_memory_size;
}

u32 FileEnvironment::SharedMemorySize() const {
    return shared_memory_size;
}

u32 FileEnvironment::TextureBoundBuffer() const {
    return texture_bound;
}

std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
    return workgroup_size;
}
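// Pipeline cache file layout: an 8-byte magic number and a u32 cache version, followed by one
// entry per serialized pipeline consisting of a u32 environment count, the serialized
// environments, and the raw pipeline key bytes.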
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
                       const std::filesystem::path& filename, u32 cache_version) try {
    std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
    file.exceptions(std::ifstream::failbit);
    if (!file.is_open()) {
        LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
                  Common::FS::PathToUTF8String(filename));
        return;
    }
    if (file.tellp() == 0) {
        // Write header
        file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
            .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
    }
    if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
        return;
    }
    const u32 num_envs{static_cast<u32>(envs.size())};
    file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
    for (const GenericEnvironment* const env : envs) {
        env->Serialize(file);
    }
    file.write(key.data(), key.size_bytes());
} catch (const std::ios_base::failure& e) {
    LOG_ERROR(Common_Filesystem, "{}", e.what());
    if (!Common::FS::RemoveFile(filename)) {
        LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
                  Common::FS::PathToUTF8String(filename));
    }
}

void LoadPipelines(
    std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
    Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
    Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
    std::ifstream file(filename, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        return;
    }
    file.exceptions(std::ifstream::failbit);
    const auto end{file.tellg()};
    file.seekg(0, std::ios::beg);

    std::array<char, 8> magic_number;
    u32 cache_version;
    file.read(magic_number.data(), magic_number.size())
        .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
    if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) {
        file.close();
        if (Common::FS::RemoveFile(filename)) {
            if (magic_number != MAGIC_NUMBER) {
                LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
            }
            if (cache_version != expected_cache_version) {
                LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
            }
        } else {
            LOG_ERROR(Common_Filesystem,
                      "Invalid pipeline cache file and failed to delete it in \"{}\"",
                      Common::FS::PathToUTF8String(filename));
        }
        return;
    }
    while (file.tellg() != end) {
        if (stop_loading.stop_requested()) {
            return;
        }
        u32 num_envs{};
        file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
        std::vector<FileEnvironment> envs(num_envs);
        for (FileEnvironment& env : envs) {
            env.Deserialize(file);
        }
        if (envs.front().ShaderStage() == Shader::Stage::Compute) {
            load_compute(file, std::move(envs.front()));
        } else {
            load_graphics(file, std::move(envs));
        }
    }
} catch (const std::ios_base::failure& e) {
    LOG_ERROR(Common_Filesystem, "{}", e.what());
    if (!Common::FS::RemoveFile(filename)) {
        LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
                  Common::FS::PathToUTF8String(filename));
    }
}

} // namespace VideoCommon