From 6786a0913a1f895c736673e7dbd17815167b126c Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 19 Sep 2021 14:13:48 +0200 Subject: [PATCH 01/10] Don't compress dylib metadata Dylib metadata compression increases code complexity and will become less important once -Zsplit-metadata is introduced --- Cargo.lock | 8 ----- compiler/rustc_codegen_ssa/Cargo.toml | 1 - .../rustc_codegen_ssa/src/back/metadata.rs | 11 ++---- compiler/rustc_metadata/Cargo.toml | 1 - compiler/rustc_metadata/src/locator.rs | 36 +++---------------- 5 files changed, 7 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f344dc920e5..fce95ae80400 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3595,7 +3595,6 @@ dependencies = [ "rustc_symbol_mangling", "rustc_target", "smallvec", - "snap", "tempfile", "thorin-dwp", "tracing", @@ -3967,7 +3966,6 @@ dependencies = [ "rustc_span", "rustc_target", "smallvec", - "snap", "tracing", ] @@ -4795,12 +4793,6 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" -[[package]] -name = "snap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da73c8f77aebc0e40c300b93f0a5f1bece7a248a36eee287d4e095f35c7b7d6e" - [[package]] name = "socket2" version = "0.4.1" diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml index 8bbf25ce030f..7062c0c7eadc 100644 --- a/compiler/rustc_codegen_ssa/Cargo.toml +++ b/compiler/rustc_codegen_ssa/Cargo.toml @@ -16,7 +16,6 @@ jobserver = "0.1.22" tempfile = "3.2" thorin-dwp = "0.2" pathdiff = "0.2.0" -snap = "1" smallvec = { version = "1.6.1", features = ["union", "may_dangle"] } regex = "1.4" diff --git a/compiler/rustc_codegen_ssa/src/back/metadata.rs b/compiler/rustc_codegen_ssa/src/back/metadata.rs index 6849533abc04..be8e71b4f423 100644 --- a/compiler/rustc_codegen_ssa/src/back/metadata.rs +++ 
b/compiler/rustc_codegen_ssa/src/back/metadata.rs @@ -1,7 +1,6 @@ //! Reading of the rustc metadata for rlibs and dylibs use std::fs::File; -use std::io::Write; use std::path::Path; use object::write::{self, StandardSegment, Symbol, SymbolSection}; @@ -10,8 +9,6 @@ use object::{ SectionFlags, SectionKind, SymbolFlags, SymbolKind, SymbolScope, }; -use snap::write::FrameEncoder; - use rustc_data_structures::memmap::Mmap; use rustc_data_structures::owning_ref::OwningRef; use rustc_data_structures::rustc_erase_owner; @@ -260,12 +257,10 @@ pub fn create_compressed_metadata_file( metadata: &EncodedMetadata, symbol_name: &str, ) -> Vec { - let mut compressed = rustc_metadata::METADATA_HEADER.to_vec(); - FrameEncoder::new(&mut compressed).write_all(metadata.raw_data()).unwrap(); let mut file = if let Some(file) = create_object_file(sess) { file } else { - return compressed.to_vec(); + return metadata.raw_data().to_vec(); }; let section = file.add_section( file.segment_name(StandardSegment::Data).to_vec(), @@ -279,14 +274,14 @@ pub fn create_compressed_metadata_file( } _ => {} }; - let offset = file.append_section_data(section, &compressed, 1); + let offset = file.append_section_data(section, metadata.raw_data(), 1); // For MachO and probably PE this is necessary to prevent the linker from throwing away the // .rustc section. For ELF this isn't necessary, but it also doesn't harm. 
file.add_symbol(Symbol { name: symbol_name.as_bytes().to_vec(), value: offset, - size: compressed.len() as u64, + size: metadata.raw_data().len() as u64, kind: SymbolKind::Data, scope: SymbolScope::Dynamic, weak: false, diff --git a/compiler/rustc_metadata/Cargo.toml b/compiler/rustc_metadata/Cargo.toml index 59796dd65294..9419af0d76a5 100644 --- a/compiler/rustc_metadata/Cargo.toml +++ b/compiler/rustc_metadata/Cargo.toml @@ -9,7 +9,6 @@ doctest = false [dependencies] libloading = "0.7.1" odht = { version = "0.3.1", features = ["nightly"] } -snap = "1" tracing = "0.1" smallvec = { version = "1.6.1", features = ["union", "may_dangle"] } rustc_middle = { path = "../rustc_middle" } diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index 550b22a2a3c6..0cf28f9a032a 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -213,7 +213,7 @@ //! metadata::locator or metadata::creader for all the juicy details! use crate::creader::Library; -use crate::rmeta::{rustc_version, MetadataBlob, METADATA_HEADER}; +use crate::rmeta::{rustc_version, MetadataBlob}; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::memmap::Mmap; @@ -231,11 +231,10 @@ use rustc_span::symbol::{sym, Symbol}; use rustc_span::Span; use rustc_target::spec::{Target, TargetTriple}; -use snap::read::FrameDecoder; use std::fmt::Write as _; -use std::io::{Read, Result as IoResult, Write}; +use std::io::{Result as IoResult, Write}; use std::path::{Path, PathBuf}; -use std::{cmp, fmt, fs}; +use std::{fmt, fs}; use tracing::{debug, info}; #[derive(Clone)] @@ -757,34 +756,7 @@ fn get_metadata_section<'p>( loader.get_rlib_metadata(target, filename).map_err(MetadataError::LoadFailure)? 
} CrateFlavor::Dylib => { - let buf = - loader.get_dylib_metadata(target, filename).map_err(MetadataError::LoadFailure)?; - // The header is uncompressed - let header_len = METADATA_HEADER.len(); - debug!("checking {} bytes of metadata-version stamp", header_len); - let header = &buf[..cmp::min(header_len, buf.len())]; - if header != METADATA_HEADER { - return Err(MetadataError::LoadFailure(format!( - "invalid metadata version found: {}", - filename.display() - ))); - } - - // Header is okay -> inflate the actual metadata - let compressed_bytes = &buf[header_len..]; - debug!("inflating {} bytes of compressed metadata", compressed_bytes.len()); - // Assume the decompressed data will be at least the size of the compressed data, so we - // don't have to grow the buffer as much. - let mut inflated = Vec::with_capacity(compressed_bytes.len()); - match FrameDecoder::new(compressed_bytes).read_to_end(&mut inflated) { - Ok(_) => rustc_erase_owner!(OwningRef::new(inflated).map_owner_box()), - Err(_) => { - return Err(MetadataError::LoadFailure(format!( - "failed to decompress metadata: {}", - filename.display() - ))); - } - } + loader.get_dylib_metadata(target, filename).map_err(MetadataError::LoadFailure)? } CrateFlavor::Rmeta => { // mmap the file, because only a small fraction of it is read. From fb6da33215198feeca8da141a577bba6c2692897 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 19 Sep 2021 15:18:05 +0200 Subject: [PATCH 02/10] Remove a workaround for a bug I don't think it is necessary anymore. As I understand it from issue 39504 the original problem was that rustbuild changed a hardlink in the cargo build dir to point to a copy in the sysroot while cargo may have hardlinked it to the original first. I don't think this happens anymore and as such this workaround is no longer necessary. 
--- compiler/rustc_metadata/src/locator.rs | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index 0cf28f9a032a..54ca82cd9978 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -589,32 +589,6 @@ impl<'a> CrateLocator<'a> { continue; } - // Ok so at this point we've determined that `(lib, kind)` above is - // a candidate crate to load, and that `slot` is either none (this - // is the first crate of its kind) or if some the previous path has - // the exact same hash (e.g., it's the exact same crate). - // - // In principle these two candidate crates are exactly the same so - // we can choose either of them to link. As a stupidly gross hack, - // however, we favor crate in the sysroot. - // - // You can find more info in rust-lang/rust#39518 and various linked - // issues, but the general gist is that during testing libstd the - // compilers has two candidates to choose from: one in the sysroot - // and one in the deps folder. These two crates are the exact same - // crate but if the compiler chooses the one in the deps folder - // it'll cause spurious errors on Windows. - // - // As a result, we favor the sysroot crate here. Note that the - // candidates are all canonicalized, so we canonicalize the sysroot - // as well. 
- if let Some((prev, _)) = &ret { - let sysroot = self.sysroot; - let sysroot = sysroot.canonicalize().unwrap_or_else(|_| sysroot.to_path_buf()); - if prev.starts_with(&sysroot) { - continue; - } - } *slot = Some((hash, metadata)); ret = Some((lib, kind)); } From cfa95fad6715c869d8ce8a0dd174c0e51f19e8ef Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 19 Sep 2021 15:24:34 +0200 Subject: [PATCH 03/10] Always check if the SVH of rlib and dylibs matches other crate sources If a crate is both an rlib and cdylib, the metadata loader would previously incorrectly assume that it is possible to link against the cdylib. Cdylibs don't export all symbols required for linking as rust library. Possibly fixes issue number 82151 --- compiler/rustc_metadata/src/locator.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index 54ca82cd9978..d1674842ac8b 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -503,9 +503,8 @@ impl<'a> CrateLocator<'a> { // the errors and notes are emitted about the set of libraries. // // With only one library in the set, this function will extract it, and then - // read the metadata from it if `*slot` is `None`. If the metadata couldn't - // be read, it is assumed that the file isn't a valid rust library (no - // errors are emitted). + // read the metadata from it. If the metadata couldn't be read, it is assumed + // that the file isn't a valid rust library (no errors are emitted). fn extract_one( &mut self, m: FxHashMap, @@ -521,17 +520,8 @@ impl<'a> CrateLocator<'a> { // // See also #68149 which provides more detail on why emitting the // dependency on the rlib is a bad thing. 
- // - // We currently do not verify that these other sources are even in sync, - // and this is arguably a bug (see #10786), but because reading metadata - // is quite slow (especially from dylibs) we currently do not read it - // from the other crate sources. - if slot.is_some() { - if m.is_empty() || !self.needs_crate_flavor(flavor) { - return Ok(None); - } else if m.len() == 1 { - return Ok(Some(m.into_iter().next().unwrap())); - } + if slot.is_some() && !self.needs_crate_flavor(flavor) { + return Ok(None); } let mut ret: Option<(PathBuf, PathKind)> = None; From 99f92df395c527f0d4dc1391aa20e0d8509c4d34 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 12 Feb 2022 15:34:31 +0100 Subject: [PATCH 04/10] Move metadata header and version checks together This will make it easier to report rustc versions for older metadata formats. --- compiler/rustc_metadata/src/locator.rs | 43 +++++++++++--------- compiler/rustc_metadata/src/rmeta/decoder.rs | 21 +++++++--- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index d1674842ac8b..829232965526 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -241,7 +241,6 @@ use tracing::{debug, info}; crate struct CrateLocator<'a> { // Immutable per-session configuration. only_needs_metadata: bool, - sysroot: &'a Path, metadata_loader: &'a dyn MetadataLoader, // Immutable per-search configuration. @@ -308,7 +307,6 @@ impl<'a> CrateLocator<'a> { CrateLocator { only_needs_metadata, - sysroot: &sess.sysroot, metadata_loader, crate_name, exact_paths: if hash.is_none() { @@ -547,6 +545,20 @@ impl<'a> CrateLocator<'a> { continue; } } + Err(MetadataError::VersionMismatch(found_version)) => { + // The file was present and created by the same compiler version, but we + // couldn't load it for some reason. 
Give a hard error instead of silently + // ignoring it, but only if we would have given an error anyway. + let rustc_version = rustc_version(); + info!( + "Rejecting via version: expected {} got {}", + rustc_version, found_version + ); + self.crate_rejections + .via_version + .push(CrateMismatch { path: lib, got: found_version }); + continue; + } Err(MetadataError::LoadFailure(err)) => { info!("no metadata found: {}", err); // The file was present and created by the same compiler version, but we @@ -591,16 +603,6 @@ impl<'a> CrateLocator<'a> { } fn crate_matches(&mut self, metadata: &MetadataBlob, libpath: &Path) -> Option { - let rustc_version = rustc_version(); - let found_version = metadata.get_rustc_version(); - if found_version != rustc_version { - info!("Rejecting via version: expected {} got {}", rustc_version, found_version); - self.crate_rejections - .via_version - .push(CrateMismatch { path: libpath.to_path_buf(), got: found_version }); - return None; - } - let root = metadata.get_root(); if root.is_proc_macro_crate() != self.is_proc_macro { info!( @@ -742,13 +744,9 @@ fn get_metadata_section<'p>( } }; let blob = MetadataBlob::new(raw_bytes); - if blob.is_compatible() { - Ok(blob) - } else { - Err(MetadataError::LoadFailure(format!( - "invalid metadata version found: {}", - filename.display() - ))) + match blob.check_compatibility() { + Ok(()) => Ok(blob), + Err(version) => Err(MetadataError::VersionMismatch(version)), } } @@ -862,6 +860,8 @@ enum MetadataError<'a> { NotPresent(&'a Path), /// The file was present and invalid. LoadFailure(String), + /// The file was present, but compiled with a different rustc version. 
+ VersionMismatch(String), } impl fmt::Display for MetadataError<'_> { @@ -871,6 +871,11 @@ impl fmt::Display for MetadataError<'_> { f.write_str(&format!("no such file: '{}'", filename.display())) } MetadataError::LoadFailure(msg) => f.write_str(msg), + MetadataError::VersionMismatch(found_version) => f.write_str(&format!( + "rustc version mismatch. expected {}, found {}", + rustc_version(), + found_version, + )), } } } diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 87a88fbac712..c751974db3e0 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -626,13 +626,22 @@ impl<'tcx> MetadataBlob { MetadataBlob(Lrc::new(metadata_ref)) } - crate fn is_compatible(&self) -> bool { - self.blob().starts_with(METADATA_HEADER) - } + crate fn check_compatibility(&self) -> Result<(), String> { + if !self.blob().starts_with(METADATA_HEADER) { + if self.blob().starts_with(b"rust") { + return Err("".to_string()); + } + return Err("".to_string()); + } - crate fn get_rustc_version(&self) -> String { - Lazy::::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 4).unwrap()) - .decode(self) + let found_version = + Lazy::::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 4).unwrap()) + .decode(self); + if rustc_version() != found_version { + return Err(found_version); + } + + Ok(()) } crate fn get_root(&self) -> CrateRoot<'tcx> { From 6d5a457d711e1faf0478c5e6b9e9ae7570e76e3e Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 12 Feb 2022 16:32:01 +0100 Subject: [PATCH 05/10] Use to_le_bytes and from_le_bytes instead of manual byte shuffling --- compiler/rustc_metadata/src/rmeta/decoder.rs | 5 +---- compiler/rustc_metadata/src/rmeta/encoder.rs | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index c751974db3e0..e6300af7c2eb 100644 --- 
a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -647,10 +647,7 @@ impl<'tcx> MetadataBlob { crate fn get_root(&self) -> CrateRoot<'tcx> { let slice = &self.blob()[..]; let offset = METADATA_HEADER.len(); - let pos = (((slice[offset + 0] as u32) << 24) - | ((slice[offset + 1] as u32) << 16) - | ((slice[offset + 2] as u32) << 8) - | ((slice[offset + 3] as u32) << 0)) as usize; + let pos = (u32::from_le_bytes(slice[offset..offset + 4].try_into().unwrap())) as usize; Lazy::>::from_position(NonZeroUsize::new(pos).unwrap()).decode(self) } diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index c92b3b9434c2..fc7ee9fb521c 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2214,10 +2214,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { // Encode the root position. let header = METADATA_HEADER.len(); let pos = root.position.get(); - result[header + 0] = (pos >> 24) as u8; - result[header + 1] = (pos >> 16) as u8; - result[header + 2] = (pos >> 8) as u8; - result[header + 3] = (pos >> 0) as u8; + result[header..header + 4].copy_from_slice(&pos.to_le_bytes()); // Record metadata size for self-profiling tcx.prof.artifact_size("crate_metadata", "crate_metadata", result.len() as u64); From 8c65a66a8dd0329d0ac0b90456a95428d9bb07bf Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 12 Feb 2022 18:50:13 +0100 Subject: [PATCH 06/10] Introduce -Zsplit-metadata option This will split the crate metadata out of library files. Instead only the svh is preserved to allow for loading the right rmeta file. This significantly reduces library size. In addition it allows for cheaper checks if different library files are the same crate. 
--- compiler/rustc_codegen_ssa/src/back/link.rs | 2 +- .../rustc_codegen_ssa/src/back/metadata.rs | 7 +-- compiler/rustc_interface/src/passes.rs | 4 +- compiler/rustc_interface/src/tests.rs | 1 + compiler/rustc_metadata/src/locator.rs | 18 +++++++ compiler/rustc_metadata/src/rmeta/decoder.rs | 36 ++++++++++++-- compiler/rustc_metadata/src/rmeta/encoder.rs | 48 +++++++++++++++---- compiler/rustc_metadata/src/rmeta/mod.rs | 25 ++++++++-- compiler/rustc_session/src/options.rs | 2 + 9 files changed, 119 insertions(+), 24 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index e53c98421176..7cb8d1fd82bd 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -359,7 +359,7 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>( // metadata in rlib files is wrapped in a "dummy" object file for // the target platform so the rlib can be processed entirely by // normal linkers for the platform. - let metadata = create_rmeta_file(sess, codegen_results.metadata.raw_data()); + let metadata = create_rmeta_file(sess, codegen_results.metadata.maybe_reference()); ab.add_file(&emit_metadata(sess, &metadata, tmpdir)); } diff --git a/compiler/rustc_codegen_ssa/src/back/metadata.rs b/compiler/rustc_codegen_ssa/src/back/metadata.rs index be8e71b4f423..acf0e2e4cad8 100644 --- a/compiler/rustc_codegen_ssa/src/back/metadata.rs +++ b/compiler/rustc_codegen_ssa/src/back/metadata.rs @@ -252,6 +252,7 @@ pub fn create_rmeta_file(sess: &Session, metadata: &[u8]) -> Vec { // As a result, we choose a slightly shorter name! 
As to why // `.note.rustc` works on MinGW, see // https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197 +// TODO rename function pub fn create_compressed_metadata_file( sess: &Session, metadata: &EncodedMetadata, @@ -260,7 +261,7 @@ pub fn create_compressed_metadata_file( let mut file = if let Some(file) = create_object_file(sess) { file } else { - return metadata.raw_data().to_vec(); + return metadata.maybe_reference().to_vec(); }; let section = file.add_section( file.segment_name(StandardSegment::Data).to_vec(), @@ -274,14 +275,14 @@ pub fn create_compressed_metadata_file( } _ => {} }; - let offset = file.append_section_data(section, metadata.raw_data(), 1); + let offset = file.append_section_data(section, metadata.maybe_reference(), 1); // For MachO and probably PE this is necessary to prevent the linker from throwing away the // .rustc section. For ELF this isn't necessary, but it also doesn't harm. file.add_symbol(Symbol { name: symbol_name.as_bytes().to_vec(), value: offset, - size: metadata.raw_data().len() as u64, + size: metadata.maybe_reference().len() as u64, kind: SymbolKind::Data, scope: SymbolScope::Dynamic, weak: false, diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index 66e1e78b2856..bbb9663b8cc9 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -1036,7 +1036,7 @@ fn encode_and_write_metadata( enum MetadataKind { None, Uncompressed, - Compressed, + Compressed, // TODO remove this variant } let metadata_kind = tcx @@ -1074,7 +1074,7 @@ fn encode_and_write_metadata( .tempdir_in(out_filename.parent().unwrap()) .unwrap_or_else(|err| tcx.sess.fatal(&format!("couldn't create a temp dir: {}", err))); let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps); - let metadata_filename = emit_metadata(tcx.sess, metadata.raw_data(), &metadata_tmpdir); + let metadata_filename = emit_metadata(tcx.sess, 
metadata.full(), &metadata_tmpdir); if let Err(e) = util::non_durable_rename(&metadata_filename, &out_filename) { tcx.sess.fatal(&format!("failed to write {}: {}", out_filename.display(), e)); } diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 9ab138c1b12a..59eb77a80b79 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -689,6 +689,7 @@ fn test_debugging_options_tracking_hash() { untracked!(self_profile_events, Some(vec![String::new()])); untracked!(span_debug, true); untracked!(span_free_formats, true); + untracked!(split_metadata, true); untracked!(temps_dir, Some(String::from("abc"))); untracked!(terminal_width, Some(80)); untracked!(threads, 99); diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index 829232965526..ce1131072d39 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -539,6 +539,11 @@ impl<'a> CrateLocator<'a> { match get_metadata_section(self.target, flavor, &lib, self.metadata_loader) { Ok(blob) => { if let Some(h) = self.crate_matches(&blob, &lib) { + if blob.is_reference_only() { + if slot.is_none() { + todo!("return error"); + } + } (h, blob) } else { info!("metadata mismatch"); @@ -603,6 +608,19 @@ impl<'a> CrateLocator<'a> { } fn crate_matches(&mut self, metadata: &MetadataBlob, libpath: &Path) -> Option { + if metadata.is_reference_only() { + let hash = metadata.get_hash(); + if let Some(expected_hash) = self.hash { + if hash != expected_hash { + info!("Rejecting via hash: expected {} got {}", expected_hash, hash); + self.crate_rejections + .via_hash + .push(CrateMismatch { path: libpath.to_path_buf(), got: hash.to_string() }); + return None; + } + } + } + let root = metadata.get_root(); if root.is_proc_macro_crate() != self.is_proc_macro { info!( diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 
e6300af7c2eb..ec598b5be1cb 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -628,15 +628,22 @@ impl<'tcx> MetadataBlob { crate fn check_compatibility(&self) -> Result<(), String> { if !self.blob().starts_with(METADATA_HEADER) { - if self.blob().starts_with(b"rust") { + if self.blob().starts_with(PREV_METADATA_HEADER) { + let found_version = Lazy::::from_position( + NonZeroUsize::new(PREV_METADATA_HEADER.len() + 4).unwrap(), + ) + .decode(self); + return Err(found_version); + } else if self.blob().starts_with(b"rust") { return Err("".to_string()); } return Err("".to_string()); } - let found_version = - Lazy::::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 4).unwrap()) - .decode(self); + let found_version = Lazy::::from_position( + NonZeroUsize::new(METADATA_HEADER.len() + 8 + 4 + 4).unwrap(), + ) + .decode(self); if rustc_version() != found_version { return Err(found_version); } @@ -644,14 +651,33 @@ impl<'tcx> MetadataBlob { Ok(()) } + crate fn is_reference_only(&self) -> bool { + let slice = &self.blob()[..]; + let offset = METADATA_HEADER.len() + 8; + let pos = u32::from_le_bytes(slice[offset..offset + 4].try_into().unwrap()); + pos == 0 + } + + crate fn get_hash(&self) -> Svh { + let slice = &self.blob()[..]; + let offset = METADATA_HEADER.len() + 4; + Svh::new(u64::from_le_bytes(slice[offset..offset + 8].try_into().unwrap())) + } + crate fn get_root(&self) -> CrateRoot<'tcx> { let slice = &self.blob()[..]; - let offset = METADATA_HEADER.len(); + let offset = METADATA_HEADER.len() + 8; let pos = (u32::from_le_bytes(slice[offset..offset + 4].try_into().unwrap())) as usize; + assert_ne!(pos, 0, "Tried to get crate root for reference-only metadata"); Lazy::>::from_position(NonZeroUsize::new(pos).unwrap()).decode(self) } crate fn list_crate_metadata(&self, out: &mut dyn io::Write) -> io::Result<()> { + if self.is_reference_only() { + writeln!(out, "Split metadata crate hash {}", 
self.get_hash())?; + return Ok(()); + } + let root = self.get_root(); writeln!(out, "Crate info:")?; writeln!(out, "name {}{}", root.name, root.extra_filename)?; diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index fc7ee9fb521c..99167bd4d316 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2136,18 +2136,24 @@ fn prefetch_mir(tcx: TyCtxt<'_>) { #[derive(Encodable, Decodable)] pub struct EncodedMetadata { - raw_data: Vec, + full: Vec, + reference: Option>, } impl EncodedMetadata { #[inline] pub fn new() -> EncodedMetadata { - EncodedMetadata { raw_data: Vec::new() } + EncodedMetadata { full: Vec::new(), reference: None } } #[inline] - pub fn raw_data(&self) -> &[u8] { - &self.raw_data + pub fn full(&self) -> &[u8] { + &self.full + } + + #[inline] + pub fn maybe_reference(&self) -> &[u8] { + self.reference.as_ref().unwrap_or(&self.full) } } @@ -2173,20 +2179,26 @@ pub fn encode_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata { .0 } -fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { +fn encode_metadata_header<'a, 'tcx>( + tcx: TyCtxt<'tcx>, + hygiene_ctxt: &'a HygieneEncodeContext, +) -> EncodeContext<'a, 'tcx> { let mut encoder = opaque::Encoder::new(vec![]); encoder.emit_raw_bytes(METADATA_HEADER).unwrap(); + encoder.emit_raw_bytes(&tcx.crate_hash(LOCAL_CRATE).as_u64().to_le_bytes()).unwrap(); + // Will be filled with the root position after encoding everything. 
encoder.emit_raw_bytes(&[0, 0, 0, 0]).unwrap(); + // Reserved for future extension + encoder.emit_raw_bytes(&[0, 0, 0, 0]).unwrap(); + let source_map_files = tcx.sess.source_map().files(); let source_file_cache = (source_map_files[0].clone(), 0); let required_source_files = Some(GrowableBitSet::with_capacity(source_map_files.len())); drop(source_map_files); - let hygiene_ctxt = HygieneEncodeContext::default(); - let mut ecx = EncodeContext { opaque: encoder, tcx, @@ -2199,12 +2211,19 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { interpret_allocs: Default::default(), required_source_files, is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro), - hygiene_ctxt: &hygiene_ctxt, + hygiene_ctxt, }; // Encode the rustc version string in a predictable location. rustc_version().encode(&mut ecx).unwrap(); + ecx +} + +fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { + let hygiene_ctxt = HygieneEncodeContext::default(); + let mut ecx = encode_metadata_header(tcx, &hygiene_ctxt); + // Encode all the entries and extra information in the crate, // culminating in the `CrateRoot` which points to all of it. let root = ecx.encode_crate_root(); @@ -2212,14 +2231,23 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { let mut result = ecx.opaque.into_inner(); // Encode the root position. 
- let header = METADATA_HEADER.len(); + let header = METADATA_HEADER.len() + 8; let pos = root.position.get(); result[header..header + 4].copy_from_slice(&pos.to_le_bytes()); // Record metadata size for self-profiling tcx.prof.artifact_size("crate_metadata", "crate_metadata", result.len() as u64); - EncodedMetadata { raw_data: result } + let reference_result = if tcx.sess.opts.debugging_opts.split_metadata { + let hygiene_ctxt = HygieneEncodeContext::default(); + let ecx = encode_metadata_header(tcx, &hygiene_ctxt); + // Don't fill in the root position for reference metadata + Some(ecx.opaque.into_inner()) + } else { + None + }; + + EncodedMetadata { full: result, reference: reference_result } } pub fn provide(providers: &mut Providers) { diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 8424a31d59fc..4223c179d19f 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -54,11 +54,30 @@ const METADATA_VERSION: u8 = 6; /// Metadata header which includes `METADATA_VERSION`. /// -/// This header is followed by the position of the `CrateRoot`, -/// which is encoded as a 32-bit big-endian unsigned integer, -/// and further followed by the rustc version string. +/// # Format +/// +/// |field |size | +/// |--------|--------| +/// |magic |8 | +/// |svh |8 | +/// |root |4 | +/// |reserved|4 | +/// |version |variable| pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION]; +/// The previous metadata header. +/// +/// This is only used for reporting the rustc version of the incompatible crate. +/// +/// # Format +/// +/// |field |size | +/// |-------|--------| +/// |magic |8 | +/// |root |4 | +/// |version|variable| +pub const PREV_METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, 5]; + /// Additional metadata for a `Lazy` where `T` may not be `Sized`, /// e.g. for `Lazy<[T]>`, this is the length (count of `T` values). 
trait LazyMeta { diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index ae1b638c3446..a66bb2e2de91 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1427,6 +1427,8 @@ options! { split_dwarf_inlining: bool = (true, parse_bool, [UNTRACKED], "provide minimal debug info in the object/executable to facilitate online \ symbolication/stack traces in the absence of .dwo/.dwp files when using Split DWARF"), + split_metadata: bool = (false, parse_bool, [UNTRACKED], + "split metadata out of libraries into .rmeta files"), symbol_mangling_version: Option = (None, parse_symbol_mangling_version, [TRACKED], "which mangling version to use for symbol names ('legacy' (default) or 'v0')"), From af2535879ec3f15480f82d4caf76bfacfd0eeafd Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 12 Feb 2022 18:53:53 +0100 Subject: [PATCH 07/10] Rustbuild: enable -Zsplit-metadata for stage != 0 --- src/bootstrap/builder.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bootstrap/builder.rs b/src/bootstrap/builder.rs index 4f88b5854b69..5b07e9756e5b 100644 --- a/src/bootstrap/builder.rs +++ b/src/bootstrap/builder.rs @@ -1095,6 +1095,11 @@ impl<'a> Builder<'a> { rustflags.arg("-Zunstable-options"); } + if stage != 0 { + // FIXME remove once cargo enables this by default + rustflags.arg("-Zsplit-metadata"); + } + // FIXME: It might be better to use the same value for both `RUSTFLAGS` and `RUSTDOCFLAGS`, // but this breaks CI. At the very least, stage0 `rustdoc` needs `--cfg bootstrap`. See // #71458. 
From 7969fd33357bd0f20e909c12c7ffe38a7f9b8de7 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 12 Feb 2022 19:31:43 +0100 Subject: [PATCH 08/10] Fix encoder --- compiler/rustc_metadata/src/rmeta/encoder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 99167bd4d316..8a3934616dba 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2233,7 +2233,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { // Encode the root position. let header = METADATA_HEADER.len() + 8; let pos = root.position.get(); - result[header..header + 4].copy_from_slice(&pos.to_le_bytes()); + result[header..header + 4].copy_from_slice(&u32::try_from(pos).unwrap().to_le_bytes()); // Record metadata size for self-profiling tcx.prof.artifact_size("crate_metadata", "crate_metadata", result.len() as u64); From 5f0a9f8b7129c129bdc7fde239cb35f7bc608e03 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 13 Feb 2022 11:56:14 +0100 Subject: [PATCH 09/10] Don't try to get crate root for reference only metadata --- compiler/rustc_metadata/src/locator.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index ce1131072d39..22fd5358a98a 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -619,6 +619,7 @@ impl<'a> CrateLocator<'a> { return None; } } + return Some(hash); } let root = metadata.get_root(); From c0d8f3ee3a59463a0903244bb575c1f7b6095d32 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 13 Feb 2022 12:28:09 +0100 Subject: [PATCH 10/10] Ship rmeta files --- src/bootstrap/check.rs | 13 +------------ src/bootstrap/compile.rs | 23 ++++------------------- 2 files changed, 5 insertions(+), 31 deletions(-) diff --git a/src/bootstrap/check.rs b/src/bootstrap/check.rs index 28e7f1fdca7a..21a60f65c78f 
100644 --- a/src/bootstrap/check.rs +++ b/src/bootstrap/check.rs @@ -90,14 +90,7 @@ impl Step for Std { "Checking stage{} std artifacts ({} -> {})", builder.top_stage, &compiler.host, target )); - run_cargo( - builder, - cargo, - args(builder), - &libstd_stamp(builder, compiler, target), - vec![], - true, - ); + run_cargo(builder, cargo, args(builder), &libstd_stamp(builder, compiler, target), vec![]); // We skip populating the sysroot in non-zero stage because that'll lead // to rlib/rmeta conflicts if std gets built during this session. @@ -146,7 +139,6 @@ impl Step for Std { args(builder), &libstd_test_stamp(builder, compiler, target), vec![], - true, ); } } @@ -222,7 +214,6 @@ impl Step for Rustc { args(builder), &librustc_stamp(builder, compiler, target), vec![], - true, ); let libdir = builder.sysroot_libdir(compiler, target); @@ -287,7 +278,6 @@ impl Step for CodegenBackend { args(builder), &codegen_backend_stamp(builder, compiler, target, backend), vec![], - true, ); } } @@ -354,7 +344,6 @@ macro_rules! 
tool_check_step { args(builder), &stamp(builder, compiler, target), vec![], - true, ); /// Cargo's output path in a given stage, compiled by a particular diff --git a/src/bootstrap/compile.rs b/src/bootstrap/compile.rs index e17de0ba49eb..f487ccc6f9a4 100644 --- a/src/bootstrap/compile.rs +++ b/src/bootstrap/compile.rs @@ -112,14 +112,7 @@ impl Step for Std { "Building stage{} std artifacts ({} -> {})", compiler.stage, &compiler.host, target )); - run_cargo( - builder, - cargo, - vec![], - &libstd_stamp(builder, compiler, target), - target_deps, - false, - ); + run_cargo(builder, cargo, vec![], &libstd_stamp(builder, compiler, target), target_deps); builder.ensure(StdLink { compiler: builder.compiler(compiler.stage, builder.config.build), @@ -629,14 +622,7 @@ impl Step for Rustc { "Building stage{} compiler artifacts ({} -> {})", compiler.stage, &compiler.host, target )); - run_cargo( - builder, - cargo, - vec![], - &librustc_stamp(builder, compiler, target), - vec![], - false, - ); + run_cargo(builder, cargo, vec![], &librustc_stamp(builder, compiler, target), vec![]); builder.ensure(RustcLink { compiler: builder.compiler(compiler.stage, builder.config.build), @@ -839,7 +825,7 @@ impl Step for CodegenBackend { "Building stage{} codegen backend {} ({} -> {})", compiler.stage, backend, &compiler.host, target )); - let files = run_cargo(builder, cargo, vec![], &tmp_stamp, vec![], false); + let files = run_cargo(builder, cargo, vec![], &tmp_stamp, vec![]); if builder.config.dry_run { return; } @@ -1232,7 +1218,6 @@ pub fn run_cargo( tail_args: Vec, stamp: &Path, additional_target_deps: Vec<(PathBuf, DependencyType)>, - is_check: bool, ) -> Vec { if builder.config.dry_run { return Vec::new(); @@ -1271,7 +1256,7 @@ pub fn run_cargo( || filename.ends_with(".a") || is_debug_info(&filename) || is_dylib(&filename) - || (is_check && filename.ends_with(".rmeta"))) + || filename.ends_with(".rmeta")) { continue; }