From 1c8fa0dd702e55a32cffb21f82ed447c6f8efe7c Mon Sep 17 00:00:00 2001 From: iequidoo Date: Sun, 30 Jul 2023 02:05:49 -0300 Subject: [PATCH] feat: Remove original file stem from filenames in the blobstorage (#4309) This way filenames in the blobstorage are just random hex numbers. This also allows us to get rid of the `sanitize-filename` dependency. --- Cargo.lock | 1 - Cargo.toml | 1 - node/test/test.js | 1 - python/tests/test_1_online.py | 3 - python/tests/test_2_increation.py | 1 - src/blob.rs | 125 ++++++++---------------------- src/chat.rs | 2 +- src/mimeparser.rs | 2 +- src/param.rs | 2 - src/receive_imf/tests.rs | 2 - 10 files changed, 36 insertions(+), 104 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3807895c06..a18914217f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1164,7 +1164,6 @@ dependencies = [ "reqwest", "rusqlite", "rust-hsluv", - "sanitize-filename", "serde", "serde_json", "sha-1", diff --git a/Cargo.toml b/Cargo.toml index 5625f62bbc..0b1bf93d45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,7 +75,6 @@ regex = "1.8" reqwest = { version = "0.11.18", features = ["json"] } rusqlite = { version = "0.29", features = ["sqlcipher"] } rust-hsluv = "0.1" -sanitize-filename = "0.4" serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } sha-1 = "0.10" diff --git a/node/test/test.js b/node/test/test.js index 2c79125840..571197055f 100644 --- a/node/test/test.js +++ b/node/test/test.js @@ -446,7 +446,6 @@ describe('Offline Tests with unconfigured account', function () { context.setChatProfileImage(chatId, imagePath) const blobPath = context.getChat(chatId).getProfileImage() expect(blobPath.startsWith(blobs)).to.be.true - expect(blobPath.includes('image')).to.be.true expect(blobPath.endsWith('.jpeg')).to.be.true context.setChatProfileImage(chatId, null) diff --git a/python/tests/test_1_online.py b/python/tests/test_1_online.py index 9b13a622ab..317f57e2fc 100644 --- a/python/tests/test_1_online.py +++ b/python/tests/test_1_online.py @@ -182,13 +182,11 @@ def send_and_receive_message(): msg = send_and_receive_message() assert msg.text == "withfile" assert open(msg.file_path).read() == "some data" - msg.file_path.index(basename) assert msg.file_path.endswith(ext) msg2 = send_and_receive_message() assert msg2.text == "withfile" assert open(msg2.file_path).read() == "some data" - msg2.file_path.index(basename) assert msg2.file_path.endswith(ext) assert msg.file_path != msg2.file_path @@ -215,7 +213,6 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp): msg = ac2.get_message_by_id(ev.data2) assert open(msg.file_path).read() == content - msg.file_path.index(basename) assert msg.file_path.endswith(ext) diff --git a/python/tests/test_2_increation.py b/python/tests/test_2_increation.py index b0f4d80998..bf4e76052e 100644 --- a/python/tests/test_2_increation.py +++ b/python/tests/test_2_increation.py @@ -50,7 +50,6 @@ def test_no_increation_copies_to_blobdir(self, tmp_path, acfactory, lp): src = tmp_path / "file.txt" src.write_text("hello there\n") msg = chat.send_file(str(src)) - assert msg.file_path.startswith(os.path.join(ac1.get_blobdir(), "file")) assert msg.file_path.endswith(".txt") def test_forward_increation(self, acfactory, data, lp): diff --git a/src/blob.rs b/src/blob.rs index 1459f7e161..ee73603cd1 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -47,8 +47,8 @@ impl<'a> BlobObject<'a> { data: &[u8], ) -> Result> { let blobdir = context.get_blobdir(); - let (stem, ext) = BlobObject::sanitise_name(suggested_name); - let (name, mut file) = BlobObject::create_new_file(context, blobdir, &stem, &ext).await?; + let ext = BlobObject::get_extension(suggested_name); + let (name, mut file) = BlobObject::create_new_file(context, blobdir, &ext).await?; file.write_all(data).await.context("file write failure")?; // workaround a bug in async-std @@ -68,12 +68,11 @@ impl<'a> BlobObject<'a> { async fn create_new_file( context: &Context, dir: &Path, - stem: &str, ext: &str, ) -> Result<(String, fs::File)> { let mut attempt = 0; loop { - let name = format!("{}-{:016x}{}", stem, rand::random::(), ext); + let name = format!("{:016x}{}", rand::random::(), ext); attempt += 1; let path = dir.join(&name); match fs::OpenOptions::new() @@ -104,9 +103,9 @@ impl<'a> BlobObject<'a> { let mut src_file = fs::File::open(src) .await .with_context(|| format!("failed to open file {}", src.display()))?; - let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy()); + let ext = BlobObject::get_extension(&src.to_string_lossy()); let (name, mut dst_file) = - BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?; + BlobObject::create_new_file(context, context.get_blobdir(), &ext).await?; let name_for_err = name.clone(); if let Err(err) = io::copy(&mut src_file, &mut dst_file).await { // Attempt to remove the failed file, swallow errors resulting from that. @@ -130,10 +129,8 @@ impl<'a> BlobObject<'a> { /// /// If the source file is not a path to into the blob directory /// the file will be copied into the blob directory first. If the - /// source file is already in the blobdir it will not be copied - /// and only be created if it is a valid blobname, that is no - /// subdirectory is used and [BlobObject::sanitise_name] does not - /// modify the filename. + /// source file is already in the blobdir (but not in a subdirectory) + /// it will not be copied and only be created if it is a valid blobname. /// /// Paths into the blob directory may be either defined by an absolute path /// or by the relative prefix `$BLOBDIR`. @@ -150,8 +147,7 @@ impl<'a> BlobObject<'a> { /// Returns a [BlobObject] for an existing blob from a path. /// /// The path must designate a file directly in the blobdir and - /// must use a valid blob name. That is after sanitisation the - /// name must still be the same, that means it must be valid UTF-8 + /// must use a valid blob name. That means it must be valid UTF-8 /// and not have any special characters in it. pub fn from_path(context: &'a Context, path: &Path) -> Result> { let rel_path = path @@ -225,21 +221,8 @@ impl<'a> BlobObject<'a> { } } - /// Create a safe name based on a messy input string. - /// - /// The safe name will be a valid filename on Unix and Windows and - /// not contain any path separators. The input can contain path - /// segments separated by either Unix or Windows path separators, - /// the rightmost non-empty segment will be used as name, - /// sanitised for special characters. - /// - /// The resulting name is returned as a tuple, the first part - /// being the stem or basename and the second being an extension, - /// including the dot. E.g. "foo.txt" is returned as `("foo", - /// ".txt")` while "bar" is returned as `("bar", "")`. - /// - /// The extension part will always be lowercased. - fn sanitise_name(name: &str) -> (String, String) { + /// Get a file extension if any, including the dot, in lower case, otherwise an empty string. + fn get_extension(name: &str) -> String { let mut name = name.to_string(); for part in name.rsplit('/') { if !part.is_empty() { @@ -253,20 +236,12 @@ impl<'a> BlobObject<'a> { break; } } - let opts = sanitize_filename::Options { - truncate: true, - windows: true, - replacement: "", - }; - let clean = sanitize_filename::sanitize_with_options(name, opts); // Let's take the tricky filename // "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example. // Split it into "file" and "with_lots_of_characters_behind_point_and_double_ending.tar.gz": - let mut iter = clean.splitn(2, '.'); - - let stem: String = iter.next().unwrap_or_default().chars().take(64).collect(); - // stem == "file" + let mut iter = name.splitn(2, '.'); + iter.next(); let ext_chars = iter.next().unwrap_or_default().chars(); let ext: String = ext_chars @@ -279,11 +254,10 @@ impl<'a> BlobObject<'a> { // ext == "d_point_and_double_ending.tar.gz" if ext.is_empty() { - (stem, "".to_string()) + ext } else { - (stem, format!(".{ext}").to_lowercase()) - // Return ("file", ".d_point_and_double_ending.tar.gz") - // which is not perfect but acceptable. + format!(".{ext}").to_lowercase() + // Return ".d_point_and_double_ending.tar.gz", which is not perfect but acceptable. } } @@ -638,7 +612,7 @@ mod tests { async fn test_create() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}$").unwrap(); assert!(re.is_match(blob.as_file_name())); let fname = t.get_blobdir().join(blob.as_file_name()); let data = fs::read(fname).await.unwrap(); @@ -657,7 +631,7 @@ mod tests { async fn test_lowercase_ext() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap(); - let re = Regex::new("^\\$BLOBDIR/foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt$").unwrap(); assert!(re.is_match(blob.as_name())); } @@ -665,7 +639,7 @@ mod tests { async fn test_as_file_name() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap(); assert!(re.is_match(blob.as_file_name())); } @@ -673,7 +647,7 @@ mod tests { async fn test_as_rel_path() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap(); assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); } @@ -689,7 +663,7 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_dup() { let t = TestContext::new().await; - let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap(); let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); @@ -710,7 +684,7 @@ mod tests { let blob = BlobObject::create(&t, "foo.tar.gz", b"hello") .await .unwrap(); - let re = Regex::new("^foo-[[:xdigit:]]{16}.tar.gz$").unwrap(); + let re = Regex::new("^[[:xdigit:]]{16}.tar.gz$").unwrap(); assert!(re.is_match(blob.as_file_name())); let foo_path = t.get_blobdir().join(blob.as_file_name()); assert!(foo_path.exists()); @@ -725,7 +699,6 @@ mod tests { } else { let name = fname.to_str().unwrap(); println!("{name}"); - assert!(name.starts_with("foo")); assert!(name.ends_with(".tar.gz")); } } @@ -746,7 +719,7 @@ mod tests { let src = t.dir.path().join("src"); fs::write(&src, b"boo").await.unwrap(); let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap(); - let re = Regex::new("^\\$BLOBDIR/src-[[:xdigit:]]{16}$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap(); assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -768,7 +741,7 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - let re = Regex::new("^\\$BLOBDIR/external-[[:xdigit:]]{16}$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap(); assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -781,20 +754,6 @@ mod tests { assert_eq!(data, b"boo"); } - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn test_create_from_name_long() { - let t = TestContext::new().await; - let src_ext = t.dir.path().join("autocrypt-setup-message-4137848473.html"); - fs::write(&src_ext, b"boo").await.unwrap(); - let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) - .await - .unwrap(); - let re = - Regex::new("^\\$BLOBDIR/autocrypt-setup-message-4137848473-[[:xdigit:]]{16}.html$") - .unwrap(); - assert!(re.is_match(blob.as_name())); - } - #[test] fn test_is_blob_name() { assert!(BlobObject::is_acceptible_blob_name("foo")); @@ -806,42 +765,26 @@ mod tests { } #[test] - fn test_sanitise_name() { - let (stem, ext) = - BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt"); + fn test_get_extension() { + let ext = BlobObject::get_extension("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt"); assert_eq!(ext, ".txt"); - assert!(!stem.is_empty()); - // the extensions are kept together as between stem and extension a number may be added - - // and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz` - let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz"); - assert_eq!(stem, "wot"); + let ext = BlobObject::get_extension("wot.tar.gz"); assert_eq!(ext, ".tar.gz"); - let (stem, ext) = BlobObject::sanitise_name(".foo.bar"); - assert_eq!(stem, ""); + let ext = BlobObject::get_extension(".foo.bar"); + // TODO: Should be ".bar", filenames starting with "." are for hidden files and this "." + // participates in a file stem. assert_eq!(ext, ".foo.bar"); - let (stem, ext) = BlobObject::sanitise_name("foo?.bar"); - assert!(stem.contains("foo")); - assert!(!stem.contains('?')); + let ext = BlobObject::get_extension("foo?.bar"); assert_eq!(ext, ".bar"); - let (stem, ext) = BlobObject::sanitise_name("no-extension"); - assert_eq!(stem, "no-extension"); + let ext = BlobObject::get_extension("no-extension"); assert_eq!(ext, ""); - let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c"); + let ext = BlobObject::get_extension("path/ignored\\this: is* forbidden?.c"); assert_eq!(ext, ".c"); - assert!(!stem.contains("path")); - assert!(!stem.contains("ignored")); - assert!(stem.contains("this")); - assert!(stem.contains("forbidden")); - assert!(!stem.contains('/')); - assert!(!stem.contains('\\')); - assert!(!stem.contains(':')); - assert!(!stem.contains('*')); - assert!(!stem.contains('?')); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -856,7 +799,7 @@ mod tests { let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); let blobdir = t.get_blobdir().to_str().unwrap(); assert!(avatar_blob.starts_with(blobdir)); - let re = Regex::new("avatar-[[:xdigit:]]{16}.jpg$").unwrap(); + let re = Regex::new("[[:xdigit:]]{16}.jpg$").unwrap(); assert!(re.is_match(&avatar_blob)); let avatar_blob = Path::new(&avatar_blob); assert!(avatar_blob.exists()); @@ -925,7 +868,7 @@ mod tests { let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); let blobdir = t.get_blobdir().to_str().unwrap(); assert!(avatar_blob.starts_with(blobdir)); - let re = Regex::new("avatar-[[:xdigit:]]{16}.png$").unwrap(); + let re = Regex::new("[[:xdigit:]]{16}.png$").unwrap(); assert!(re.is_match(&avatar_blob)); assert!(Path::new(&avatar_blob).exists()); assert_eq!( diff --git a/src/chat.rs b/src/chat.rs index 566e4147de..28e72b37ea 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -6248,7 +6248,7 @@ mod tests { msg.param.get(Param::Filename).unwrap(), "harmless_file.txt.exe" ); - let re = Regex::new("^\\$BLOBDIR/harmless_file-[[:xdigit:]]{16}.txt.exe$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt.exe$").unwrap(); assert!(re.is_match(msg.param.get(Param::File).unwrap())); Ok(()) } diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 38759e457a..0c41f97e16 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -3712,7 +3712,7 @@ Message. mime_message.parts[0].msg, "this is a classic email – I attached the .EML file".to_string() ); - let re = Regex::new("^\\$BLOBDIR/-[[:xdigit:]]{16}.eml$").unwrap(); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.eml$").unwrap(); assert!(re.is_match(mime_message.parts[0].param.get(Param::File).unwrap())); assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string())); diff --git a/src/param.rs b/src/param.rs index 8768f996d2..5f57bcb00a 100644 --- a/src/param.rs +++ b/src/param.rs @@ -530,8 +530,6 @@ mod tests { assert!(p.get_blob(Param::File, &t, false).await.is_err()); fs::write(fname, b"boo").await.unwrap(); - let blob = p.get_blob(Param::File, &t, true).await.unwrap().unwrap(); - assert!(blob.as_file_name().starts_with("foo")); // Blob in blobdir, expect blob. let bar_path = t.get_blobdir().join("bar"); diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index d95f1659a2..85d4de1fdc 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -1457,7 +1457,6 @@ async fn test_pdf_filename_simple() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/simple")); assert!(file_path.ends_with(".pdf")); } @@ -1473,7 +1472,6 @@ async fn test_pdf_filename_continuation() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/test pdf äöüß")); assert!(file_path.ends_with(".pdf")); }