From 5903a04e9a3d67967366d192b9f6893520351343 Mon Sep 17 00:00:00 2001 From: amir Date: Wed, 4 Dec 2024 01:29:19 -0500 Subject: [PATCH 1/5] Add sanitization and validation for files --- Cargo.lock | 1 + Cargo.toml | 1 + README.md | 16 ++++---- src/file.rs | 113 ++++++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 115 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 867f0d9..827b83b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1189,6 +1189,7 @@ dependencies = [ "anyhow", "base32", "blake3", + "mime", "pubky", "pubky-common", "serde", diff --git a/Cargo.toml b/Cargo.toml index 5d1b585..be25786 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ url = "2.5.4" base32 = "0.5.1" blake3 = "1.5.4" utoipa = { version = "5.2.0", optional = true } +mime = "0.3" [dev-dependencies] pubky = "0.3.0" diff --git a/README.md b/README.md index 48dc363..970e391 100644 --- a/README.md +++ b/README.md @@ -95,17 +95,17 @@ Pubky.app models are designed for decentralized content sharing. The system uses ### PubkyAppFile -**Description:** Represents metadata of file uploaded by the user. +**Description:** Represents a file uploaded by the user, containing its metadata, including a reference to the actual blob of the file in `src` property. **URI:** `/pub/pubky.app/files/:file_id` -| **Field** | **Type** | **Description** | **Validation Rules** | -| -------------- | -------- | --------------------------- | --------------------------- | -| `name` | String | Name of the file. | Required. | -| `created_at` | Integer | Unix timestamp of creation. | Required. | -| `src` | String | File blob URL | Required. | -| `content_type` | String | MIME type of the file. | Required. | -| `size` | Integer | Size of the file in bytes. | Required. Positive integer. 
| +| **Field** | **Type** | **Description** | **Validation Rules** | +| -------------- | -------- | --------------------------- | ---------------------------------------------- | +| `name` | String | Name of the file. | Required. Must be 1-255 characters | +| `created_at` | Integer | Unix timestamp of creation. | Required. | +| `src` | String | File blob URL | Required. Must be a valid URL. Max length 1024 | +| `content_type` | String | MIME type of the file. | Required. Valid IANA MIME type | +| `size` | Integer | Size of the file in bytes. | Required. Positive integer. Max size is 10 MB | **Validation Notes:** diff --git a/src/file.rs b/src/file.rs index 7e54577..9e1c00d 100644 --- a/src/file.rs +++ b/src/file.rs @@ -1,13 +1,22 @@ +use std::str::FromStr; + use crate::{ common::timestamp, traits::{HasPath, TimestampId, Validatable}, APP_PATH, }; +use mime::Mime; use serde::{Deserialize, Serialize}; +use url::Url; #[cfg(feature = "openapi")] use utoipa::ToSchema; +const MIN_NAME_LENGTH: usize = 1; +const MAX_NAME_LENGTH: usize = 255; +const MAX_SRC_LENGTH: usize = 1024; +const MAX_SIZE: i64 = 10_000_000; // 10 MB + /// Represents a file uploaded by the user. /// URI: /pub/pubky.app/files/:file_id #[derive(Deserialize, Serialize, Debug, Default, Clone)] @@ -31,6 +40,7 @@ impl PubkyAppFile { content_type, size, } + .sanitize() } } @@ -43,11 +53,59 @@ impl HasPath for PubkyAppFile { } impl Validatable for PubkyAppFile { - // TODO: content_type validation. 
+ fn sanitize(self) -> Self { + let name = self.name.trim().chars().take(MAX_NAME_LENGTH).collect(); + + let sanitized_src = self + .src + .trim() + .chars() + .take(MAX_SRC_LENGTH) + .collect::(); + + let src = match Url::parse(&sanitized_src) { + Ok(_) => Some(sanitized_src), + Err(_) => None, // Invalid src URL, set to None + }; + + let content_type = self.content_type.trim().to_string(); + + Self { + name, + created_at: self.created_at, + src: src.unwrap_or("".to_string()), + content_type, + size: self.size, + } + } + fn validate(&self, id: &str) -> Result<(), String> { self.validate_id(id)?; - // TODO: content_type validation. - // TODO: size and other validation. + + // Validate name + let name_length = self.name.chars().count(); + + if !(MIN_NAME_LENGTH..=MAX_NAME_LENGTH).contains(&name_length) { + return Err("Validation Error: Invalid name length".into()); + } + + // Validate src + if self.src.chars().count() == 0 { + return Err("Validation Error: Invalid src".into()); + } + if self.src.chars().count() > MAX_SRC_LENGTH { + return Err("Validation Error: src exceeds maximum length".into()); + } + + // Validate content type + if Mime::from_str(&self.content_type).is_err() { + return Err("Validation Error: Invalid content type".into()); + } + + // Validate size + if self.size <= 0 || self.size > MAX_SIZE { + return Err("Validation Error: Invalid size".into()); + } Ok(()) } } @@ -97,7 +155,7 @@ mod tests { fn test_validate_valid() { let file = PubkyAppFile::new( "example.png".to_string(), - "/uploads/example.png".to_string(), + "pubky://user_id/pub/pubky.app/blobs/id".to_string(), "image/png".to_string(), 1024, ); @@ -110,7 +168,7 @@ mod tests { fn test_validate_invalid_id() { let file = PubkyAppFile::new( "example.png".to_string(), - "/uploads/example.png".to_string(), + "pubky://user_id/pub/pubky.app/blobs/id".to_string(), "image/png".to_string(), 1024, ); @@ -119,13 +177,52 @@ mod tests { assert!(result.is_err()); } + #[test] + fn 
test_validate_invalid_content_type() { + let file = PubkyAppFile::new( + "example.png".to_string(), + "pubky://user_id/pub/pubky.app/blobs/id".to_string(), + "notavalid/content_type".to_string(), + 1024, + ); + let id = file.create_id(); + let result = file.validate(&id); + assert!(result.is_err()); + } + + #[test] + fn test_validate_invalid_size() { + let file = PubkyAppFile::new( + "example.png".to_string(), + "pubky://user_id/pub/pubky.app/blobs/id".to_string(), + "notavalid/content_type".to_string(), + MAX_SIZE + 1, + ); + let id = file.create_id(); + let result = file.validate(&id); + assert!(result.is_err()); + } + + #[test] + fn test_validate_invalid_src() { + let file = PubkyAppFile::new( + "example.png".to_string(), + "not_a_url".to_string(), + "notavalid/content_type".to_string(), + MAX_SIZE + 1, + ); + let id = file.create_id(); + let result = file.validate(&id); + assert!(result.is_err()); + } + #[test] fn test_try_from_valid() { let file_json = r#" { "name": "example.png", "created_at": 1627849723, - "src": "/uploads/example.png", + "src": "pubky://user_id/pub/pubky.app/blobs/id", "content_type": "image/png", "size": 1024 } @@ -133,7 +230,7 @@ mod tests { let file = PubkyAppFile::new( "example.png".to_string(), - "/uploads/example.png".to_string(), + "pubky://user_id/pub/pubky.app/blobs/id".to_string(), "image/png".to_string(), 1024, ); @@ -143,7 +240,7 @@ mod tests { let file_parsed = ::try_from(&blob, &id).unwrap(); assert_eq!(file_parsed.name, "example.png"); - assert_eq!(file_parsed.src, "/uploads/example.png"); + assert_eq!(file_parsed.src, "pubky://user_id/pub/pubky.app/blobs/id"); assert_eq!(file_parsed.content_type, "image/png"); assert_eq!(file_parsed.size, 1024); } From dac2b730b9254238c09f770969e3ed5508a8f5e7 Mon Sep 17 00:00:00 2001 From: amir Date: Wed, 4 Dec 2024 16:29:37 -0500 Subject: [PATCH 2/5] fix mime type not actually validating --- src/file.rs | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 
deletions(-) diff --git a/src/file.rs b/src/file.rs index 9e1c00d..ac7e418 100644 --- a/src/file.rs +++ b/src/file.rs @@ -17,6 +17,30 @@ const MAX_NAME_LENGTH: usize = 255; const MAX_SRC_LENGTH: usize = 1024; const MAX_SIZE: i64 = 10_000_000; // 10 MB +const VALID_MIME_TYPES: &[&str] = &[ + "application/javascript", + "application/json", + "application/octet-stream", + "application/pdf", + "application/x-www-form-urlencoded", + "application/xml", + "application/zip", + "audio/mpeg", + "audio/wav", + "image/gif", + "image/jpeg", + "image/png", + "image/svg+xml", + "image/webp", + "multipart/form-data", + "text/css", + "text/html", + "text/plain", + "text/xml", + "video/mp4", + "video/mpeg", +]; + /// Represents a file uploaded by the user. /// URI: /pub/pubky.app/files/:file_id #[derive(Deserialize, Serialize, Debug, Default, Clone)] @@ -97,9 +121,16 @@ impl Validatable for PubkyAppFile { return Err("Validation Error: src exceeds maximum length".into()); } - // Validate content type - if Mime::from_str(&self.content_type).is_err() { - return Err("Validation Error: Invalid content type".into()); + // validate content type + match Mime::from_str(&self.content_type) { + Ok(mime) => { + if !VALID_MIME_TYPES.contains(&mime.essence_str()) { + return Err("Validation Error: Invalid content type".into()); + } + } + Err(_) => { + return Err("Validation Error: Invalid content type".into()); + } } // Validate size From dc840759d27a92370cbd8dcce78547107e93d796 Mon Sep 17 00:00:00 2001 From: amir Date: Wed, 18 Dec 2024 03:21:50 -0500 Subject: [PATCH 3/5] fix file max size --- src/file.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/file.rs b/src/file.rs index ac7e418..1ebd197 100644 --- a/src/file.rs +++ b/src/file.rs @@ -15,7 +15,7 @@ use utoipa::ToSchema; const MIN_NAME_LENGTH: usize = 1; const MAX_NAME_LENGTH: usize = 255; const MAX_SRC_LENGTH: usize = 1024; -const MAX_SIZE: i64 = 10_000_000; // 10 MB +const MAX_SIZE: i64 = 10 * (1 << 20); // 10 MB 
const VALID_MIME_TYPES: &[&str] = &[ "application/javascript", From 5e3e2d27971d2c2a647e7ba7e73c599c83c60e1b Mon Sep 17 00:00:00 2001 From: amir Date: Fri, 20 Dec 2024 03:54:51 -0500 Subject: [PATCH 4/5] Add PubkyAppBlob --- src/file_blob.rs | 36 ++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 37 insertions(+) create mode 100644 src/file_blob.rs diff --git a/src/file_blob.rs b/src/file_blob.rs new file mode 100644 index 0000000..27af7fe --- /dev/null +++ b/src/file_blob.rs @@ -0,0 +1,36 @@ +use crate::traits::HashId; + +const SAMPLE_SIZE: usize = 2 * 1024; + +pub struct PubkyAppBlob(pub Vec); +impl HashId for PubkyAppBlob { + fn get_id_data(&self) -> String { + // Get the start and end samples + let start = &self.0[..SAMPLE_SIZE.min(self.0.len())]; + let end = if self.0.len() > SAMPLE_SIZE { + &self.0[self.0.len() - SAMPLE_SIZE..] + } else { + &[] + }; + + // Combine the samples + let mut combined = Vec::with_capacity(start.len() + end.len()); + combined.extend_from_slice(start); + combined.extend_from_slice(end); + + base32::encode(base32::Alphabet::Crockford, &combined) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::traits::HashId; + + #[test] + fn test_get_id_data_size_is_smaller_than_sample() { + let blob = PubkyAppBlob(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let id = blob.get_id_data(); + assert_eq!(id, "041061050R3GG28A"); + } +} diff --git a/src/lib.rs b/src/lib.rs index a3f32c3..f800724 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ mod bookmark; mod common; mod feed; mod file; +mod file_blob; mod follow; mod last_read; mod mute; From f48540175406a101e02ca022ab430977ae19f1c4 Mon Sep 17 00:00:00 2001 From: amir Date: Fri, 20 Dec 2024 03:58:37 -0500 Subject: [PATCH 5/5] Add file blob type --- src/file_blob.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/file_blob.rs b/src/file_blob.rs index 27af7fe..bd8af18 100644 --- a/src/file_blob.rs +++ b/src/file_blob.rs @@ 
-1,8 +1,21 @@ -use crate::traits::HashId; +use crate::{ + traits::{HasPath, HashId}, + APP_PATH, +}; + +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "openapi")] +use utoipa::ToSchema; const SAMPLE_SIZE: usize = 2 * 1024; +/// Represents the raw blob of a file uploaded by the user. +/// URI: /pub/pubky.app/blobs/:blob_id +#[derive(Deserialize, Serialize, Debug, Default, Clone)] +#[cfg_attr(feature = "openapi", derive(ToSchema))] pub struct PubkyAppBlob(pub Vec); + impl HashId for PubkyAppBlob { fn get_id_data(&self) -> String { // Get the start and end samples @@ -22,6 +35,12 @@ impl HashId for PubkyAppBlob { } } +impl HasPath for PubkyAppBlob { + fn create_path(&self) -> String { + format!("{}blobs/{}", APP_PATH, self.create_id()) + } +} + #[cfg(test)] mod tests { use super::*;