pubky · amirRamirfatahi · Dec 4, 2024 · Dec 4, 2024 · Dec 18, 2024 · Dec 20, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -15,6 +15,7 @@ url = "2.5.4"
 base32 = "0.5.1"
 blake3 = "1.5.4"
 utoipa = { version = "5.2.0", optional = true }
+mime = "0.3"
 
 [dev-dependencies]
 pubky = "0.3.0"

diff --git a/README.md b/README.md
@@ -95,17 +95,17 @@ Pubky.app models are designed for decentralized content sharing. The system uses
 
 ### PubkyAppFile
 
-**Description:** Represents metadata of file uploaded by the user.
+**Description:** Represents a file uploaded by the user: its metadata plus a reference, in the `src` property, to the blob that holds the file's actual contents.
 
 **URI:** `/pub/pubky.app/files/:file_id`
 
-| **Field**      | **Type** | **Description**             | **Validation Rules**        |
-| -------------- | -------- | --------------------------- | --------------------------- |
-| `name`         | String   | Name of the file.           | Required.                   |
-| `created_at`   | Integer  | Unix timestamp of creation. | Required.                   |
-| `src`          | String   | File blob URL               | Required.                   |
-| `content_type` | String   | MIME type of the file.      | Required.                   |
-| `size`         | Integer  | Size of the file in bytes.  | Required. Positive integer. |
+| **Field**      | **Type** | **Description**             | **Validation Rules**                           |
+| -------------- | -------- | --------------------------- | ---------------------------------------------- |
+| `name`         | String   | Name of the file.           | Required. Must be 1-255 characters             |
+| `created_at`   | Integer  | Unix timestamp of creation. | Required.                                      |
+| `src`          | String   | File blob URL               | Required. Must be a valid URL. Max length 1024 |
+| `content_type` | String   | MIME type of the file.      | Required. Must be a supported IANA MIME type   |
+| `size`         | Integer  | Size of the file in bytes.  | Required. Positive integer. Max size is 10 MB  |
 
 **Validation Notes:**
 

diff --git a/src/file.rs b/src/file.rs
@@ -1,13 +1,46 @@
+use std::str::FromStr;
+
 use crate::{
     common::timestamp,
     traits::{HasPath, TimestampId, Validatable},
     APP_PATH,
 };
+use mime::Mime;
 use serde::{Deserialize, Serialize};
 
+use url::Url;
 #[cfg(feature = "openapi")]
 use utoipa::ToSchema;
 
+const MIN_NAME_LENGTH: usize = 1; // in characters (`chars().count()`), not bytes
+const MAX_NAME_LENGTH: usize = 255; // in characters; `sanitize` truncates longer names
+const MAX_SRC_LENGTH: usize = 1024; // in characters; `sanitize` truncates longer values
+const MAX_SIZE: i64 = 10 * (1 << 20); // 10 MiB (10_485_760 bytes); upper bound for `size`
+
+const VALID_MIME_TYPES: &[&str] = &[ // allow-list matched against `Mime::essence_str()`
+    "application/javascript",
+    "application/json",
+    "application/octet-stream",
+    "application/pdf",
+    "application/x-www-form-urlencoded",
+    "application/xml",
+    "application/zip",
+    "audio/mpeg",
+    "audio/wav",
+    "image/gif",
+    "image/jpeg",
+    "image/png",
+    "image/svg+xml",
+    "image/webp",
+    "multipart/form-data",
+    "text/css",
+    "text/html",
+    "text/plain",
+    "text/xml",
+    "video/mp4",
+    "video/mpeg",
+];
+
 /// Represents a file uploaded by the user.
 /// URI: /pub/pubky.app/files/:file_id
 #[derive(Deserialize, Serialize, Debug, Default, Clone)]
@@ -31,6 +64,7 @@ impl PubkyAppFile {
             content_type,
             size,
         }
+        .sanitize()
     }
 }
 
@@ -43,11 +77,66 @@ impl HasPath for PubkyAppFile {
 }
 
 impl Validatable for PubkyAppFile {
-    // TODO: content_type validation.
+    fn sanitize(self) -> Self { // trim text fields, cap name/src length, blank an invalid src
+        let name = self.name.trim().chars().take(MAX_NAME_LENGTH).collect();
+
+        let sanitized_src = self
+            .src
+            .trim()
+            .chars()
+            .take(MAX_SRC_LENGTH)
+            .collect::<String>();
+
+        let src = match Url::parse(&sanitized_src) {
+            Ok(_) => Some(sanitized_src),
+            Err(_) => None, // Invalid src URL, set to None
+        };
+
+        let content_type = self.content_type.trim().to_string();
+
+        Self {
+            name,
+            created_at: self.created_at,
+            src: src.unwrap_or_default(), // invalid URL becomes "", which `validate` rejects
+            content_type,
+            size: self.size,
+        }
+    }
+
     fn validate(&self, id: &str) -> Result<(), String> {
         self.validate_id(id)?;
-        // TODO: content_type validation.
-        // TODO: size and other validation.
+
+        // Validate name: length is counted in characters, not bytes
+        let name_length = self.name.chars().count();
+
+        if !(MIN_NAME_LENGTH..=MAX_NAME_LENGTH).contains(&name_length) {
+            return Err("Validation Error: Invalid name length".into());
+        }
+
+        // Validate src: must be non-empty (`sanitize` blanks unparseable URLs) and within the cap
+        if self.src.is_empty() {
+            return Err("Validation Error: Invalid src".into());
+        }
+        if self.src.chars().count() > MAX_SRC_LENGTH {
+            return Err("Validation Error: src exceeds maximum length".into());
+        }
+
+        // Validate content type: must parse as a MIME type whose essence is in VALID_MIME_TYPES
+        match Mime::from_str(&self.content_type) {
+            Ok(mime) => {
+                if !VALID_MIME_TYPES.contains(&mime.essence_str()) {
+                    return Err("Validation Error: Invalid content type".into());
+                }
+            }
+            Err(_) => {
+                return Err("Validation Error: Invalid content type".into());
+            }
+        }
+
+        // Validate size: strictly positive and at most MAX_SIZE bytes
+        if self.size <= 0 || self.size > MAX_SIZE {
+            return Err("Validation Error: Invalid size".into());
+        }
         Ok(())
     }
 }
@@ -97,7 +186,7 @@ mod tests {
     fn test_validate_valid() {
         let file = PubkyAppFile::new(
             "example.png".to_string(),
-            "/uploads/example.png".to_string(),
+            "pubky://user_id/pub/pubky.app/blobs/id".to_string(),
             "image/png".to_string(),
             1024,
         );
@@ -110,7 +199,7 @@ mod tests {
     fn test_validate_invalid_id() {
         let file = PubkyAppFile::new(
             "example.png".to_string(),
-            "/uploads/example.png".to_string(),
+            "pubky://user_id/pub/pubky.app/blobs/id".to_string(),
             "image/png".to_string(),
             1024,
         );
@@ -119,21 +208,60 @@ mod tests {
         assert!(result.is_err());
     }
 
+    #[test]
+    fn test_validate_invalid_content_type() {
+        let file = PubkyAppFile::new(
+            "example.png".to_string(),
+            "pubky://user_id/pub/pubky.app/blobs/id".to_string(),
+            "notavalid/content_type".to_string(), // not in VALID_MIME_TYPES; the only invalid field
+            1024,
+        );
+        let id = file.create_id();
+        let result = file.validate(&id);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_validate_invalid_size() {
+        let file = PubkyAppFile::new(
+            "example.png".to_string(),
+            "pubky://user_id/pub/pubky.app/blobs/id".to_string(),
+            "image/png".to_string(), // valid on purpose, so only the size check can fail
+            MAX_SIZE + 1,
+        );
+        let id = file.create_id();
+        let result = file.validate(&id);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_validate_invalid_src() {
+        let file = PubkyAppFile::new(
+            "example.png".to_string(),
+            "not_a_url".to_string(), // no scheme, so it is expected not to parse as a URL
+            "image/png".to_string(), // content type and size are valid: only src can fail
+            1024,
+        );
+        let id = file.create_id();
+        let result = file.validate(&id);
+        assert!(result.is_err());
+    }
+
     #[test]
     fn test_try_from_valid() {
         let file_json = r#"
         {
             "name": "example.png",
             "created_at": 1627849723,
-            "src": "/uploads/example.png",
+            "src": "pubky://user_id/pub/pubky.app/blobs/id",
             "content_type": "image/png",
             "size": 1024
         }
         "#;
 
         let file = PubkyAppFile::new(
             "example.png".to_string(),
-            "/uploads/example.png".to_string(),
+            "pubky://user_id/pub/pubky.app/blobs/id".to_string(),
             "image/png".to_string(),
             1024,
         );
@@ -143,7 +271,7 @@ mod tests {
         let file_parsed = <PubkyAppFile as Validatable>::try_from(&blob, &id).unwrap();
 
         assert_eq!(file_parsed.name, "example.png");
-        assert_eq!(file_parsed.src, "/uploads/example.png");
+        assert_eq!(file_parsed.src, "pubky://user_id/pub/pubky.app/blobs/id");
         assert_eq!(file_parsed.content_type, "image/png");
         assert_eq!(file_parsed.size, 1024);
     }

diff --git a/src/file_blob.rs b/src/file_blob.rs
@@ -0,0 +1,55 @@
+use crate::{
+    traits::{HasPath, HashId},
+    APP_PATH,
+};
+
+use serde::{Deserialize, Serialize};
+
+#[cfg(feature = "openapi")]
+use utoipa::ToSchema;
+
+const SAMPLE_SIZE: usize = 2 * 1024; // bytes sampled from each end of a blob for its id data
+
+/// Represents the raw bytes (blob) of a file uploaded by the user.
+/// URI: /pub/pubky.app/blobs/:blob_id
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
+#[cfg_attr(feature = "openapi", derive(ToSchema))]
+pub struct PubkyAppBlob(pub Vec<u8>);
+
+impl HashId for PubkyAppBlob {
+    fn get_id_data(&self) -> String { // Crockford base32 text of the sampled bytes
+        // Head: up to SAMPLE_SIZE leading bytes; tail: last SAMPLE_SIZE bytes if the blob is longer
+        let start = &self.0[..SAMPLE_SIZE.min(self.0.len())];
+        let end = if self.0.len() > SAMPLE_SIZE {
+            &self.0[self.0.len() - SAMPLE_SIZE..]
+        } else {
+            &[]
+        };
+
+        // Concatenate head + tail; NOTE(review): bytes outside both samples never reach the id data
+        let mut combined = Vec::with_capacity(start.len() + end.len());
+        combined.extend_from_slice(start);
+        combined.extend_from_slice(end);
+
+        base32::encode(base32::Alphabet::Crockford, &combined)
+    }
+}
+
+impl HasPath for PubkyAppBlob {
+    fn create_path(&self) -> String { // assumes APP_PATH ends with '/' (none is inserted) — confirm
+        format!("{}blobs/{}", APP_PATH, self.create_id())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::traits::HashId;
+
+    #[test]
+    fn test_get_id_data_size_is_smaller_than_sample() {
+        let blob = PubkyAppBlob(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+        let id = blob.get_id_data();
+        assert_eq!(id, "041061050R3GG28A"); // Crockford base32 of bytes 1..=10 (head sample only)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -2,6 +2,7 @@ mod bookmark;
 mod common;
 mod feed;
 mod file;
+mod file_blob;
 mod follow;
 mod last_read;
 mod mute;