Update 4-AI_image_multimodal_location_GPT.R

Remove redundancies in the creation of raw_vector and add the image description generated by Azure Vision
fevieira27 · Jan 10, 2024 · ccdbbdc · ccdbbdc
1 parent 4aad418
commit ccdbbdc
Showing 1 changed file with 14 additions and 21 deletions.
diff --git a/4-AI_image_multimodal_location_GPT.R b/4-AI_image_multimodal_location_GPT.R
@@ -140,6 +140,13 @@ if (info$height > info$width) {
 }
 plot(img)
 
+# read the local image file as raw bytes, reducing to 4Mb (if needed) which is Azure's max limit
+if (file.size(image_path)>4150000){
+  raw_vector <- readBin(image_path, "raw", 4150000)
+} else {
+  raw_vector <- readBin(image_path, "raw", file.info(image_path)$size)
+}
+
 # Resizing image based on model requirements
 image_224 <- image_load(image_path, target_size = c(224, 224))
 image_299 <- image_load(image_path, target_size = c(299, 299))
@@ -405,12 +412,8 @@ concat <- rbind(concat,filtered_tags)
 
 ############################### Cloud ML Models (Require API keys)
 # Get tags from Azure Vision
-  # img <- image_read(image_path) # needed?
-  if (file.size(image_path)>4150000){
-    con <- file(image_path, "rb")
-    raw_vector <- readBin(con, what = raw(), n = 4150000)
+  if (file.size(image_path)>4150000){ # needed?
     resultsAzure <- analyze(vis, raw_vector, domain = "landmarks", feature_types = "tags")$tags
-    close(con)
   } else {
     resultsAzure <- analyze(vis, image_path, domain = "landmarks", feature_types = "tags")$tags
   }
@@ -493,13 +496,6 @@ date <- format(exif_data$CreateDate, format = "%d/%B/%Y")
 endp <- cognitive_endpoint(cognitiveservicesURL,
     service_type="ComputerVision", key=azure_api_key)
 
-# read the local image file as raw bytes, reducing to 4Mb (if needed) which is Azure's max limit
-if (file.size(image_path)>4150000){
-  raw_vector <- readBin(image_path, "raw", 4150000)
-} else {
-  raw_vector <- readBin(image_path, "raw", file.info(image_path)$size)
-}
-
 # call the cognitive endpoint to analyze the image for landmarks
 landmarkAzure <- call_cognitive_endpoint(endp, operation = "analyze",
 	body = raw_vector,
@@ -721,16 +717,14 @@ if (lon!="") {
 }
 
 # Extract the text from the image using Azure Computer Vision API (OCR)
-if (file.size(image_path)>4150000){
-  text <- read_text(vis, raw_vector, detect_orientation = TRUE, language = "en")
+if (file.size(image_path)>4150000){ # needed?
+  textOCR <- read_text(vis, raw_vector, detect_orientation = TRUE, language = "en")
 } else {
-  text <- read_text(vis, image_path, detect_orientation = TRUE, language = "en")
+  textOCR <- read_text(vis, image_path, detect_orientation = TRUE, language = "en")
 }
 
-# If no text was found using OCR, try to describe the image using Azure AI:
-if (length(text)==0){
-  text <- analyze(vis, image_path, domain = "landmarks", feature_types = "description")$description$captions$text
-}
+# Describe the image using Azure AI:
+text <- analyze(vis, image_path, domain = "landmarks", feature_types = "description")$description$captions$text
 
 # Define a string for the Bing Chat prompt, that will generate the text for the social media post. Feel free to change this to your liking
 str <- ""
@@ -753,6 +747,5 @@ browseURL(url)
 # browseURL("https://www.bing.com/search?showconv=1&sendquery=1&q=Hello%20Bing")
 
 # Show main results in R Console, which could be used on prompt for Bing Chat
-  cat(" Hashtags:     ", hashtags, "\n", "GPS Coordin.: ", lat, ",", lon, "\n", "Landmark Name:", name, "\n", "Landm. Source:", source, "\n", "Text/Descript:", paste(text, collapse = ", "), "\n", "Full address: ", address, "\n", "City:         ", city, "\n", "Country:      ", country, "\n", "Camera :      ", exif_data$Make, exif_data$Model, "\n", "Date   :      ", date, "\n")
-
+  cat(" Hashtags:     ", hashtags, "\n", "GPS Coordin.: ", lat, ",", lon, "\n", "Landmark Name:", name, "\n", "Landm. Source:", source, "\n", "Text OCR     :", paste(textOCR, collapse = ", "), "\n", "Img. Descript:", paste(text, collapse = ", "), "\n", "Full address: ", address, "\n", "City:         ", city, "\n", "Country:      ", country, "\n", "Camera :      ", exif_data$Make, exif_data$Model, "\n", "Date   :      ", date, "\n")