Merge pull request #24 from kscalelabs/second_milestone_serhii

feature_ai_translate
kscalelabs · Sep 12, 2024 · ebe8820 · ebe8820
2 parents 8550111 + 36296e9
commit ebe8820
Show file tree

Hide file tree

Showing 11 changed files with 281 additions and 84 deletions.
diff --git a/frontend/src/api/api.ts b/frontend/src/api/api.ts
@@ -59,4 +59,12 @@ export class Api {
     );
     return response.data;
   }
+  /**
+   * Ask the backend to translate a batch of images.
+   *
+   * POSTs `{ images }` to "/translate" and resolves with the response body.
+   *
+   * @param images identifiers sent to the backend (presumably image ids —
+   *   confirm against the "/translate" endpoint contract)
+   * @returns the response payload, typed as the list of images
+   */
+  public async translateImages(images: Array<string>): Promise<Array<Image>> {
+    // Per-request timeout in milliseconds, overriding the client default.
+    const requestConfig = { timeout: 300000 };
+    const { data } = await this.api.post(
+      "/translate",
+      { images },
+      requestConfig,
+    );
+    return data;
+  }
 }
diff --git a/frontend/src/components/image.tsx b/frontend/src/components/image.tsx
@@ -1,13 +1,16 @@
 import React from "react";
 import { CheckCircleFill, LockFill, PencilFill } from "react-bootstrap-icons";
 import { Image } from "types/model";
-
-const ImageComponent: React.FC<Image> = ({
-  // id,
+/**
+ * Props for ImageComponent: every field of Image plus a translate callback.
+ */
+interface ImageWithFunction extends Image {
+  // Called with the image's id when its "Translate" button is clicked.
+  handleTranslateOneImage: (image_id: string) => void;
+}
+const ImageComponent: React.FC<ImageWithFunction> = ({
+  id,
   is_translated,
   image_url,
-  // audio_url,
-  transcript,
+  transcriptions,
+  handleTranslateOneImage,
 }) => {
   return (
     <div
@@ -22,7 +25,9 @@ const ImageComponent: React.FC<Image> = ({
               <span>The image has been translated</span>
             </div>
             <div className="absolute bottom-2 text-white bg-gray-800 py-1 px-3 mx-2 rounded">
-              <span>{transcript}</span>
+              {transcriptions.map((transcription, index) => (
+                <span key={index}>{transcription.text}&nbsp;&nbsp;</span>
+              ))}
             </div>
           </>
         ) : (
@@ -41,7 +46,10 @@ const ImageComponent: React.FC<Image> = ({
               Edit
             </button>
           ) : (
-            <button className="bg-blue-500 text-white py-1 px-3 rounded">
+            <button
+              className="bg-blue-500 text-white py-1 px-3 rounded"
+              onClick={() => handleTranslateOneImage(id)}
+            >
               Translate
             </button>
           )}

diff --git a/frontend/src/pages/Collection.tsx b/frontend/src/pages/Collection.tsx
@@ -5,9 +5,15 @@ import Modal from "components/modal";
 import UploadContent from "components/UploadContent";
 import { useAuth } from "contexts/AuthContext";
 import { useLoading } from "contexts/LoadingContext";
-import React, { useEffect, useState } from "react";
+import React, { useEffect, useMemo, useState } from "react";
 import { Col, Row } from "react-bootstrap";
-import { ArrowLeft } from "react-bootstrap-icons";
+import {
+  ArrowLeft,
+  CaretLeft,
+  CaretRight,
+  SkipBackward,
+  SkipForward,
+} from "react-bootstrap-icons";
 import { useLocation, useNavigate, useParams } from "react-router-dom";
 import { Collection, Image } from "types/model";
 
@@ -18,36 +24,52 @@ const CollectionPage: React.FC = () => {
   const [title, setTitle] = useState("");
   const [description, setDescription] = useState("");
   const [currentImageIndex, setCurrentImageIndex] = useState(0);
+  const [currentTranscriptionIndex, setCurrentTranscriptionIndex] = useState(0);
   const [currentImage, setCurrentImage] = useState<Image | null>(null);
   const [collection, setCollection] = useState<Collection | null>(null);
   const { auth, is_auth } = useAuth();
   const { startLoading, stopLoading } = useLoading();
   const [showModal, setShowModal] = useState(false);
   const [images, setImages] = useState<Array<Image> | null>([]);
 
-  const apiClient: AxiosInstance = axios.create({
-    baseURL: process.env.REACT_APP_BACKEND_URL, // Base URL for all requests
-    timeout: 10000, // Request timeout (in milliseconds)
-    headers: {
-      "Content-Type": "application/json",
-      Authorization: `Bearer ${auth?.token}`, // Add any default headers you need
-    },
-  });
-  const apiClient1: AxiosInstance = axios.create({
-    baseURL: process.env.REACT_APP_BACKEND_URL, // Base URL for all requests
-    timeout: 1000000, // Request timeout (in milliseconds)
-    headers: {
-      "Content-Type": "multipart/form-data",
-      Authorization: `Bearer ${auth?.token}`, // Add any default headers you need
-    },
-  });
-  const API = new Api(apiClient);
-  const API_Uploader = new Api(apiClient1);
+  const apiClient: AxiosInstance = useMemo(
+    () =>
+      axios.create({
+        baseURL: process.env.REACT_APP_BACKEND_URL, // Base URL for all requests
+        timeout: 10000, // Request timeout (in milliseconds)
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Bearer ${auth?.token}`, // Add any default headers you need
+        },
+      }),
+    [auth?.token],
+  );
+  const apiClient1: AxiosInstance = useMemo(
+    () =>
+      axios.create({
+        baseURL: process.env.REACT_APP_BACKEND_URL,
+        timeout: 1000000,
+        headers: {
+          "Content-Type": "multipart/form-data",
+          Authorization: `Bearer ${auth?.token}`,
+        },
+      }),
+    [auth?.token],
+  );
+  const API = useMemo(() => new Api(apiClient), [apiClient]);
+  const API_Uploader = useMemo(() => new Api(apiClient1), [apiClient1]);
   // Helper to check if it's an edit action
-  const isEditAction = location.search.includes("Action=edit");
+  const isEditAction = useMemo(
+    () => location.search.includes("Action=edit"),
+    [location.search],
+  );
 
   // Get translated images
-  let translatedImages: Array<Image> = [];
+  const translatedImages = useMemo(() => {
+    // Get translated images
+    if (images) return images.filter((img) => img.is_translated);
+    return [];
+  }, [images]);
 
   // Simulate fetching data for the edit page (mocking API call)
   useEffect(() => {
@@ -62,11 +84,6 @@ const CollectionPage: React.FC = () => {
     }
   }, [id, is_auth]);
 
-  useEffect(() => {
-    // Get translated images
-    if (images) translatedImages = images.filter((img) => img.is_translated);
-  }, [images]);
-
   useEffect(() => {
     if (translatedImages.length > 0) {
       setCurrentImage(translatedImages[currentImageIndex]);
@@ -96,15 +113,31 @@ const CollectionPage: React.FC = () => {
   const handleNext = () => {
     if (currentImageIndex < translatedImages.length - 1) {
       setCurrentImageIndex(currentImageIndex + 1);
+      setCurrentTranscriptionIndex(0);
     }
   };
 
   const handlePrev = () => {
     if (currentImageIndex > 0) {
       setCurrentImageIndex(currentImageIndex - 1);
+      setCurrentTranscriptionIndex(0);
+    }
+  };
+  // Advance to the next transcription of the current image; does nothing
+  // when there is no current image or the last transcription is showing.
+  const handleTranscriptionNext = () => {
+    const transcriptions = currentImage?.transcriptions;
+    if (
+      transcriptions &&
+      currentTranscriptionIndex + 1 < transcriptions.length
+    ) {
+      setCurrentTranscriptionIndex(currentTranscriptionIndex + 1);
     }
   };
 
+  // Step back to the previous transcription; does nothing at the first one.
+  const handleTranscriptionPrev = () => {
+    const canGoBack = currentTranscriptionIndex > 0;
+    if (!canGoBack) return;
+    setCurrentTranscriptionIndex(currentTranscriptionIndex - 1);
+  };
   // Return button handler
   const handleReturn = () => {
     navigate("/collections");
@@ -133,6 +166,16 @@ const CollectionPage: React.FC = () => {
       }
     }
   };
+  /**
+   * Translate a single image through the API and swap the returned record
+   * into the `images` state.
+   *
+   * The loading indicator is shown for the duration of the request and is
+   * always cleared, including when the request rejects (the rejection still
+   * propagates to the caller).
+   *
+   * @param image_id id of the image to translate
+   */
+  const handleTranslateOneImage = async (image_id: string) => {
+    if (!images) return;
+    startLoading();
+    try {
+      const [translated] = await API.translateImages([image_id]);
+      // Nothing to swap in when the backend returns an empty list.
+      if (!translated) return;
+      // Build a new array rather than writing into the current state array;
+      // an id that is no longer in the list leaves the state unchanged.
+      setImages((previous) =>
+        previous
+          ? previous.map((image) =>
+              image.id === image_id ? translated : image,
+            )
+          : previous,
+      );
+    } finally {
+      // Clear the loader on both success and failure.
+      stopLoading();
+    }
+  };
   // Custom Return Button (fixed top-left with border)
   const ReturnButton = () => (
     <button
@@ -243,7 +286,10 @@ const CollectionPage: React.FC = () => {
             images.map((image) => {
               return (
                 <Col lg={4} md={6} sm={12} key={image.id} className="p-0">
-                  <ImageComponent {...image} />
+                  <ImageComponent
+                    {...image}
+                    handleTranslateOneImage={handleTranslateOneImage}
+                  />
                 </Col>
               );
             })
@@ -269,35 +315,80 @@ const CollectionPage: React.FC = () => {
               alt="Collection Image"
               className="max-h-96 h-auto mx-auto mb-4"
             />
-            <p className="mt-2">{currentImage.transcript}</p>
+            <p className="mt-2">
+              {currentImage.transcriptions[currentTranscriptionIndex].text}
+            </p>
+            <p className="mt-2">
+              {currentImage.transcriptions[currentTranscriptionIndex].pinyin}
+            </p>
+            <p className="mt-2">
+              {
+                currentImage.transcriptions[currentTranscriptionIndex]
+                  .translation
+              }
+            </p>
             <audio controls className="mt-4 w-full">
-              <source src={currentImage.audio_url} type="audio/mpeg" />
+              <source
+                src={
+                  currentImage.transcriptions[currentTranscriptionIndex]
+                    .audio_url
+                }
+                type="audio/mpeg"
+              />
               Your browser does not support the audio element.
             </audio>
 
             {/* Navigation Buttons */}
-            <div className="flex justify-between mt-4 w-40">
+            <div className="flex justify-content-center mt-4 w-40 gap-4">
               <button
-                className={`px-3 py-1 rounded ${
+                className={`px-5 py-3 rounded ${
                   currentImageIndex === 0
                     ? "bg-gray-300 text-gray-600 cursor-not-allowed"
                     : "bg-gray-500 text-white hover:bg-gray-600"
                 }`}
                 onClick={handlePrev}
                 disabled={currentImageIndex === 0}
               >
-                Prev
+                <SkipBackward size={22} />
+              </button>
+              <button
+                className={`px-5 py-3 rounded ${
+                  currentTranscriptionIndex === 0
+                    ? "bg-gray-300 text-gray-600 cursor-not-allowed"
+                    : "bg-gray-500 text-white hover:bg-gray-600"
+                }`}
+                onClick={handleTranscriptionPrev}
+                disabled={currentTranscriptionIndex === 0}
+              >
+                <CaretLeft size={22} />
+              </button>
+              <button
+                className={`px-5 py-3 rounded ${
+                  currentTranscriptionIndex ===
+                    currentImage.transcriptions.length - 1 ||
+                  currentImage.transcriptions.length === 0
+                    ? "bg-gray-300 text-gray-600 cursor-not-allowed"
+                    : "bg-gray-500 text-white hover:bg-gray-600"
+                }`}
+                onClick={handleTranscriptionNext}
+                disabled={
+                  currentTranscriptionIndex ===
+                    currentImage.transcriptions.length - 1 ||
+                  currentImage.transcriptions.length === 0
+                }
+              >
+                <CaretRight size={22} />
               </button>
               <button
-                className={`px-3 py-1 rounded ${
+                className={`px-5 py-3 rounded ${
                   currentImageIndex === translatedImages.length - 1
                     ? "bg-gray-300 text-gray-600 cursor-not-allowed"
                     : "bg-gray-500 text-white hover:bg-gray-600"
                 }`}
                 onClick={handleNext}
                 disabled={currentImageIndex === translatedImages.length - 1}
               >
-                Next
+                <SkipForward size={22} />
               </button>
             </div>
           </div>

diff --git a/frontend/src/types/model.ts b/frontend/src/types/model.ts
@@ -5,11 +5,16 @@ export interface Collection {
   images: Array<string>;
 }
 
+/** One entry of `Image.transcriptions`. */
+interface Transcription {
+  // Transcribed text.
+  text: string;
+  // Presumably the pinyin romanization of `text` — confirm against the backend.
+  pinyin: string;
+  // Presumably the translation of `text` — confirm against the backend.
+  translation: string;
+  // URL of the audio clip for this entry.
+  audio_url: string;
+}
+/** An image record together with the transcriptions attached to it. */
 export interface Image {
   id: string;
+  // Flag marking the image as already translated.
   is_translated: boolean;
+  // Presumably the id of the owning collection — confirm against the backend.
   collection: string;
   image_url: string;
-  audio_url: string;
-  transcript: string;
+  // Replaces the former single transcript/audio_url pair.
+  transcriptions: Array<Transcription>;
 }
diff --git a/linguaphoto/ai/cli.py b/linguaphoto/ai/cli.py
@@ -7,9 +7,8 @@
 
 from openai import AsyncOpenAI
 from PIL import Image
-
-from linguaphoto.ai.transcribe import transcribe_image
-from linguaphoto.ai.tts import synthesize_text
+from transcribe import transcribe_image
+from tts import synthesize_text
 
 logger = logging.getLogger(__name__)
 
@@ -26,8 +25,11 @@ async def main() -> None:
 
     # Transcribes the image.
     image = Image.open(args.image)
-    client = AsyncOpenAI()
+    # Credentials must not live in source control: with no explicit api_key
+    # the client reads the key from the OPENAI_API_KEY environment variable.
+    client = AsyncOpenAI()
     transcription_response = await transcribe_image(image, client)
+    print(transcription_response.model_dump_json(indent=2))
     with open(root_dir / "transcription.json", "w") as file:
         file.write(transcription_response.model_dump_json(indent=2))
     logger.info("Transcription saved to %s", args.output)