Skip to content

Commit

Permalink
Merge pull request #24 from kscalelabs/second_milestone_serhii
Browse files Browse the repository at this point in the history
feature_ai_translate
  • Loading branch information
Serhii Ofii authored Sep 12, 2024
2 parents 8550111 + 36296e9 commit ebe8820
Show file tree
Hide file tree
Showing 11 changed files with 281 additions and 84 deletions.
8 changes: 8 additions & 0 deletions frontend/src/api/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,12 @@ export class Api {
);
return response.data;
}
/**
 * Asks the backend to translate the given images.
 *
 * Sends a POST to "/translate" with the list under the `images` key and
 * overrides the client's default timeout for this one request (300000 ms,
 * i.e. five minutes).
 *
 * @param images - Identifiers of the images to translate, as sent to the backend.
 * @returns The response body, typed as the list of images.
 */
public async translateImages(images: Array<string>): Promise<Array<Image>> {
  const payload = { images };
  const requestOptions = { timeout: 300000 };
  const { data } = await this.api.post("/translate", payload, requestOptions);
  return data;
}
}
22 changes: 15 additions & 7 deletions frontend/src/components/image.tsx
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import React from "react";
import { CheckCircleFill, LockFill, PencilFill } from "react-bootstrap-icons";
import { Image } from "types/model";

const ImageComponent: React.FC<Image> = ({
// id,
/**
 * Props for ImageComponent: every field of Image plus a callback that the
 * component invokes with the image's id when its "Translate" button is clicked.
 */
interface ImageWithFunction extends Image {
// Receives the id of the image to translate; the return value is ignored.
handleTranslateOneImage: (image_id: string) => void;
}
const ImageComponent: React.FC<ImageWithFunction> = ({
id,
is_translated,
image_url,
// audio_url,
transcript,
transcriptions,
handleTranslateOneImage,
}) => {
return (
<div
Expand All @@ -22,7 +25,9 @@ const ImageComponent: React.FC<Image> = ({
<span>The image has been translated</span>
</div>
<div className="absolute bottom-2 text-white bg-gray-800 py-1 px-3 mx-2 rounded">
<span>{transcript}</span>
{transcriptions.map((transcription, index) => (
<span key={index}>{transcription.text}&nbsp;&nbsp;</span>
))}
</div>
</>
) : (
Expand All @@ -41,7 +46,10 @@ const ImageComponent: React.FC<Image> = ({
Edit
</button>
) : (
<button className="bg-blue-500 text-white py-1 px-3 rounded">
<button
className="bg-blue-500 text-white py-1 px-3 rounded"
onClick={() => handleTranslateOneImage(id)}
>
Translate
</button>
)}
Expand Down
161 changes: 126 additions & 35 deletions frontend/src/pages/Collection.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,15 @@ import Modal from "components/modal";
import UploadContent from "components/UploadContent";
import { useAuth } from "contexts/AuthContext";
import { useLoading } from "contexts/LoadingContext";
import React, { useEffect, useState } from "react";
import React, { useEffect, useMemo, useState } from "react";
import { Col, Row } from "react-bootstrap";
import { ArrowLeft } from "react-bootstrap-icons";
import {
ArrowLeft,
CaretLeft,
CaretRight,
SkipBackward,
SkipForward,
} from "react-bootstrap-icons";
import { useLocation, useNavigate, useParams } from "react-router-dom";
import { Collection, Image } from "types/model";

Expand All @@ -18,36 +24,52 @@ const CollectionPage: React.FC = () => {
const [title, setTitle] = useState("");
const [description, setDescription] = useState("");
const [currentImageIndex, setCurrentImageIndex] = useState(0);
const [currentTranscriptionIndex, setCurrentTranscriptionIndex] = useState(0);
const [currentImage, setCurrentImage] = useState<Image | null>(null);
const [collection, setCollection] = useState<Collection | null>(null);
const { auth, is_auth } = useAuth();
const { startLoading, stopLoading } = useLoading();
const [showModal, setShowModal] = useState(false);
const [images, setImages] = useState<Array<Image> | null>([]);

const apiClient: AxiosInstance = axios.create({
baseURL: process.env.REACT_APP_BACKEND_URL, // Base URL for all requests
timeout: 10000, // Request timeout (in milliseconds)
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${auth?.token}`, // Add any default headers you need
},
});
const apiClient1: AxiosInstance = axios.create({
baseURL: process.env.REACT_APP_BACKEND_URL, // Base URL for all requests
timeout: 1000000, // Request timeout (in milliseconds)
headers: {
"Content-Type": "multipart/form-data",
Authorization: `Bearer ${auth?.token}`, // Add any default headers you need
},
});
const API = new Api(apiClient);
const API_Uploader = new Api(apiClient1);
// JSON axios client with a 10 s default timeout. Memoized on the auth token so
// a new instance (and therefore a new `API` wrapper below) is only created
// when the token changes, not on every render.
const apiClient: AxiosInstance = useMemo(
() =>
axios.create({
baseURL: process.env.REACT_APP_BACKEND_URL, // Base URL for all requests
timeout: 10000, // Request timeout (in milliseconds)
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${auth?.token}`, // Bearer token taken from the auth context
},
}),
[auth?.token],
);
// Second axios client that sends multipart/form-data with a much longer
// timeout (1000000 ms, roughly 16.7 minutes). Presumably intended for file
// uploads, given the content type and the `API_Uploader` wrapper — confirm
// against its call sites.
const apiClient1: AxiosInstance = useMemo(
() =>
axios.create({
baseURL: process.env.REACT_APP_BACKEND_URL,
timeout: 1000000,
headers: {
"Content-Type": "multipart/form-data",
Authorization: `Bearer ${auth?.token}`,
},
}),
[auth?.token],
);
// Api wrappers are memoized on their client so their identity only changes
// when the underlying axios instance does.
const API = useMemo(() => new Api(apiClient), [apiClient]);
const API_Uploader = useMemo(() => new Api(apiClient1), [apiClient1]);
// Helper to check if it's an edit action
const isEditAction = location.search.includes("Action=edit");
// True when the URL query string contains the literal "Action=edit"
// (a case-sensitive substring match); recomputed only when the query changes.
const isEditAction = useMemo(
() => location.search.includes("Action=edit"),
[location.search],
);

// Get translated images
let translatedImages: Array<Image> = [];
// Subset of `images` that have already been translated; an empty list when
// `images` is null. Recomputed only when `images` changes.
const translatedImages = useMemo(
  () => (images ?? []).filter((img) => img.is_translated),
  [images],
);

// Simulate fetching data for the edit page (mocking API call)
useEffect(() => {
Expand All @@ -62,11 +84,6 @@ const CollectionPage: React.FC = () => {
}
}, [id, is_auth]);

useEffect(() => {
// Get translated images
if (images) translatedImages = images.filter((img) => img.is_translated);
}, [images]);

useEffect(() => {
if (translatedImages.length > 0) {
setCurrentImage(translatedImages[currentImageIndex]);
Expand Down Expand Up @@ -96,15 +113,31 @@ const CollectionPage: React.FC = () => {
// Advance to the next translated image, if there is one, and start again from
// its first transcription.
const handleNext = () => {
  const lastImageIndex = translatedImages.length - 1;
  if (currentImageIndex >= lastImageIndex) {
    return;
  }
  setCurrentImageIndex(currentImageIndex + 1);
  setCurrentTranscriptionIndex(0);
};

// Step back to the previous translated image, if there is one, and start
// again from its first transcription.
const handlePrev = () => {
  if (currentImageIndex <= 0) {
    return;
  }
  setCurrentImageIndex(currentImageIndex - 1);
  setCurrentTranscriptionIndex(0);
};
// Navigate transcriptions
// Move to the next transcription of the current image; does nothing when no
// image is selected or the last transcription is already showing.
const handleTranscriptionNext = () => {
  const transcriptions = currentImage?.transcriptions;
  if (!transcriptions) {
    return;
  }
  if (currentTranscriptionIndex >= transcriptions.length - 1) {
    return;
  }
  setCurrentTranscriptionIndex(currentTranscriptionIndex + 1);
};

// Move to the previous transcription of the current image; does nothing when
// the first transcription is already showing.
const handleTranscriptionPrev = () => {
  if (currentTranscriptionIndex <= 0) {
    return;
  }
  setCurrentTranscriptionIndex(currentTranscriptionIndex - 1);
};
// Return button handler
const handleReturn = () => {
navigate("/collections");
Expand Down Expand Up @@ -133,6 +166,16 @@ const CollectionPage: React.FC = () => {
}
}
};
/**
 * Translates a single image and swaps the returned record into `images`.
 *
 * Does nothing when no image list is loaded. The loading indicator is always
 * cleared, even if the request fails; a failure still rejects the returned
 * promise.
 * NOTE(review): the click handler that calls this drops the promise, so a
 * rejection is currently unobserved — consider surfacing it to the user.
 *
 * @param image_id - Id of the image to translate.
 */
const handleTranslateOneImage = async (image_id: string) => {
  if (!images) {
    return;
  }
  startLoading();
  try {
    const [translated] = await API.translateImages([image_id]);
    // An empty response leaves the list untouched instead of writing
    // `undefined` into it.
    if (!translated) {
      return;
    }
    // Functional update: builds a new array from the latest state rather than
    // mutating the (possibly stale) list captured before the await. An id
    // that is no longer present simply matches nothing.
    setImages((previous) =>
      previous
        ? previous.map((image) => (image.id === image_id ? translated : image))
        : previous,
    );
  } finally {
    stopLoading();
  }
};
// Custom Return Button (fixed top-left with border)
const ReturnButton = () => (
<button
Expand Down Expand Up @@ -243,7 +286,10 @@ const CollectionPage: React.FC = () => {
images.map((image) => {
return (
<Col lg={4} md={6} sm={12} key={image.id} className="p-0">
<ImageComponent {...image} />
<ImageComponent
{...image}
handleTranslateOneImage={handleTranslateOneImage}
/>
</Col>
);
})
Expand All @@ -269,35 +315,80 @@ const CollectionPage: React.FC = () => {
alt="Collection Image"
className="max-h-96 h-auto mx-auto mb-4"
/>
<p className="mt-2">{currentImage.transcript}</p>
<p className="mt-2">
{currentImage.transcriptions[currentTranscriptionIndex].text}
</p>
<p className="mt-2">
{currentImage.transcriptions[currentTranscriptionIndex].pinyin}
</p>
<p className="mt-2">
{
currentImage.transcriptions[currentTranscriptionIndex]
.translation
}
</p>
<audio controls className="mt-4 w-full">
<source src={currentImage.audio_url} type="audio/mpeg" />
<source
src={
currentImage.transcriptions[currentTranscriptionIndex]
.audio_url
}
type="audio/mpeg"
/>
Your browser does not support the audio element.
</audio>

{/* Navigation Buttons */}
<div className="flex justify-between mt-4 w-40">
<div className="flex justify-content-center mt-4 w-40 gap-4">
<button
className={`px-3 py-1 rounded ${
className={`px-5 py-3 rounded ${
currentImageIndex === 0
? "bg-gray-300 text-gray-600 cursor-not-allowed"
: "bg-gray-500 text-white hover:bg-gray-600"
}`}
onClick={handlePrev}
disabled={currentImageIndex === 0}
>
Prev
<SkipBackward size={22} />
</button>
<button
className={`px-5 py-3 rounded ${
currentTranscriptionIndex === 0
? "bg-gray-300 text-gray-600 cursor-not-allowed"
: "bg-gray-500 text-white hover:bg-gray-600"
}`}
onClick={handleTranscriptionPrev}
disabled={currentTranscriptionIndex === 0}
>
<CaretLeft size={22} />
</button>
<button
className={`px-5 py-3 rounded ${
currentTranscriptionIndex ===
currentImage.transcriptions.length - 1 ||
currentImage.transcriptions.length === 0
? "bg-gray-300 text-gray-600 cursor-not-allowed"
: "bg-gray-500 text-white hover:bg-gray-600"
}`}
onClick={handleTranscriptionNext}
disabled={
currentTranscriptionIndex ===
currentImage.transcriptions.length - 1 ||
currentImage.transcriptions.length === 0
}
>
<CaretRight size={22} />
</button>
<button
className={`px-3 py-1 rounded ${
className={`px-5 py-3 rounded ${
currentImageIndex === translatedImages.length - 1
? "bg-gray-300 text-gray-600 cursor-not-allowed"
: "bg-gray-500 text-white hover:bg-gray-600"
}`}
onClick={handleNext}
disabled={currentImageIndex === translatedImages.length - 1}
>
Next
<SkipForward size={22} />
</button>
</div>
</div>
Expand Down
9 changes: 7 additions & 2 deletions frontend/src/types/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@ export interface Collection {
images: Array<string>;
}

/**
 * One transcription entry attached to an image. An image carries a list of
 * these in its `transcriptions` field.
 */
interface Transcription {
// Transcribed text.
text: string;
// Pinyin rendering of the text.
pinyin: string;
// Translation of the text.
translation: string;
// URL of the audio clip for this entry.
audio_url: string;
}
export interface Image {
id: string;
is_translated: boolean;
collection: string;
image_url: string;
audio_url: string;
transcript: string;
transcriptions: Array<Transcription>;
}
10 changes: 6 additions & 4 deletions linguaphoto/ai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@

from openai import AsyncOpenAI
from PIL import Image

from linguaphoto.ai.transcribe import transcribe_image
from linguaphoto.ai.tts import synthesize_text
from transcribe import transcribe_image
from tts import synthesize_text

logger = logging.getLogger(__name__)

Expand All @@ -26,8 +25,11 @@ async def main() -> None:

# Transcribes the image.
image = Image.open(args.image)
# SECURITY: never embed an API key in source. The default constructor reads the
# key from the OPENAI_API_KEY environment variable. The key that was committed
# here is exposed in version-control history and must be revoked and rotated.
client = AsyncOpenAI()
transcription_response = await transcribe_image(image, client)
print(transcription_response.model_dump_json(indent=2))
with open(root_dir / "transcription.json", "w") as file:
file.write(transcription_response.model_dump_json(indent=2))
logger.info("Transcription saved to %s", args.output)
Expand Down
Loading

0 comments on commit ebe8820

Please sign in to comment.