diff --git a/.env b/.env deleted file mode 100644 index e69de29..0000000 diff --git a/package-lock.json b/package-lock.json index 4013941..7ab104b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "@radix-ui/react-label": "^2.1.0", "@radix-ui/react-popover": "^1.1.2", "@radix-ui/react-slot": "^1.1.0", + "browser-image-compression": "^2.0.2", "class-variance-authority": "^0.7.0", "clsx": "^2.1.1", "date-fns": "^3.6.0", @@ -20,6 +21,7 @@ "follow-redirects": "^1.15.9", "he": "^1.2.0", "lucide-react": "^0.453.0", + "node-html-parser": "^6.1.13", "prettier": "^3.3.3", "process": "^0.11.10", "react": "^18.3.1", @@ -2910,7 +2912,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", - "dev": true, "license": "ISC" }, "node_modules/brace-expansion": { @@ -2936,6 +2937,15 @@ "node": ">=8" } }, + "node_modules/browser-image-compression": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/browser-image-compression/-/browser-image-compression-2.0.2.tgz", + "integrity": "sha512-pBLlQyUf6yB8SmmngrcOw3EoS4RpQ1BcylI3T9Yqn7+4nrQTXJD4sJDe5ODnJdrvNMaio5OicFo75rDyJD2Ucw==", + "license": "MIT", + "dependencies": { + "uzip": "0.20201231.0" + } + }, "node_modules/browserslist": { "version": "4.24.0", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.0.tgz", @@ -3313,7 +3323,6 @@ "version": "5.1.0", "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", - "dev": true, "license": "BSD-2-Clause", "dependencies": { "boolbase": "^1.0.0", @@ -3330,7 +3339,6 @@ "version": "6.1.0", "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", - "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">= 6" @@ -3528,7 +3536,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", - "dev": true, "license": "MIT", "dependencies": { "domelementtype": "^2.3.0", @@ -3543,7 +3550,6 @@ "version": "2.3.0", "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", - "dev": true, "funding": [ { "type": "github", @@ -3556,7 +3562,6 @@ "version": "5.0.3", "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", - "dev": true, "license": "BSD-2-Clause", "dependencies": { "domelementtype": "^2.3.0" @@ -3572,7 +3577,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", - "dev": true, "license": "BSD-2-Clause", "dependencies": { "dom-serializer": "^2.0.0", @@ -3669,7 +3673,6 @@ "version": "4.5.0", "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", - "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">=0.12" @@ -6049,89 +6052,15 @@ } }, "node_modules/node-html-parser": { - "version": "5.4.2", - "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-5.4.2.tgz", - "integrity": "sha512-RaBPP3+51hPne/OolXxcz89iYvQvKOydaqoePpOgXcrOKZhjVIzmpKZz+Hd/RBO2/zN2q6CNJhQzucVz+u3Jyw==", - "dev": true, + "version": "6.1.13", + "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-6.1.13.tgz", + "integrity": "sha512-qIsTMOY4C/dAa5Q5vsobRpOOvPfC4pB61UVW2uSwZNUp0QU/jCekTal1vMmbO0DgdHeLUJpv/ARmDqErVxA3Sg==", "license": "MIT", "dependencies": { - "css-select": "^4.2.1", + "css-select": "^5.1.0", "he": "1.2.0" } }, - "node_modules/node-html-parser/node_modules/css-select": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz", - "integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==", - "dev": true, - "license": "BSD-2-Clause", - "dependencies": { - "boolbase": "^1.0.0", - "css-what": "^6.0.1", - "domhandler": "^4.3.1", - "domutils": "^2.8.0", - "nth-check": "^2.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/fb55" - } - }, - "node_modules/node-html-parser/node_modules/dom-serializer": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz", - "integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==", - "dev": true, - "license": "MIT", - "dependencies": { - "domelementtype": "^2.0.1", - "domhandler": "^4.2.0", - "entities": "^2.0.0" - }, - "funding": { - "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" - } - }, - "node_modules/node-html-parser/node_modules/domhandler": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz", - "integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==", - "dev": true, - "license": "BSD-2-Clause", - "dependencies": { - "domelementtype": "^2.2.0" - }, - "engines": { - "node": ">= 4" - }, - "funding": { - "url": "https://github.com/fb55/domhandler?sponsor=1" - } - }, - "node_modules/node-html-parser/node_modules/domutils": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz", - "integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==", - "dev": true, - "license": "BSD-2-Clause", - "dependencies": { - "dom-serializer": "^1.0.1", - "domelementtype": "^2.2.0", - "domhandler": "^4.2.0" - }, - "funding": { - "url": "https://github.com/fb55/domutils?sponsor=1" - } - }, - "node_modules/node-html-parser/node_modules/entities": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", - "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==", - "dev": true, - "license": "BSD-2-Clause", - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, "node_modules/node-releases": { "version": "2.0.18", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz", @@ -6162,7 +6091,6 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", - "dev": true, "license": "BSD-2-Clause", "dependencies": { "boolbase": "^1.0.0" @@ -8171,6 +8099,12 @@ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", "license": "MIT" }, + "node_modules/uzip": { + "version": "0.20201231.0", + "resolved": "https://registry.npmjs.org/uzip/-/uzip-0.20201231.0.tgz", + "integrity": "sha512-OZeJfZP+R0z9D6TmBgLq2LHzSSptGMGDGigGiEe0pr8UBe/7fdflgHlHBNDASTXB5jnFuxHpNaJywSg8YFeGng==", + "license": "MIT" + }, "node_modules/vite": { "version": "5.4.9", "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.9.tgz", @@ -8452,6 +8386,79 @@ "vite": ">=2.0.0" } }, + "node_modules/vite-plugin-html/node_modules/css-select": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz", + "integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.0.1", + "domhandler": "^4.3.1", + "domutils": "^2.8.0", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/vite-plugin-html/node_modules/dom-serializer": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz", + "integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==", + "dev": true, + "license": "MIT", + "dependencies": { + "domelementtype": "^2.0.1", + "domhandler": "^4.2.0", + "entities": "^2.0.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/vite-plugin-html/node_modules/domhandler": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz", + "integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.2.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/vite-plugin-html/node_modules/domutils": { + "version": "2.8.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz", + "integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^1.0.1", + "domelementtype": "^2.2.0", + "domhandler": "^4.2.0" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/vite-plugin-html/node_modules/entities": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", + "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==", + "dev": true, + "license": "BSD-2-Clause", + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/vite-plugin-html/node_modules/fs-extra": { "version": "10.1.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz", @@ -8480,6 +8487,17 @@ "graceful-fs": "^4.1.6" } }, + "node_modules/vite-plugin-html/node_modules/node-html-parser": { + "version": "5.4.2", + "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-5.4.2.tgz", + "integrity": "sha512-RaBPP3+51hPne/OolXxcz89iYvQvKOydaqoePpOgXcrOKZhjVIzmpKZz+Hd/RBO2/zN2q6CNJhQzucVz+u3Jyw==", + "dev": true, + "license": "MIT", + "dependencies": { + "css-select": "^4.2.1", + "he": "1.2.0" + } + }, "node_modules/vite-plugin-html/node_modules/universalify": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", diff --git a/package.json b/package.json index ecb6b52..2891e59 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "@radix-ui/react-label": "^2.1.0", "@radix-ui/react-popover": "^1.1.2", "@radix-ui/react-slot": "^1.1.0", + "browser-image-compression": "^2.0.2", "class-variance-authority": "^0.7.0", "clsx": "^2.1.1", "date-fns": "^3.6.0", @@ -50,6 +51,7 @@ "follow-redirects": "^1.15.9", "he": "^1.2.0", "lucide-react": "^0.453.0", + "node-html-parser": "^6.1.13", "prettier": "^3.3.3", "process": "^0.11.10", "react": "^18.3.1", diff --git a/src/components/pages/Home/render2.tsx b/src/components/pages/Home/render2.tsx deleted file mode 100644 index 42d67eb..0000000 --- a/src/components/pages/Home/render2.tsx +++ /dev/null @@ -1,217 +0,0 @@ -import { Button } from "@/components/ui/button"; -import { Card } from "@/components/ui/card"; -import { useLogInContext } from "@/hooks/LogInContext"; -import { ApiDelay, BLUESKY_USERNAME } from "@/lib/constant"; -import { - cleanTweetText, - isPostValid, - isQuote, - parseTweetsFile, - sortTweetsWithDateRange, -} from "@/lib/parse/parse"; -import { Render2Props } from "@/types/render"; -import { RichText } from "@atproto/api"; -import { getMergeEmbed } from "@/components/utils"; -import { useState } from "react"; -import { filePassableType } from "@/hooks/useUpload"; -import { TEmbeddedImage } from "@/types/tweets"; - -const RenderStep2: React.FC = ({ - setCurrentStep, - shareableData, -}) => { - const { agent } = useLogInContext(); - const [isProcessing, setIsProcessing] = useState(false); - const [progress, setProgress] = useState(0); - const [simulate, setSimulate] = useState(false); - - const { - fileMap, - dateRange, - mediaLocation, - totalTweets, - tweetsLocation, - validTweets, - } = shareableData; - - const tweet_to_bsky = async () => { - if (!agent) throw new Error("Agent not found"); - if (!fileMap.size) { - console.log("No files selected"); - return; - } - setIsProcessing(true); - setProgress(0); - - try { - const tweetsFile = fileMap.get(tweetsLocation!); - if (!tweetsFile) throw new Error(`Tweets file not found at ${tweetsLocation}`); - - const tweetsFileContent = await tweetsFile.text(); - const tweets = parseTweetsFile(tweetsFileContent); - - if (!Array.isArray(tweets)) throw new Error("Parsed content is not an array"); - - let importedTweet = 0; - - const sortedTweets = sortTweetsWithDateRange(tweets, dateRange); - - for (const [index, { tweet }] of sortedTweets.entries()) { - try { - setProgress(Math.round((index / sortedTweets.length) * 100)); - const tweetDate = new Date(tweet.created_at); - const tweet_createdAt = tweetDate.toISOString(); - - if (!isPostValid(tweet) || isQuote(tweets, tweet.id)) continue; - - const embeddedImage: TEmbeddedImage[] = []; - - // Process media for embedding - if (tweet.extended_entities?.media) { - for (const media of tweet.extended_entities.media) { - if (media.type === "photo") { - const fileType = media.media_url.split(".").pop(); - const mimeType = filePassableType(fileType); - - if (!mimeType) continue; - if (embeddedImage.length >= 4) break; - - const mediaFilename = `${mediaLocation}/${tweet.id}-${media.media_url.split("/").pop()}`; - const imageFile = fileMap.get(mediaFilename); - - if (imageFile) { - const imageBuffer = await imageFile.arrayBuffer(); - const uint8Array = new Uint8Array(imageBuffer); - if (!simulate) { - const blobRecord = await agent.uploadBlob(uint8Array, { - encoding: mimeType, - }); - - embeddedImage.push({ - alt: "", - image: { - $type: "blob", - ref: blobRecord.data.blob.ref, - mimeType: blobRecord.data.blob.mimeType, - size: blobRecord.data.blob.size, - }, - }); - } - } - } else { - console.log("Skipping non-photo, edia type:", media.type); - continue; - } - } - } - console.log(`Final post will contain ${embeddedImage.length} images`); - let postText = tweet.full_text; - const urls = tweet.entities?.urls?.map((url) => url.display_url) || []; - - if (!simulate) { - postText = await cleanTweetText(tweet.full_text); - if (postText.length > 300) postText = postText.substring(0, 296) + "..."; - } - - if (urls.length > 0) postText += `\n\n${urls.join(" ")}`; - - const rt = new RichText({ text: postText }); - await rt.detectFacets(agent); - - if (embeddedImage.length > 1) console.log("The embedded images are:", embeddedImage); - - const postRecord = { - $type: "app.bsky.feed.post", - text: rt.text, - facets: rt.facets, - createdAt: tweet_createdAt, - embed: { - $type: "app.bsky.embed.images", - images: embeddedImage, - }, - }; - - const embed = getMergeEmbed(embeddedImage); - if (embed && Object.keys(embed).length > 0) { - Object.assign(postRecord, { embed }); - } - if (!simulate) { - await new Promise((resolve) => setTimeout(resolve, ApiDelay)); - const recordData = await agent.post(postRecord); - const postRkey = recordData.uri.split("/").pop(); - if (postRkey) importedTweet++; - } else { - importedTweet++; - } - } catch (error) { - console.error(`Error processing tweet ${tweet.id}:`, error); - } - } - setCurrentStep(3); - console.log(`Import completed. ${importedTweet} tweets imported.`); - } catch (error) { - console.error("Error during import:", error); - } finally { - setIsProcessing(false); - setProgress(100); - } - }; - - return ( -
-
- -

Tweet Analysis

-
-

- Total tweets found: {totalTweets} -

-

- Valid tweets to import: {validTweets} -

-

- Excluded: {totalTweets - validTweets} (quotes, retweets, replies, - or outside date range) -

-
-
- -
- {isProcessing && ( -
-
-
- )} -
- - -
-
-
-
- ); -}; - -export default RenderStep2; - diff --git a/src/components/utils.tsx b/src/components/utils.tsx index 16a7faa..8a3f21f 100644 --- a/src/components/utils.tsx +++ b/src/components/utils.tsx @@ -1,9 +1,26 @@ +import imageCompression from 'browser-image-compression'; +import { parse } from 'node-html-parser'; import AtpAgent, { AppBskyActorProfile, BlobRef } from "@atproto/api"; import { TEmbeddedImage, Tweet } from "@/types/tweets"; import { TDateRange, TFileState } from "@/types/render"; import he from "he"; import URI from "urijs"; +const MAX_FILE_SIZE = 1 * 1024 * 1024; // 1MB +const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'; + +interface Thumb { + $type: "none" | "blob"; + ref: string; + mimeType: string; + size: number; +} +interface EmbedCard { + uri: string; + title: string; + description: string; + thumb: Thumb; +} type BlobResponse = BlobRef; export const findFileFromMap = ( fileMap: Map, @@ -349,14 +366,368 @@ export const importXProfileToBsky = async ( } }; -export function getMergeEmbed(images: TEmbeddedImage[] = []) { - let mediaData = null; + +export function getMergeEmbed(images: [] = [], embeddedVideo: {} | null = null, record: {} | null = null): {} | null { + let mediaData: {} | null = null; if (images.length > 0) { mediaData = { $type: "app.bsky.embed.images", - images, + images + }; + } else if (embeddedVideo != null) { + mediaData = { + $type: "app.bsky.embed.video", + video: embeddedVideo, + }; + } + + let recordData: {} | null = null; + if (record && Object.keys(record).length > 0) { + recordData = { + $type: "app.bsky.embed.record", + record }; } - return mediaData; + if (mediaData && recordData) { + return { + $type: "app.bsky.embed.recordWithMedia", + media: mediaData, + record: { + record + } + }; + } + + return mediaData || recordData; + +} + +export async function recompressImageIfNeeded( + imageData: File | Blob | ArrayBuffer | string +): Promise { + // Convert string/ArrayBuffer to File if needed + let file: File; + if (typeof imageData === 'string') { + // Assuming it's a base64 string + const response = await fetch(imageData); + const blob = await response.blob(); + file = new File([blob], 'image.jpg', { type: blob.type }); + } else if (imageData instanceof ArrayBuffer) { + file = new File([new Uint8Array(imageData)], 'image.jpg', { type: 'image/jpeg' }); + } else { + file = imageData instanceof File ? imageData : new File([imageData], 'image.jpg'); + } + + const options = { + maxSizeMB: 1, + maxWidthOrHeight: 1920, + useWebWorker: true + }; + + try { + return await imageCompression(file, options); + } catch (error) { + console.warn('Image compression failed:', error); + return file; + } +} + + +async function fetchOembed(url: string): Promise { + // Expanded list of OEmbed providers with more flexible discovery + const oembedProviders = [ + `https://open.iframe.ly/api/oembed?url=${encodeURIComponent(url)}`, + ]; + + // Try HTML link tag discovery first + try { + const htmlDiscoveryEndpoint = await discoverOEmbedEndpointFromHTML(url); + if (htmlDiscoveryEndpoint) { + const discoveredResult = await fetchOEmbedFromDiscoveredEndpoint(htmlDiscoveryEndpoint, url); + if (discoveredResult) return discoveredResult; + } + } catch (error) { + console.debug('HTML OEmbed discovery failed:', error); + } + + // Fallback to predefined providers + for (const providerUrl of oembedProviders) { + try { + const response = await fetch(providerUrl, { + headers: { + 'User-Agent': USER_AGENT, + 'Accept': 'application/json' + }, + signal: AbortSignal.timeout(25000) + }); + + if (response.ok) { + const data = await response.json(); + if (data && (data.title || data.description || data.thumbnail_url)) { + return data; + } + } + } catch (error) { + console.debug(`Oembed fetch error for ${providerUrl}:`, error); + } + } + + return null; +} + +// Discover OEmbed endpoint from HTML link tags +async function discoverOEmbedEndpointFromHTML(url: string): Promise { + try { + const response = await fetch(url, { + headers: { + 'Accept': 'text/html', + 'User-Agent': USER_AGENT + }, + signal: AbortSignal.timeout(25000) + }); + + if (!response.ok) return null; + + const html = await response.text(); + const root = parse(html); + + // Look for link tags with rel="alternate" and type="application/json+oembed" + const oembedLinks = root.querySelectorAll('link[rel="alternate"][type="application/json+oembed"]'); + + if (oembedLinks.length > 0) { + const href = oembedLinks[0].getAttribute('href'); + return href || null; + } + + return null; + } catch (error) { + console.debug('HTML OEmbed discovery error:', error); + return null; + } +} + +// Fetch OEmbed data from discovered endpoint +async function fetchOEmbedFromDiscoveredEndpoint(endpoint: string, originalUrl: string): Promise { + try { + const fullEndpoint = `${endpoint}?url=${encodeURIComponent(originalUrl)}&format=json`; + + const response = await fetch(fullEndpoint, { + headers: { + 'Accept': 'application/json', + 'User-Agent': USER_AGENT + }, + signal: AbortSignal.timeout(25000) + }); + + if (response.ok) { + return await response.json(); + } + } catch (error) { + console.debug('Discovered endpoint OEmbed fetch error:', error); + } + + return null; +} + +export async function fetchEmbedUrlCard(url: string, agent: AtpAgent): Promise { + const card: EmbedCard = { + uri: url, + title: "", + description: "", + thumb: { $type: "none", ref: "", mimeType: "", size: 0 }, + }; + + try { + console.log("fetching the embed url card"); + const oembedResult = await fetchOembed(url); + + if (oembedResult) { + card.title = oembedResult.title || card.title; + card.description = oembedResult.description || card.description; + + if (oembedResult.thumbnail_url) { + try { + const imgResp = await fetch(oembedResult.thumbnail_url, { + headers: { + 'User-Agent': USER_AGENT, + 'Accept': 'image/*' + }, + mode: 'cors', + credentials: 'omit', + signal: AbortSignal.timeout(25000) + }); + + if (imgResp.ok) { + let imgBuffer = await imgResp.arrayBuffer(); + const mimeType = imgResp.headers.get('content-type') || 'image/jpeg'; + + if (imgBuffer.byteLength > MAX_FILE_SIZE) { + console.warn('Image needs compression'); + } + + if (mimeType.startsWith('image/') && !mimeType.startsWith('image/svg')) { + const blobRecord = await agent.uploadBlob(imgBuffer, { + encoding: mimeType + }); + + card.thumb = { + $type: "blob", + ref: blobRecord.data.blob.ref, + mimeType: blobRecord.data.blob.mimeType, + size: blobRecord.data.blob.size + }; + } + } + } catch (error) { + console.warn('Thumbnail fetch error:', error); + } + } + } + + // Fallback to direct URL fetch if no OEmbed data + if (!card.title && !card.description && !card.thumb.size) { + const resp = await fetch(url, { + headers: { + 'User-Agent': USER_AGENT, + 'Accept': 'text/html' + }, + signal: AbortSignal.timeout(25000) + }); + + if (!resp.ok) { + if (resp.status === 401 && url.startsWith('http:')) { + return await fetchEmbedUrlCard(url.replace('http:', 'https:'), agent); + } + throw new Error(`HTTP error: ${resp.status} ${resp.statusText}`); + } + + const html = await resp.text(); + const root = parse(html); + + const titleTag = root.querySelector('meta[property="og:title"]'); + if (titleTag) { + card.title = he.decode(titleTag.getAttribute('content') || ''); + } + + const descriptionTag = root.querySelector('meta[property="og:description"]'); + if (descriptionTag) { + card.description = he.decode(descriptionTag.getAttribute('content') || ''); + } + + const imageTag = root.querySelector('meta[property="og:image"]'); + if (imageTag) { + let imgUrl = imageTag.getAttribute('content') || ''; + if (!imgUrl.includes('://')) { + imgUrl = new URL(imgUrl, url).href; + } + + try { + const imgResp = await fetch(imgUrl, { + headers: { + 'User-Agent': USER_AGENT, + 'Accept': 'image/*' + }, + signal: AbortSignal.timeout(25000) + }); + + if (imgResp.ok) { + let imgBuffer = await imgResp.arrayBuffer(); + const mimeType = imgResp.headers.get('content-type') || 'image/jpeg'; + + if (imgBuffer.byteLength > MAX_FILE_SIZE) { + console.warn('Image needs compression'); + } + + if (mimeType.startsWith('image/') && !mimeType.startsWith('image/svg')) { + const blobRecord = await agent.uploadBlob(imgBuffer, { encoding: mimeType }); + + card.thumb = { + $type: "blob", + ref: blobRecord.data.blob.ref, + mimeType: blobRecord.data.blob.mimeType, + size: blobRecord.data.blob.size + }; + } + } + } catch (error) { + console.warn('Image fetch error:', error); + } + } + } + + // Standardize return format for Bluesky + if (card.title || card.description || card.thumb.size > 0) { + return { + $type: "app.bsky.embed.external", + external: { + uri: url, + title: card.title || '', + description: card.description || '', + ...(card.thumb.size > 0 ? { thumb: card.thumb } : {}) + } + }; + } + + return null; + } catch (error: any) { + console.warn(`Error fetching embed URL card: ${error.message}`); + return null; + } +} + +export function checkPastHandles(twitterHandles: string[], url: string): boolean { + return (twitterHandles || []).some(handle => + url.startsWith(`https://x.com/${handle}/`) || + url.startsWith(`https://twitter.com/${handle}/`) + ) +} + +export function getEmbeddedUrlAndRecord( + twitterHandles: string[], + urls: Array<{ expanded_url: string }>, + tweets: Array<{ + tweet: Record, + bsky?: Record, + }> +): { + embeddedUrl: string | null; + embeddedRecord: { + "uri": string; + "cid": string; + } | null; +} { + let embeddedTweetUrl: string | null = null; + const nullResult = { + embeddedUrl: null, + embeddedRecord: null, + }; + + // get the last one url to embed + const reversedUrls = urls.reverse(); + embeddedTweetUrl = reversedUrls.find(({ expanded_url }) => checkPastHandles(twitterHandles, expanded_url))?.expanded_url ?? null; + + if (!embeddedTweetUrl) { + return nullResult; + } + + const index = embeddedTweetUrl.lastIndexOf("/"); + if (index == -1) { + return nullResult; + } + + const urlId = embeddedTweetUrl.substring(index + 1); + const tweet = tweets.find(({ tweet: { id } }) => id == urlId) + + if (!tweet?.bsky) { + return nullResult; + } + + return { + embeddedUrl: embeddedTweetUrl, + embeddedRecord: { + "uri": tweet.bsky.uri, + "cid": tweet.bsky.cid, + } + }; } diff --git a/src/hooks/useUpload.ts b/src/hooks/useUpload.ts index 0272d17..cb44afd 100644 --- a/src/hooks/useUpload.ts +++ b/src/hooks/useUpload.ts @@ -1,12 +1,18 @@ import { useMemo, useState } from "react"; import { useLogInContext } from "./LogInContext"; import { shareableData } from "@/types/render"; -import { processTweetsData } from "@/lib/parse/processTweets"; -import { cleanTweetText, isPostValid, isQuote } from "@/lib/parse/parse"; +import { + cleanTweetText, + isPostValid, + isQuote, + parseTweetsFile, + sortTweetsWithDateRange, +} from "@/lib/parse/parse"; import { TMedia, TEmbeddedImage, Tweet } from "@/types/tweets"; -import { RichText } from "@atproto/api"; -import { getMergeEmbed } from "@/components/utils"; +import { processTweetsData } from "@/lib/parse/processTweets"; +import { getMergeEmbed, fetchEmbedUrlCard, getEmbeddedUrlAndRecord } from "@/components/utils"; import { ApiDelay, BLUESKY_USERNAME } from "@/lib/constant"; +import AtpAgent, { AppBskyVideoDefs, BlobRef, RichText } from "@atproto/api"; export const filePassableType = (fileType: string = ""): string => { if (fileType === "png") return "image/png"; @@ -68,7 +74,10 @@ export const useUpload = ({ const createPostRecord = async ( tweet: Tweet["tweet"], - embeddedImage: TEmbeddedImage[], + embeddedImage: [], + embeddedVideo: any, + embeddedRecord: any, + externalEmbed: any, ) => { if (!agent) return; let postText = await cleanTweetText(tweet.full_text); @@ -90,25 +99,23 @@ export const useUpload = ({ text: rt.text, facets: rt.facets, createdAt: tweetCreatedAt, - embed: { - $type: "app.bsky.embed.images", - images: embeddedImage, - }, }; console.log(postRecord); // Merge any additional embed data - const embed = getMergeEmbed(embeddedImage); + const embed = getMergeEmbed(embeddedImage, embeddedVideo, embeddedRecord); if (embed && Object.keys(embed).length > 0) { Object.assign(postRecord, { embed }); + } else if (externalEmbed) { + Object.assign(postRecord, { embed: externalEmbed }); } await new Promise((resolve) => setTimeout(resolve, ApiDelay)); // Throttle API calls const recordData = await agent?.post(postRecord); const postRkey = recordData?.uri.split("/").pop(); if (postRkey) { - const postUri = `https://bsky.app/profile/${BLUESKY_USERNAME}/post/${postRkey}`; + const postUri = `https://bsky.app/profile/${BLUESKY_USERNAME}.bsky.social/post/${postRkey}`; console.log("Bluesky post created:", postRecord.text); console.log(postUri); } @@ -140,16 +147,211 @@ export const useUpload = ({ setProgress(Math.round((index / validTweets.length) * 100)); if (cannotPost(tweet, tweets)) continue; + const media = tweet.extended_entities?.media; const embeddedImage: TEmbeddedImage[] = []; + let embeddedVideo: BlobRef | undefined = undefined; + if (tweet.extended_entities?.media) { - for (const media of tweet.extended_entities.media) { - const mediaEmbed = await processMedia(media, tweet.id); + for (const mediaItem of tweet.extended_entities.media) { + const mediaEmbed = await processMedia(mediaItem, tweet.id); if (mediaEmbed) embeddedImage.push(mediaEmbed); if (embeddedImage.length >= 4) break; // Limit to 4 images } } - await createPostRecord(tweet, embeddedImage).then(() => { + console.log(media?.[0]?.type); + console.log(media); + console.log(tweet); + if (media?.[0]?.type === 'video') { + const mediaItem = media[0]; + console.log(mediaItem); + + const highQualityVariant = mediaItem.video_info.variants.find( + (variant: VideoVariant) => variant.bitrate === '2176000' && variant.content_type === 'video/mp4' + ); + const video_info = highQualityVariant.url; + + const videoFileName = `${mediaLocation}/${tweet.id}-${video_info.split('/').pop()?.split('?')[0]}`; + console.log(videoFileName); + const videoFile = fileMap.get(videoFileName); + + const { data: serviceAuth } = await agent!.com.atproto.server.getServiceAuth({ + aud: `did:web:${agent!.dispatchUrl.host}`, + lxm: "com.atproto.repo.uploadBlob", + exp: Date.now() / 1000 + 60 * 30, // 30 minutes + }); + + const token = serviceAuth.token; + const MAX_SINGLE_VIDEO_SIZE = 10 * 1024 * 1024 * 1024; // 10GB max size + + // Check file size + if (videoFile.size > MAX_SINGLE_VIDEO_SIZE) { + throw new Error(`File size (${(videoFile.size / (1024 * 1024 * 1024)).toFixed(2)}GB) exceeds maximum allowed size of 10GB`); + } + + // Prepare upload URL + const uploadUrl = new URL("https://video.bsky.app/xrpc/app.bsky.video.uploadVideo"); + uploadUrl.searchParams.append("did", agent!.session!.did); + uploadUrl.searchParams.append("name", videoFileName); + + console.log("Starting upload request...", { + fileSize: `${(videoFile.size / (1024 * 1024)).toFixed(2)}MB`, + fileName: videoFile.name + }); + + let uploadResponse: any; + let jobStatus: AppBskyVideoDefs.JobStatus; + + try { + let bytesUploaded = 0; + const size = videoFile.size; + + const progressTrackingStream = new TransformStream({ + transform(chunk, controller) { + controller.enqueue(chunk); + bytesUploaded += chunk.byteLength; + console.log( + "Upload progress:", + Math.trunc((bytesUploaded / size) * 100) + "%" + ); + }, + flush() { + console.log("Upload complete ✨"); + } + }); + + const fileStream = videoFile.stream(); + const uploadStream = fileStream.pipeThrough(progressTrackingStream); + + interface ExtendedRequestInit extends RequestInit { + duplex: 'half'; + } + + const fetchOptions: ExtendedRequestInit = { + method: "POST", + headers: { + 'Authorization': `Bearer ${token}`, + 'Content-Type': 'video/mp4', + 'Content-Length': String(size), + 'Accept': 'application/json', + }, + body: uploadStream, + duplex: 'half', + }; + + uploadResponse = await fetch(uploadUrl.toString(), fetchOptions); + + if (!uploadResponse.ok) { + const errorText = await uploadResponse.text(); + throw new Error(`Upload failed: ${uploadResponse.status} - ${errorText}`); + } + + jobStatus = await uploadResponse.json() as AppBskyVideoDefs.JobStatus; + console.log('Upload successful:', jobStatus); + } catch (error: any) { + if (error.message.includes('already_exists')) { + const errorData = JSON.parse(error.message.split(' - ')[1]); + console.log('Using existing video jobId:', errorData.jobId); + jobStatus = { + jobId: errorData.jobId, + state: errorData.state, + did: errorData.did + } as AppBskyVideoDefs.JobStatus; + } else { + console.error('Upload error:', error); + throw error; + } + } + + if (jobStatus.error) { + console.warn(` Video job status: '${jobStatus.error}'. Video will be posted as a link`); + } + console.log(" JobId:", jobStatus.jobId); + + let blob: BlobRef | undefined = jobStatus.blob; + + const videoAgent = new AtpAgent({ service: "https://video.bsky.app" }); + + while (!blob) { + const { data: status } = await videoAgent.app.bsky.video.getJobStatus({ + jobId: jobStatus.jobId, + }); + console.log(" Status:", + status.jobStatus.state, + status.jobStatus.progress || "", + ); + if (status.jobStatus.blob) { + blob = status.jobStatus.blob; + } + // wait a second + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + + embeddedVideo = blob; + } else { + console.log("Skipping non-photo, non-video media type:", media?.[0]?.type); + } + + const twitterHandles = ['whoisanku']; + + const { embeddedUrl = null, embeddedRecord = null } = getEmbeddedUrlAndRecord( + twitterHandles, + tweet.entities?.urls, + validTweets, + ); + + let externalEmbed = null; + + // Other than t.co url within full text + function extractUrlsFromText(text: string): string[] { + // Regular expression to match URLs in text + const urlRegex = /(https?:\/\/[^\s]+)/g; + return (text.match(urlRegex) || []) + .filter(url => + !url.startsWith('https://twitter.com') && + !url.startsWith('https://x.com') && + !url.startsWith('https://t.co/') + ); + } + + function removeUrlsFromText(text: string): string { + // Regex to match URLs along with adjacent non-space characters + const urlRegex = /[()[\]{}"']*\s*(https?:\/\/[^\s()]+)\s*[()[\]{}"']*/g; + + // Remove URLs and their immediately adjacent punctuation + const cleanedText = text.replace(urlRegex, '').trim(); + + return cleanedText; + } + // For t.co urls within full text + if (tweet.entities?.urls) { + for (const urlEntity of tweet.entities.urls) { + if (!urlEntity.expanded_url.startsWith('https://twitter.com') && !urlEntity.expanded_url.startsWith('https://x.com')) { + try { + externalEmbed = await fetchEmbedUrlCard(urlEntity.expanded_url, agent); + console.log(externalEmbed); + } catch (error: any) { + console.warn(`Error fetching embed URL card: ${error.message}`); + } + } + } + } + + const textUrls = extractUrlsFromText(tweet.full_text); + if (textUrls.length > 0) { + console.log("textUrls", textUrls); + try { + externalEmbed = await fetchEmbedUrlCard(textUrls[0], agent); + console.log(externalEmbed); + } catch (error: any) { + console.warn(`Error fetching embed URL card from full_text: ${error.message}`); + } + + } + + tweet.full_text = removeUrlsFromText(tweet.full_text); + + await createPostRecord(tweet, embeddedImage, embeddedVideo, embeddedRecord, externalEmbed).then(() => { importedTweet++; }); } catch (error) { @@ -171,4 +373,4 @@ export const useUpload = ({ progress, tweet_to_bsky, }; -}; +} diff --git a/src/lib/constant.ts b/src/lib/constant.ts index 2afa396..c991d99 100644 --- a/src/lib/constant.ts +++ b/src/lib/constant.ts @@ -17,8 +17,8 @@ export const initalTweetAnalyzer = { }; export const intialDate = { - min_date: new Date(2000, 0, 1), - max_date: new Date(), + min_date: new Date(2010, 0, 1), + max_date: new Date(2023, 0, 1), }; export const initialShareableData = {