diff --git a/notebooks/benchmarks/vector_database_comparison_mongodb_postgreSQL.ipynb b/notebooks/benchmarks/vector_database_comparison_mongodb_postgreSQL.ipynb index 9336008..db3680d 100644 --- a/notebooks/benchmarks/vector_database_comparison_mongodb_postgreSQL.ipynb +++ b/notebooks/benchmarks/vector_database_comparison_mongodb_postgreSQL.ipynb @@ -9,6 +9,8 @@ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/benchmarks/vector_database_comparison_mongodb_postgreSQL.ipynb)\n", "-----\n", "\n", + "[![YouTube](https://img.shields.io/badge/YouTube-Watch%20Video-red)](https://www.youtube.com/live/6NGqgRzOT8E?si=ujPa31IDwlsGQCYU)\n", + "\n", "This notebook implements and benchmarks a standard AI workload that involves vector embeddings and the retreival of semantically similar documents from a database. The system uses two different vector databases:\n", "- PostgreSQL with pgvector: A vector database extension for PostgreSQL that enables vector search on the database.\n", "- MongoDB Atlas Vector Search: A vector search feature for MongoDB Database that enables vector search on the database.\n" @@ -487,455 +489,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " | When was YouTube officially launched, and by whom? | \n", - "What is YouTube's slogan introduced after Google's acquisition? | \n", - "How many hours of videos are collectively watched on YouTube daily? | \n", - "Which was the first video uploaded to YouTube, and when was it uploaded? | \n", - "What was the acquisition cost of YouTube by Google, and when was the deal finalized? | \n", - "What was the first YouTube video to reach one million views, and when did it happen? | \n", - "What are the three separate branches of the United States government? | \n", - "Which country has the highest documented incarceration rate and prison population? | \n", - "How many executions have occurred in the United States since 1977, and which countries have more? | \n", - "What percentage of the global military spending did the United States account for in 2019? | \n", - "How is the U.S. president elected? | \n", - "What cooling system innovation was included in the proposed venues for the World Cup in Qatar? | \n", - "What lawsuit was filed against Google in June 2020, and what was it about? | \n", - "How much was Google fined by CNIL in January 2022, and for what reason? | \n", - "When did YouTube join the NSA's PRISM program, according to reports? | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "0.184692 | \n", - "0.181519 | \n", - "0.124939 | \n", - "0.226074 | \n", - "0.415771 | \n", - "0.127075 | \n", - "0.237915 | \n", - "0.524902 | \n", - "0.396484 | \n", - "0.342529 | \n", - "0.375488 | \n", - "0.490234 | \n", - "0.118774 | \n", - "0.138428 | \n", - "0.477783 | \n", - "
1 | \n", - "0.262451 | \n", - "0.019638 | \n", - "0.022446 | \n", - "0.130493 | \n", - "0.049530 | \n", - "0.195923 | \n", - "0.418945 | \n", - "-0.256348 | \n", - "-0.112061 | \n", - "-0.033020 | \n", - "0.158081 | \n", - "0.331055 | \n", - "0.199219 | \n", - "-0.065125 | \n", - "0.128540 | \n", - "
2 | \n", - "-0.314697 | \n", - "0.134399 | \n", - "0.049500 | \n", - "-0.228027 | \n", - "0.077454 | \n", - "-0.062683 | \n", - "0.057587 | \n", - "-0.576660 | \n", - "0.051880 | \n", - "-0.189575 | \n", - "-0.151855 | \n", - "-0.245117 | \n", - "-0.053436 | \n", - "-0.451172 | \n", - "-0.274658 | \n", - "
3 | \n", - "0.098145 | \n", - "0.179077 | \n", - "0.499756 | \n", - "0.089600 | \n", - "0.000246 | \n", - "0.063354 | \n", - "0.011215 | \n", - "0.008095 | \n", - "0.014961 | \n", - "-0.099731 | \n", - "-0.213135 | \n", - "0.257812 | \n", - "0.398926 | \n", - "0.274170 | \n", - "0.223389 | \n", - "
4 | \n", - "-0.058105 | \n", - "0.311035 | \n", - "0.307373 | \n", - "0.076355 | \n", - "0.416260 | \n", - "0.398682 | \n", - "-0.243042 | \n", - "0.160278 | \n", - "-0.098389 | \n", - "0.036224 | \n", - "-0.036987 | \n", - "-0.351318 | \n", - "0.334229 | \n", - "0.256836 | \n", - "0.176514 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
763 | \n", - "0.455078 | \n", - "0.499756 | \n", - "0.428711 | \n", - "0.464600 | \n", - "0.513184 | \n", - "0.464600 | \n", - "0.291504 | \n", - "0.306641 | \n", - "0.328613 | \n", - "0.367188 | \n", - "0.238281 | \n", - "0.497070 | \n", - "0.384521 | \n", - "0.507324 | \n", - "0.436523 | \n", - "
764 | \n", - "0.267334 | \n", - "0.156128 | \n", - "0.244629 | \n", - "0.340088 | \n", - "0.174561 | \n", - "0.487549 | \n", - "0.038269 | \n", - "0.058563 | \n", - "0.609375 | \n", - "0.080261 | \n", - "-0.004959 | \n", - "0.007370 | \n", - "0.403564 | \n", - "0.544922 | \n", - "-0.016052 | \n", - "
765 | \n", - "0.260254 | \n", - "0.039520 | \n", - "-0.106445 | \n", - "0.114807 | \n", - "0.439697 | \n", - "0.103333 | \n", - "0.186890 | \n", - "0.013084 | \n", - "0.176880 | \n", - "0.337646 | \n", - "0.200439 | \n", - "-0.355469 | \n", - "0.326660 | \n", - "0.316895 | \n", - "0.338135 | \n", - "
766 | \n", - "0.001040 | \n", - "0.315430 | \n", - "0.045349 | \n", - "0.113403 | \n", - "0.190552 | \n", - "-0.038116 | \n", - "0.299072 | \n", - "0.256348 | \n", - "0.316650 | \n", - "0.420410 | \n", - "0.342529 | \n", - "0.062683 | \n", - "0.035522 | \n", - "0.094238 | \n", - "0.180420 | \n", - "
767 | \n", - "0.117249 | \n", - "0.250244 | \n", - "0.145020 | \n", - "0.096802 | \n", - "0.245605 | \n", - "-0.057373 | \n", - "-0.201904 | \n", - "-0.123169 | \n", - "-0.248047 | \n", - "-0.034363 | \n", - "0.039124 | \n", - "-0.110229 | \n", - "0.125610 | \n", - "0.179321 | \n", - "-0.066345 | \n", - "
768 rows × 15 columns
\n", - "\n", + " | title | \n", + "text | \n", + "url | \n", + "embedding | \n", + "json_data | \n", + "
---|---|---|---|---|---|
0 | \n", + "Deaths in 2022 | \n", + "The following notable deaths occurred in 2022.... | \n", + "https://en.wikipedia.org/wiki?curid=69407798 | \n", + "[0.2865696847438812, -0.03181683272123337, 0.0... | \n", + "{'title': 'Deaths in 2022', 'text': 'The follo... | \n", + "
1 | \n", + "YouTube | \n", + "YouTube is a global online video sharing and s... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "[-0.09689381718635559, 0.1619211882352829, -0.... | \n", + "{'title': 'YouTube', 'text': 'YouTube is a glo... | \n", + "
2 | \n", + "YouTube | \n", + "In October 2006, YouTube was bought by Google ... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "[0.1302049309015274, 0.265736848115921, 0.4018... | \n", + "{'title': 'YouTube', 'text': 'In October 2006,... | \n", + "
3 | \n", + "YouTube | \n", + "Since its purchase by Google, YouTube has expa... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "[-0.09791257232427597, 0.13586106896400452, -0... | \n", + "{'title': 'YouTube', 'text': 'Since its purcha... | \n", + "
4 | \n", + "YouTube | \n", + "YouTube has had an unprecedented social impact... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "[-0.2641527056694031, 0.06968216598033905, -0.... | \n", + "{'title': 'YouTube', 'text': 'YouTube has had ... | \n", + "
\n", + " | When was YouTube officially launched, and by whom? | \n", + "What is YouTube's slogan introduced after Google's acquisition? | \n", + "How many hours of videos are collectively watched on YouTube daily? | \n", + "Which was the first video uploaded to YouTube, and when was it uploaded? | \n", + "What was the acquisition cost of YouTube by Google, and when was the deal finalized? | \n", + "What was the first YouTube video to reach one million views, and when did it happen? | \n", + "What are the three separate branches of the United States government? | \n", + "Which country has the highest documented incarceration rate and prison population? | \n", + "How many executions have occurred in the United States since 1977, and which countries have more? | \n", + "What percentage of the global military spending did the United States account for in 2019? | \n", + "How is the U.S. president elected? | \n", + "What cooling system innovation was included in the proposed venues for the World Cup in Qatar? | \n", + "What lawsuit was filed against Google in June 2020, and what was it about? | \n", + "How much was Google fined by CNIL in January 2022, and for what reason? | \n", + "When did YouTube join the NSA's PRISM program, according to reports? | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0.184692 | \n", + "0.181519 | \n", + "0.124939 | \n", + "0.226074 | \n", + "0.415771 | \n", + "0.127075 | \n", + "0.237915 | \n", + "0.524902 | \n", + "0.396484 | \n", + "0.342529 | \n", + "0.375488 | \n", + "0.490234 | \n", + "0.118774 | \n", + "0.138428 | \n", + "0.477783 | \n", + "
1 | \n", + "0.262451 | \n", + "0.019638 | \n", + "0.022446 | \n", + "0.130493 | \n", + "0.049530 | \n", + "0.195923 | \n", + "0.418945 | \n", + "-0.256348 | \n", + "-0.112061 | \n", + "-0.033020 | \n", + "0.158081 | \n", + "0.331055 | \n", + "0.199219 | \n", + "-0.065125 | \n", + "0.128540 | \n", + "
2 | \n", + "-0.314697 | \n", + "0.134399 | \n", + "0.049500 | \n", + "-0.228027 | \n", + "0.077454 | \n", + "-0.062683 | \n", + "0.057587 | \n", + "-0.576660 | \n", + "0.051880 | \n", + "-0.189575 | \n", + "-0.151855 | \n", + "-0.245117 | \n", + "-0.053436 | \n", + "-0.451172 | \n", + "-0.274658 | \n", + "
3 | \n", + "0.098145 | \n", + "0.179077 | \n", + "0.499756 | \n", + "0.089600 | \n", + "0.000246 | \n", + "0.063354 | \n", + "0.011215 | \n", + "0.008095 | \n", + "0.014961 | \n", + "-0.099731 | \n", + "-0.213135 | \n", + "0.257812 | \n", + "0.398926 | \n", + "0.274170 | \n", + "0.223389 | \n", + "
4 | \n", + "-0.058105 | \n", + "0.311035 | \n", + "0.307373 | \n", + "0.076355 | \n", + "0.416260 | \n", + "0.398682 | \n", + "-0.243042 | \n", + "0.160278 | \n", + "-0.098389 | \n", + "0.036224 | \n", + "-0.036987 | \n", + "-0.351318 | \n", + "0.334229 | \n", + "0.256836 | \n", + "0.176514 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
763 | \n", + "0.455078 | \n", + "0.499756 | \n", + "0.428711 | \n", + "0.464600 | \n", + "0.513184 | \n", + "0.464600 | \n", + "0.291504 | \n", + "0.306641 | \n", + "0.328613 | \n", + "0.367188 | \n", + "0.238281 | \n", + "0.497070 | \n", + "0.384521 | \n", + "0.507324 | \n", + "0.436523 | \n", + "
764 | \n", + "0.267334 | \n", + "0.156128 | \n", + "0.244629 | \n", + "0.340088 | \n", + "0.174561 | \n", + "0.487549 | \n", + "0.038269 | \n", + "0.058563 | \n", + "0.609375 | \n", + "0.080261 | \n", + "-0.004959 | \n", + "0.007370 | \n", + "0.403564 | \n", + "0.544922 | \n", + "-0.016052 | \n", + "
765 | \n", + "0.260254 | \n", + "0.039520 | \n", + "-0.106445 | \n", + "0.114807 | \n", + "0.439697 | \n", + "0.103333 | \n", + "0.186890 | \n", + "0.013084 | \n", + "0.176880 | \n", + "0.337646 | \n", + "0.200439 | \n", + "-0.355469 | \n", + "0.326660 | \n", + "0.316895 | \n", + "0.338135 | \n", + "
766 | \n", + "0.001040 | \n", + "0.315430 | \n", + "0.045349 | \n", + "0.113403 | \n", + "0.190552 | \n", + "-0.038116 | \n", + "0.299072 | \n", + "0.256348 | \n", + "0.316650 | \n", + "0.420410 | \n", + "0.342529 | \n", + "0.062683 | \n", + "0.035522 | \n", + "0.094238 | \n", + "0.180420 | \n", + "
767 | \n", + "0.117249 | \n", + "0.250244 | \n", + "0.145020 | \n", + "0.096802 | \n", + "0.245605 | \n", + "-0.057373 | \n", + "-0.201904 | \n", + "-0.123169 | \n", + "-0.248047 | \n", + "-0.034363 | \n", + "0.039124 | \n", + "-0.110229 | \n", + "0.125610 | \n", + "0.179321 | \n", + "-0.066345 | \n", + "
768 rows × 15 columns
\n", + "\n", + " | title | \n", + "text | \n", + "url | \n", + "score | \n", + "
---|---|---|---|---|
0 | \n", + "YouTube | \n", + "YouTube announced the project in September 201... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "0.951712 | \n", + "
1 | \n", + "YouTube | \n", + "The mobile version of the site was relaunched ... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "0.948441 | \n", + "
2 | \n", + "YouTube | \n", + "In January 2009, YouTube launched \"YouTube for... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "0.948370 | \n", + "
3 | \n", + "YouTube | \n", + "Later the same year, \"YouTube Feather\" was int... | \n", + "https://en.wikipedia.org/wiki?curid=3524766 | \n", + "0.947532 | \n", + "
4 | \n", + "Twitch (service) | \n", + "On May 18, 2014, \"Variety\" first reported that... | \n", + "https://en.wikipedia.org/wiki?curid=33548254 | \n", + "0.946378 | \n", + "
\n", - " | title | \n", - "text | \n", - "url | \n", - "embedding | \n", - "json_data | \n", - "
---|---|---|---|---|---|
0 | \n", - "Deaths in 2022 | \n", - "The following notable deaths occurred in 2022.... | \n", - "https://en.wikipedia.org/wiki?curid=69407798 | \n", - "[0.2865696847438812, -0.03181683272123337, 0.0... | \n", - "{'title': 'Deaths in 2022', 'text': 'The follo... | \n", - "
1 | \n", - "YouTube | \n", - "YouTube is a global online video sharing and s... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "[-0.09689381718635559, 0.1619211882352829, -0.... | \n", - "{'title': 'YouTube', 'text': 'YouTube is a glo... | \n", - "
2 | \n", - "YouTube | \n", - "In October 2006, YouTube was bought by Google ... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "[0.1302049309015274, 0.265736848115921, 0.4018... | \n", - "{'title': 'YouTube', 'text': 'In October 2006,... | \n", - "
3 | \n", - "YouTube | \n", - "Since its purchase by Google, YouTube has expa... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "[-0.09791257232427597, 0.13586106896400452, -0... | \n", - "{'title': 'YouTube', 'text': 'Since its purcha... | \n", - "
4 | \n", - "YouTube | \n", - "YouTube has had an unprecedented social impact... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "[-0.2641527056694031, 0.06968216598033905, -0.... | \n", - "{'title': 'YouTube', 'text': 'YouTube has had ... | \n", - "
\n", - " | When was YouTube officially launched, and by whom? | \n", - "What is YouTube's slogan introduced after Google's acquisition? | \n", - "How many hours of videos are collectively watched on YouTube daily? | \n", - "Which was the first video uploaded to YouTube, and when was it uploaded? | \n", - "What was the acquisition cost of YouTube by Google, and when was the deal finalized? | \n", - "What was the first YouTube video to reach one million views, and when did it happen? | \n", - "What are the three separate branches of the United States government? | \n", - "Which country has the highest documented incarceration rate and prison population? | \n", - "How many executions have occurred in the United States since 1977, and which countries have more? | \n", - "What percentage of the global military spending did the United States account for in 2019? | \n", - "How is the U.S. president elected? | \n", - "What cooling system innovation was included in the proposed venues for the World Cup in Qatar? | \n", - "What lawsuit was filed against Google in June 2020, and what was it about? | \n", - "How much was Google fined by CNIL in January 2022, and for what reason? | \n", - "When did YouTube join the NSA's PRISM program, according to reports? | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "0.184692 | \n", - "0.181519 | \n", - "0.124939 | \n", - "0.226074 | \n", - "0.415771 | \n", - "0.127075 | \n", - "0.237915 | \n", - "0.524902 | \n", - "0.396484 | \n", - "0.342529 | \n", - "0.375488 | \n", - "0.490234 | \n", - "0.118774 | \n", - "0.138428 | \n", - "0.477783 | \n", - "
1 | \n", - "0.262451 | \n", - "0.019638 | \n", - "0.022446 | \n", - "0.130493 | \n", - "0.049530 | \n", - "0.195923 | \n", - "0.418945 | \n", - "-0.256348 | \n", - "-0.112061 | \n", - "-0.033020 | \n", - "0.158081 | \n", - "0.331055 | \n", - "0.199219 | \n", - "-0.065125 | \n", - "0.128540 | \n", - "
2 | \n", - "-0.314697 | \n", - "0.134399 | \n", - "0.049500 | \n", - "-0.228027 | \n", - "0.077454 | \n", - "-0.062683 | \n", - "0.057587 | \n", - "-0.576660 | \n", - "0.051880 | \n", - "-0.189575 | \n", - "-0.151855 | \n", - "-0.245117 | \n", - "-0.053436 | \n", - "-0.451172 | \n", - "-0.274658 | \n", - "
3 | \n", - "0.098145 | \n", - "0.179077 | \n", - "0.499756 | \n", - "0.089600 | \n", - "0.000246 | \n", - "0.063354 | \n", - "0.011215 | \n", - "0.008095 | \n", - "0.014961 | \n", - "-0.099731 | \n", - "-0.213135 | \n", - "0.257812 | \n", - "0.398926 | \n", - "0.274170 | \n", - "0.223389 | \n", - "
4 | \n", - "-0.058105 | \n", - "0.311035 | \n", - "0.307373 | \n", - "0.076355 | \n", - "0.416260 | \n", - "0.398682 | \n", - "-0.243042 | \n", - "0.160278 | \n", - "-0.098389 | \n", - "0.036224 | \n", - "-0.036987 | \n", - "-0.351318 | \n", - "0.334229 | \n", - "0.256836 | \n", - "0.176514 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
763 | \n", - "0.455078 | \n", - "0.499756 | \n", - "0.428711 | \n", - "0.464600 | \n", - "0.513184 | \n", - "0.464600 | \n", - "0.291504 | \n", - "0.306641 | \n", - "0.328613 | \n", - "0.367188 | \n", - "0.238281 | \n", - "0.497070 | \n", - "0.384521 | \n", - "0.507324 | \n", - "0.436523 | \n", - "
764 | \n", - "0.267334 | \n", - "0.156128 | \n", - "0.244629 | \n", - "0.340088 | \n", - "0.174561 | \n", - "0.487549 | \n", - "0.038269 | \n", - "0.058563 | \n", - "0.609375 | \n", - "0.080261 | \n", - "-0.004959 | \n", - "0.007370 | \n", - "0.403564 | \n", - "0.544922 | \n", - "-0.016052 | \n", - "
765 | \n", - "0.260254 | \n", - "0.039520 | \n", - "-0.106445 | \n", - "0.114807 | \n", - "0.439697 | \n", - "0.103333 | \n", - "0.186890 | \n", - "0.013084 | \n", - "0.176880 | \n", - "0.337646 | \n", - "0.200439 | \n", - "-0.355469 | \n", - "0.326660 | \n", - "0.316895 | \n", - "0.338135 | \n", - "
766 | \n", - "0.001040 | \n", - "0.315430 | \n", - "0.045349 | \n", - "0.113403 | \n", - "0.190552 | \n", - "-0.038116 | \n", - "0.299072 | \n", - "0.256348 | \n", - "0.316650 | \n", - "0.420410 | \n", - "0.342529 | \n", - "0.062683 | \n", - "0.035522 | \n", - "0.094238 | \n", - "0.180420 | \n", - "
767 | \n", - "0.117249 | \n", - "0.250244 | \n", - "0.145020 | \n", - "0.096802 | \n", - "0.245605 | \n", - "-0.057373 | \n", - "-0.201904 | \n", - "-0.123169 | \n", - "-0.248047 | \n", - "-0.034363 | \n", - "0.039124 | \n", - "-0.110229 | \n", - "0.125610 | \n", - "0.179321 | \n", - "-0.066345 | \n", - "
768 rows × 15 columns
\n", - "\n", - " | title | \n", - "text | \n", - "url | \n", - "score | \n", - "
---|---|---|---|---|
0 | \n", - "YouTube | \n", - "YouTube announced the project in September 201... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "0.951712 | \n", - "
1 | \n", - "YouTube | \n", - "The mobile version of the site was relaunched ... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "0.948441 | \n", - "
2 | \n", - "YouTube | \n", - "In January 2009, YouTube launched \"YouTube for... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "0.948370 | \n", - "
3 | \n", - "YouTube | \n", - "Later the same year, \"YouTube Feather\" was int... | \n", - "https://en.wikipedia.org/wiki?curid=3524766 | \n", - "0.947532 | \n", - "
4 | \n", - "Twitch (service) | \n", - "On May 18, 2014, \"Variety\" first reported that... | \n", - "https://en.wikipedia.org/wiki?curid=33548254 | \n", - "0.946378 | \n", - "