run-llama · jerryjliu · Aug 21, 2023 · Jul 27, 2023 · Jul 31, 2023 · Aug 1, 2023
diff --git a/llama_hub/airbyte_cdk/.gitignore b/llama_hub/airbyte_cdk/.gitignore
@@ -0,0 +1 @@
+test.py
diff --git a/llama_hub/airbyte_cdk/README.md b/llama_hub/airbyte_cdk/README.md
@@ -0,0 +1,42 @@
+# Airbyte CDK Loader
+
+The Airbyte CDK Loader is a shim for sources created using the [Airbyte Python CDK](https://docs.airbyte.com/connector-development/cdk-python/). It allows you to load data from any Airbyte source into LlamaIndex.
+
+## Installation
+
+* Install llama_hub: `pip install llama_hub`
+* Install airbyte-cdk: `pip install airbyte-cdk`
+* Install a source via git (or implement your own): `pip install git+https://github.com/airbytehq/airbyte.git@master#egg=source_github&subdirectory=airbyte-integrations/connectors/source-github`
+
+## Usage
+
+Here's an example usage of the AirbyteCDKReader.
+
+```python
+from llama_index import download_loader
+from llama_hub.airbyte_cdk.base import AirbyteCDKReader
+from source_github.source import SourceGithub
+
+
+github_config = {
+    # ...
+}
+reader = AirbyteCDKReader(source_class=SourceGithub,config=github_config)
+documents = reader.load_data(stream_name="issues")
+```
+
+By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader.
+
+## Incremental loads
+
+If a stream supports it, this loader can be used to load data incrementally (only returning documents that weren't loaded last time or got updated in the meantime):
+```python
+
+reader = AirbyteCDKReader(source_class=SourceGithub,config=github_config)
+documents = reader.load_data(stream_name="issues")
+current_state = reader.last_state # can be pickled away or stored otherwise
+
+updated_documents = reader.load_data(stream_name="issues", state=current_state) # only loads documents that were updated since last time
+```
+
+This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
diff --git a/llama_hub/airbyte_cdk/__init__.py b/llama_hub/airbyte_cdk/__init__.py
diff --git a/llama_hub/airbyte_cdk/base.py b/llama_hub/airbyte_cdk/base.py
@@ -0,0 +1,33 @@
+from typing import Any, List, Mapping, Optional
+
+from llama_index.readers.base import BaseReader
+from llama_index.readers.schema.base import Document
+from airbyte_protocol.models.airbyte_protocol import AirbyteRecordMessage
+from airbyte_cdk.sources.embedded.base_integration import BaseEmbeddedIntegration
+from airbyte_cdk.sources.embedded.runner import CDKRunner
+
+
+class AirbyteCDKReader(BaseReader, BaseEmbeddedIntegration):
+    """Reader that retrieves documents from an Airbyte source built with the CDK.
+
+    Each record becomes a Document with empty text and its data as `extra_info`.
+
+    Args:
+        source_class: The Airbyte source class; instantiated with no arguments.
+        config: The config object for the Airbyte source.
+    """
+
+    def __init__(
+        self,
+        source_class: Any,
+        config: Mapping[str, Any],
+    ) -> None:
+        """Wrap a new `source_class` instance in a CDKRunner named after the class."""
+        # The source is instantiated here, with no constructor arguments.
+        super().__init__(config=config, runner=CDKRunner(source=source_class(), name=source_class.__name__))
+
+    def _handle_record(self, record: AirbyteRecordMessage, id: Optional[str]) -> Document:
+        return Document(doc_id=id,text="", extra_info=record.data)
+
+    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
+        return list(self._load_data(*args, **load_kwargs))
diff --git a/llama_hub/airbyte_cdk/requirements.txt b/llama_hub/airbyte_cdk/requirements.txt
@@ -0,0 +1,2 @@
+airbyte-cdk
+airbyte-protocol-models
diff --git a/llama_hub/airbyte_hubspot/.gitignore b/llama_hub/airbyte_hubspot/.gitignore
@@ -0,0 +1 @@
+test.py
diff --git a/llama_hub/airbyte_hubspot/README.md b/llama_hub/airbyte_hubspot/README.md
@@ -0,0 +1,54 @@
+# Airbyte Hubspot Loader
+
+The Airbyte Hubspot Loader allows you to access different Hubspot objects.
+
+## Installation
+
+* Install llama_hub: `pip install llama_hub`
+* Install the hubspot source: `pip install airbyte-source-hubspot`
+
+## Usage
+
+Here's an example usage of the AirbyteHubspotReader.
+
+```python
+from llama_hub.airbyte_hubspot.base import AirbyteHubspotReader
+
+hubspot_config = {
+    # ...
+}
+reader = AirbyteHubspotReader(config=hubspot_config)
+documents = reader.load_data(stream_name="products")
+```
+
+## Configuration
+
+Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/hubspot/) for details about how to configure the reader.
+The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.yaml).
+
+The general shape looks like this:
+```python
+{
+  "start_date": "<date from which to start retrieving records, in ISO format, e.g. 2020-10-20T00:00:00Z>",
+  "credentials": {
+    "credentials_title": "Private App Credentials",
+    "access_token": "<access token of your private app>"
+  }
+}
+```
+
+By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader.
+
+## Incremental loads
+
+This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime):
+```python
+
+reader = AirbyteHubspotReader(config=hubspot_config)
+documents = reader.load_data(stream_name="products")
+current_state = reader.last_state # can be pickled away or stored otherwise
+
+updated_documents = reader.load_data(stream_name="products", state=current_state) # only loads documents that were updated since last time
+```
+
+This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
diff --git a/llama_hub/airbyte_hubspot/__init__.py b/llama_hub/airbyte_hubspot/__init__.py
diff --git a/llama_hub/airbyte_hubspot/base.py b/llama_hub/airbyte_hubspot/base.py
@@ -0,0 +1,21 @@
+from typing import Any, Mapping
+from llama_hub.airbyte_cdk.base import AirbyteCDKReader
+
+
+class AirbyteHubspotReader(AirbyteCDKReader):
+    """AirbyteHubspotReader reader.
+
+    Retrieve documents from Hubspot via `source_hubspot.SourceHubspot`.
+
+    Args:
+        config: The config object for the hubspot source.
+    """
+
+    def __init__(
+        self,
+        config: Mapping[str, Any],
+    ) -> None:
+        """Initialize with the Hubspot source class, imported at call time."""
+        import source_hubspot
+
+        super().__init__(source_class=source_hubspot.SourceHubspot, config=config)
diff --git a/llama_hub/airbyte_hubspot/requirements.txt b/llama_hub/airbyte_hubspot/requirements.txt
@@ -0,0 +1 @@
+airbyte-source-hubspot
diff --git a/llama_hub/airbyte_salesforce/.gitignore b/llama_hub/airbyte_salesforce/.gitignore
@@ -0,0 +1 @@
+test.py
diff --git a/llama_hub/airbyte_salesforce/README.md b/llama_hub/airbyte_salesforce/README.md
@@ -0,0 +1,59 @@
+# Airbyte Salesforce Loader
+
+The Airbyte Salesforce Loader allows you to access different Salesforce objects.
+
+## Installation
+
+* Install llama_hub: `pip install llama_hub`
+* Install the salesforce source: `pip install airbyte-source-salesforce`
+
+## Usage
+
+Here's an example usage of the AirbyteSalesforceReader.
+
+```python
+from llama_hub.airbyte_salesforce.base import AirbyteSalesforceReader
+
+salesforce_config = {
+    # ...
+}
+reader = AirbyteSalesforceReader(config=salesforce_config)
+documents = reader.load_data(stream_name="asset")
+```
+
+## Configuration
+
+Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/salesforce/) for details about how to configure the reader.
+The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.yaml).
+
+The general shape looks like this:
+```python
+{
+  "client_id": "<oauth client id>",
+  "client_secret": "<oauth client secret>",
+  "refresh_token": "<oauth refresh token>",
+  "start_date": "<date from which to start retrieving records, in ISO format, e.g. 2020-10-20T00:00:00Z>",
+  "is_sandbox": False, # set to True if you're using a sandbox environment
+  "streams_criteria": [ # Array of filters for salesforce objects that should be loadable
+    {"criteria": "exacts", "value": "Account"}, # Exact name of salesforce object
+    {"criteria": "starts with", "value": "Asset"}, # Prefix of the name
+    # Other allowed criteria: ends with, contains, starts not with, ends not with, not contains, not exacts
+  ],
+}
+```
+
+By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader.
+
+## Incremental loads
+
+This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime):
+```python
+
+reader = AirbyteSalesforceReader(config=salesforce_config)
+documents = reader.load_data(stream_name="asset")
+current_state = reader.last_state # can be pickled away or stored otherwise
+
+updated_documents = reader.load_data(stream_name="asset", state=current_state) # only loads documents that were updated since last time
+```
+
+This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
diff --git a/llama_hub/airbyte_salesforce/__init__.py b/llama_hub/airbyte_salesforce/__init__.py
diff --git a/llama_hub/airbyte_salesforce/base.py b/llama_hub/airbyte_salesforce/base.py
@@ -0,0 +1,21 @@
+from typing import Any, Mapping
+from llama_hub.airbyte_cdk.base import AirbyteCDKReader
+
+
+class AirbyteSalesforceReader(AirbyteCDKReader):
+    """AirbyteSalesforceReader reader.
+
+    Retrieve documents from Salesforce via `source_salesforce.SourceSalesforce`.
+
+    Args:
+        config: The config object for the salesforce source.
+    """
+
+    def __init__(
+        self,
+        config: Mapping[str, Any],
+    ) -> None:
+        """Initialize with the Salesforce source class, imported at call time."""
+        import source_salesforce
+
+        super().__init__(source_class=source_salesforce.SourceSalesforce, config=config)
diff --git a/llama_hub/airbyte_salesforce/requirements.txt b/llama_hub/airbyte_salesforce/requirements.txt
@@ -0,0 +1 @@
+airbyte-source-salesforce
diff --git a/llama_hub/airbyte_shopify/.gitignore b/llama_hub/airbyte_shopify/.gitignore
@@ -0,0 +1 @@
+test.py
diff --git a/llama_hub/airbyte_shopify/README.md b/llama_hub/airbyte_shopify/README.md
@@ -0,0 +1,55 @@
+# Airbyte Shopify Loader
+
+The Airbyte Shopify Loader allows you to access different Shopify objects.
+
+## Installation
+
+* Install llama_hub: `pip install llama_hub`
+* Install the shopify source: `pip install airbyte-source-shopify`
+
+## Usage
+
+Here's an example usage of the AirbyteShopifyReader.
+
+```python
+from llama_hub.airbyte_shopify.base import AirbyteShopifyReader
+
+shopify_config = {
+    # ...
+}
+reader = AirbyteShopifyReader(config=shopify_config)
+documents = reader.load_data(stream_name="orders")
+```
+
+## Configuration
+
+Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/shopify/) for details about how to configure the reader.
+The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json).
+
+The general shape looks like this:
+```python
+{
+    "start_date": "<date from which to start retrieving records, in ISO format, e.g. 2020-10-20T00:00:00Z>",
+    "shop": "<name of the shop you want to retrieve documents from>",
+    "credentials": {
+        "auth_method": "api_password",
+        "api_password": "<your api password>"
+    }
+}
+```
+
+By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader.
+
+## Incremental loads
+
+This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime):
+```python
+
+reader = AirbyteShopifyReader(config=shopify_config)
+documents = reader.load_data(stream_name="orders")
+current_state = reader.last_state # can be pickled away or stored otherwise
+
+updated_documents = reader.load_data(stream_name="orders", state=current_state) # only loads documents that were updated since last time
+```
+
+This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
diff --git a/llama_hub/airbyte_shopify/__init__.py b/llama_hub/airbyte_shopify/__init__.py
diff --git a/llama_hub/airbyte_shopify/base.py b/llama_hub/airbyte_shopify/base.py
@@ -0,0 +1,21 @@
+from typing import Any, Mapping
+from llama_hub.airbyte_cdk.base import AirbyteCDKReader
+
+
+class AirbyteShopifyReader(AirbyteCDKReader):
+    """AirbyteShopifyReader reader.
+
+    Retrieve documents from Shopify via `source_shopify.SourceShopify`.
+
+    Args:
+        config: The config object for the shopify source.
+    """
+
+    def __init__(
+        self,
+        config: Mapping[str, Any],
+    ) -> None:
+        """Initialize with the Shopify source class, imported at call time."""
+        import source_shopify
+
+        super().__init__(source_class=source_shopify.SourceShopify, config=config)
diff --git a/llama_hub/airbyte_shopify/requirements.txt b/llama_hub/airbyte_shopify/requirements.txt
@@ -0,0 +1 @@
+airbyte-source-shopify
diff --git a/llama_hub/airbyte_stripe/.gitignore b/llama_hub/airbyte_stripe/.gitignore
@@ -0,0 +1 @@
+test.py
diff --git a/llama_hub/airbyte_stripe/README.md b/llama_hub/airbyte_stripe/README.md
@@ -0,0 +1,53 @@
+# Airbyte Stripe Loader
+
+The Airbyte Stripe Loader allows you to access different Stripe objects.
+
+## Installation
+
+* Install llama_hub: `pip install llama_hub`
+* Install the stripe source: `pip install airbyte-source-stripe`
+
+## Usage
+
+Here's an example usage of the AirbyteStripeReader.
+
+```python
+from llama_hub.airbyte_stripe.base import AirbyteStripeReader
+
+stripe_config = {
+    # ...
+}
+reader = AirbyteStripeReader(config=stripe_config)
+documents = reader.load_data(stream_name="invoices")
+```
+
+## Configuration
+
+Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/stripe/) for details about how to configure the reader.
+The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml).
+
+The general shape looks like this:
+```python
+
+{
+  "client_secret": "<secret key>",
+  "account_id": "<account id>",
+  "start_date": "<date from which to start retrieving records, in ISO format, e.g. 2020-10-20T00:00:00Z>",
+}
+```
+
+By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader.
+
+## Incremental loads
+
+This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime):
+```python
+
+reader = AirbyteStripeReader(config=stripe_config)
+documents = reader.load_data(stream_name="invoices")
+current_state = reader.last_state # can be pickled away or stored otherwise
+
+updated_documents = reader.load_data(stream_name="invoices", state=current_state) # only loads documents that were updated since last time
+```
+
+This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
diff --git a/llama_hub/airbyte_stripe/__init__.py b/llama_hub/airbyte_stripe/__init__.py