Commit

format
vertefra committed Oct 18, 2024
1 parent 862192d commit 9a19162
Showing 2 changed files with 51 additions and 5 deletions.
48 changes: 48 additions & 0 deletions qcog_python_client/qcog/_base64utils.py
@@ -1,4 +1,6 @@
import base64
import csv
import gzip
import io
import json
from typing import TypedDict
@@ -77,6 +79,7 @@ def encode_base64(data: pd.DataFrame) -> str:
"""
indexing: list[int] = list(range(data.index.nlevels))

raw_string: str = data.to_csv()
payload: DataFramePayload = DataFramePayload(
blob=raw_string,
@@ -86,3 +89,48 @@ def encode_base64(data: pd.DataFrame) -> str:
base64_bytes = base64.b64encode(raw_bytes)
base64_string = base64_bytes.decode("ascii")
return base64_string


def compress_data(data: pd.DataFrame) -> io.BytesIO:
"""Compress a pandas dataframe.
Take a normal pandas dataframe and compress as
gzip "string" of csv export
It adds the indexing of the columns as a csv.
It uses a separator "---" to separate the csv and the indexing.
Parameters
----------
data: pd.DataFrame
dataframe to compress
Returns
-------
str: compressed gzip string
"""
indexing: list[int] = list(range(data.index.nlevels))

# Transform the indexing into a csv
indexing_csv = io.StringIO()

writer = csv.writer(indexing_csv)
writer.writerow(indexing)
indexing_csv.seek(0)

data_csv: str = data.to_csv()

buffer = io.BytesIO()

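# Write the data CSV, a "---" separator line, and the index-level CSV
# into a single gzip stream.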
with gzip.GzipFile(fileobj=buffer, mode="wb") as f:
f.write(data_csv.encode())
f.write(b"\n---\n")
f.write(indexing_csv.getvalue().encode())

buffer.seek(0)

return buffer
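
For context, a minimal sketch of the inverse operation, assuming the receiver splits the decompressed stream on the "---" separator. This decompress_data helper is hypothetical and not part of the commit:

import gzip
import io

import pandas as pd


def decompress_data(buffer: io.BytesIO) -> tuple[pd.DataFrame, list[int]]:
    """Hypothetical inverse of compress_data."""
    raw = gzip.decompress(buffer.getvalue()).decode()
    # Payload layout: <data csv> "\n---\n" <index-level csv>
    data_csv, _, indexing_csv = raw.rpartition("\n---\n")
    indexing = [int(level) for level in indexing_csv.strip().split(",")]
    # Rebuild the (possibly multi-level) index from the level numbers
    frame = pd.read_csv(io.StringIO(data_csv), index_col=indexing)
    return frame, indexing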
8 changes: 3 additions & 5 deletions qcog_python_client/qcog/_data_uploader.py
@@ -4,12 +4,10 @@
to support multi part uploads or other types of uploads.
"""

import gzip

import aiohttp
from pandas.core.api import DataFrame as DataFrame

from qcog_python_client.qcog._base64utils import encode_base64
from qcog_python_client.qcog._base64utils import compress_data, encode_base64
from qcog_python_client.qcog._interfaces import IDataClient, IRequestClient
from qcog_python_client.schema import DatasetPayload

@@ -38,7 +36,7 @@ async def stream_data(
data: DataFrame,
*,
dataset_id: str,
encoding: str = "gzipBase64",
encoding: str = "gzip",
) -> dict:
"""Stream data to the server.
@@ -60,7 +58,7 @@
url = f"{base_url}/dataset/upload?dataset_id={dataset_id}&format=dataframe&source=client&encoding={encoding}" # noqa: E501

# Gzip-compress the data
zip_data = gzip.compress(encode_base64(data).encode())
zip_data = compress_data(data)

form = aiohttp.FormData()
form.add_field(

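A rough sketch of how the compressed buffer might be posted as multipart form data in stream_data; the field name, filename, and content type below are assumptions, since the add_field call is truncated in this diff:

import io

import aiohttp


async def upload_compressed(url: str, dataset_id: str, zip_data: io.BytesIO) -> dict:
    # zip_data is the io.BytesIO returned by compress_data(data)
    form = aiohttp.FormData()
    form.add_field(
        "data",  # assumed field name
        zip_data,
        filename=f"{dataset_id}.csv.gz",  # assumed filename
        content_type="application/gzip",
    )
    async with aiohttp.ClientSession() as session:
        async with session.post(url, data=form) as response:
            response.raise_for_status()
            return await response.json()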