Skip to content

Commit

Permalink
black
Browse files Browse the repository at this point in the history
  • Loading branch information
jacgoldsm committed Oct 7, 2023
1 parent 7e8c5bf commit 1bb4634
Show file tree
Hide file tree
Showing 15 changed files with 1,502 additions and 2,114 deletions.
50 changes: 30 additions & 20 deletions osos/OsosSession.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,27 @@ def createDataFrame(data, schema=None) -> DataFrame:
schema has to be a list or a simple string"""
)
schema = _parse_schema(schema)
if isinstance(data, (dict,tuple)):
if isinstance(data, (dict, tuple)):
data = [data]

if isinstance(data, pd.DataFrame):
return DataFrame(data)

if isinstance(data[0], dict):
warnings.warn("This is the Spark way of defining a DataFrame, not the Pandas way. Each "
"dict represents a row, not the whole data")
warnings.warn(
"This is the Spark way of defining a DataFrame, not the Pandas way. Each "
"dict represents a row, not the whole data"
)

if isinstance(data, list):
# either a list of tuples or a list of dicts.
# either way, handled by pd.DataFrame.from_records
return DataFrame(pd.DataFrame.from_records(data, columns = schema))
return DataFrame(pd.DataFrame.from_records(data, columns=schema))
else:
raise AnalysisException("Data must be a dict, tuple, pandas DataFrame, list[dict], or list[tuple]")
raise AnalysisException(
"Data must be a dict, tuple, pandas DataFrame, list[dict], or list[tuple]"
)


def _parse_schema(schema):
if isinstance(schema, str):
Expand All @@ -43,21 +48,27 @@ def _parse_schema(schema):
schema = "".join(schema.split()).split(",")
cols = []
for elem in schema:
cols.append(re.search('(.*):', elem).group(1))
cols.append(re.search("(.*):", elem).group(1))
return cols
elif isinstance(schema,list):
elif isinstance(schema, list):
return schema
else:
raise TypeError("schema must be str or list")


def range(
    start: int,
    end: Optional[int] = None,
    step: int = 1,
    numSlices: Optional[int] = None,
):
    """Build a single-column DataFrame of evenly spaced integers.

    The result wraps a pandas DataFrame whose only column, ``"id"``, holds
    ``numpy.arange(start, end, step)``.

    Args:
        start: First value of the sequence, or -- when ``end`` is omitted --
            the exclusive upper bound of a sequence that starts at 0.
        end: Exclusive upper bound. ``None`` switches to the one-argument
            form described above.
        step: Increment between consecutive values.
        numSlices: Accepted but not used by this implementation
            (presumably kept for PySpark signature compatibility -- confirm).

    Returns:
        A ``DataFrame`` with one column named ``"id"``.
    """
    # Imported locally, as in the original, so numpy is only required
    # when this function is actually called.
    import numpy as np

    # One-argument form: range(n) means 0 .. n-1.
    if end is None:
        start, end = 0, start

    ids = np.arange(start, end, step)
    frame = pd.DataFrame({"id": ids})
    return DataFrame(frame)


class read:
Expand All @@ -67,12 +78,13 @@ def csv(path, *args, **kwargs):
def text(path, *args, **kwargs):
return pd.read_csv(path, *args, **kwargs)


def _test():
pd_data = pd.DataFrame({"a":[1,2,3], "b":[4,5,6]})
tuple_data = (1,2)
dict_data = {"a":1, "b":2}
list_tuple_data = [(1,2), (3,4)]
list_dict_data = [{"a":1, "b":2}, {"a":3, "b":4}]
pd_data = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
tuple_data = (1, 2)
dict_data = {"a": 1, "b": 2}
list_tuple_data = [(1, 2), (3, 4)]
list_dict_data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
str_schema_one = "ai bi"
str_schema_two = "ai: int, bi:int"
list_schema = ["ai", "bi"]
Expand All @@ -85,11 +97,11 @@ def _test():
print()
print(createDataFrame(pd_data))
print()
print(createDataFrame(pd_data,str_schema_one))
print(createDataFrame(pd_data, str_schema_one))
print()
print(createDataFrame(tuple_data))
print()
print(createDataFrame(tuple_data,str_schema_two))
print(createDataFrame(tuple_data, str_schema_two))
print()
print(createDataFrame(list_tuple_data, str_schema_one))
print()
Expand All @@ -98,7 +110,5 @@ def _test():
print(createDataFrame(dict_data))


# Run the manual smoke test only when this module is executed as a script,
# never on import.
if __name__ == "__main__":
    _test()


Loading

0 comments on commit 1bb4634

Please sign in to comment.