-
Notifications
You must be signed in to change notification settings - Fork 0
/
dictionaries.py
206 lines (161 loc) · 6.62 KB
/
dictionaries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# dictionaries.py
import random
import string
from logger import log_this
import logging
import numpy as np
import pandas as pd
import polars as pl
logger = log_this(__name__, level=logging.WARNING)
# Default error messages
LIST_ERROR = "Sample data must be a list."
TUPLE_ERROR = "Each item in sample data must be a tuple."
TUPLE_LENGTH_ERROR = "Each tuple in sample data must contain exactly 2 elements."
def generate_sample_data(size, max_list_length):
    """
    Generates sample data for a Python dictionary.

    :param size: Number of random keys to draw; must be positive. Keys are random
        3-letter lowercase strings, so the same key may be drawn more than once.
    :param max_list_length: Maximum number of values generated for each drawn key;
        must be positive. Each key gets between 1 and max_list_length values.
    :return: Flat list of tuples (key, value) where key is a string and value is
        an integer between 1 and 100 (inclusive).
    :raises ValueError: If size or max_list_length is zero or negative.
    """
    if size <= 0 or max_list_length <= 0:
        logger.error("Size and max list length must be non-negative, and non-zero.")
        raise ValueError("Size and max list length must be non-negative, and non-zero.")

    try:
        logger.info("Generating sample data.")
        sample_data = []
        for _ in range(size):
            # Generate a random 3-letter string
            key = "".join(random.choice(string.ascii_lowercase) for _ in range(3))
            # max_list_length is guaranteed positive by the guard above, so every
            # key receives at least one value. The count is drawn before the
            # values so the sequence of random calls is the same for a given seed.
            value_count = random.randint(1, max_list_length)
            sample_data.extend(
                (key, random.randint(1, 100)) for _ in range(value_count)
            )
    except Exception as sample_data_error:
        logger.error("Error generating sample data: " + str(sample_data_error))
        raise

    logger.info("Successfully generated sample data.")
    # Lazy %-style argument: the (potentially large) dump is only rendered when
    # DEBUG logging is actually enabled.
    logger.debug("Sample Data: %s", sample_data)
    return sample_data
def convert_to_dictionary(sample_data):
    """
    Groups sample data into a Python dictionary of lists.

    Values that share a key are collected, in input order, into that key's list.

    :param sample_data: List of tuples (key, value); each tuple must contain
        exactly 2 elements.
    :return: Python dictionary mapping each key to the list of its values.
    :raises TypeError: If sample_data is not a list or an item is not a tuple.
    :raises ValueError: If a tuple does not contain exactly 2 elements.
    """
    if not isinstance(sample_data, list):
        logger.error(LIST_ERROR)
        raise TypeError(LIST_ERROR)

    try:
        logger.info("Converting sample data to a Python dictionary.")
        grouped = {}
        for pair in sample_data:
            # Validate the shape of each entry before unpacking it.
            if not isinstance(pair, tuple):
                logger.error(TUPLE_ERROR)
                raise TypeError(TUPLE_ERROR)
            if len(pair) != 2:
                logger.error(TUPLE_LENGTH_ERROR)
                raise ValueError(TUPLE_LENGTH_ERROR)

            name, number = pair
            # Start a fresh list the first time a key is seen, then append.
            if name not in grouped:
                grouped[name] = []
            grouped[name].append(number)
    except Exception as failure:
        logger.error(
            f"Error converting sample data to a Python dictionary: {failure!s}"
        )
        raise

    logger.info("Successfully converted sample data to a Python dictionary.")
    logger.debug(f"Python dictionary: {grouped!s}")
    return grouped
def convert_to_numpy_array(sample_data):
    """
    Converts sample data to a NumPy array.

    :param sample_data: List of tuples where each tuple represents a row in the array.
        All tuples must have the same length.
    :return: NumPy array built from the rows; an empty list yields ``np.array([])``.
    :raises TypeError: If sample_data is not a list.
    :raises ValueError: If any item is not a tuple, or if the tuples do not all
        have the same length.
    """
    if not isinstance(sample_data, list):
        logger.error(LIST_ERROR)
        raise TypeError(LIST_ERROR)

    logger.info("Converting sample data to a NumPy array.")
    if not sample_data:
        # Return an empty array equivalent to np.array([])
        return np.array([])

    # Every item must be a tuple. Non-tuple items used to be silently skipped by
    # the length check, so a mix of tuples and other values slipped through.
    # ValueError (not TypeError) is kept because that is what this function has
    # always raised for non-tuple input.
    if not all(isinstance(item, tuple) for item in sample_data):
        logger.error(TUPLE_ERROR)
        raise ValueError(TUPLE_ERROR)

    # Verify that all tuples have the same length, so the rows form a
    # rectangular array.
    if len({len(item) for item in sample_data}) != 1:
        length_error = "All tuples in sample data must have the same length."
        logger.error(length_error)
        raise ValueError(length_error)

    # Only the conversion itself is wrapped, so validation failures above are
    # logged once rather than twice.
    try:
        np_array = np.array(sample_data)
    except Exception as convert_to_numpy_array_error:
        logger.error(
            "Error converting sample data to a NumPy array: "
            + str(convert_to_numpy_array_error)
        )
        raise

    logger.info("Successfully converted sample data to a NumPy array.")
    # Lazy %-style argument: the array is only rendered when DEBUG is enabled.
    logger.debug("Numpy array: %s", np_array)
    return np_array
def convert_to_pandas_df(sample_data):
    """
    Builds a Pandas DataFrame with "Key" and "Value" columns from sample data.

    :param sample_data: List of 2-element tuples; each tuple becomes one row.
    :return: Pandas DataFrame with columns "Key" and "Value".
    :raises TypeError: If sample_data is not a list.
    :raises ValueError: If any item is not a tuple of exactly 2 elements.
    """
    if not isinstance(sample_data, list):
        logger.error(LIST_ERROR)
        raise TypeError(LIST_ERROR)

    try:
        logger.info("Converting sample data to a Pandas DataFrame.")
        # Reject the data at the first row that is not a 2-element tuple.
        for row in sample_data:
            if not (isinstance(row, tuple) and len(row) == 2):
                logger.error(TUPLE_ERROR)
                raise ValueError(TUPLE_ERROR)

        column_names = ["Key", "Value"]
        frame = pd.DataFrame(sample_data, columns=column_names)
    except Exception as failure:
        logger.error(
            f"Error converting sample data to a Pandas DataFrame: {failure!s}"
        )
        raise

    logger.info("Successfully converted sample data to a Pandas DataFrame.")
    logger.debug(f"Pandas DataFrame: {frame!s}")
    return frame
def convert_to_polars_df(sample_data):
    """
    Converts sample data to a Polars DataFrame.

    :param sample_data: List of tuples where each tuple represents a row in the DataFrame.
        All tuples must have the same length.
    :return: Polars DataFrame with one row per tuple; an empty list yields an
        empty ``pl.DataFrame()``.
    :raises TypeError: If sample_data is not a list.
    :raises ValueError: If any item is not a tuple, or if the tuples do not all
        have the same length.
    """
    if not isinstance(sample_data, list):
        logger.error(LIST_ERROR)
        raise TypeError(LIST_ERROR)

    logger.info("Converting sample data to a Polars DataFrame.")
    if not sample_data:
        # Return an empty DataFrame
        return pl.DataFrame()

    # Every item must be a tuple. Non-tuple items used to be silently skipped by
    # the length check, so a mix of tuples and other values slipped through.
    # ValueError is kept because that is what this function has always raised
    # for non-tuple input.
    if not all(isinstance(item, tuple) for item in sample_data):
        logger.error(TUPLE_ERROR)
        raise ValueError(TUPLE_ERROR)

    # Verify that all tuples have the same length (any uniform width is
    # accepted, not only pairs).
    if len({len(item) for item in sample_data}) != 1:
        length_error = "All tuples in sample data must have the same length."
        logger.error(length_error)
        raise ValueError(length_error)

    # Only the conversion itself is wrapped, so validation failures above are
    # logged once rather than twice.
    try:
        # State the orientation explicitly: each tuple is a row. Without it
        # Polars has to infer whether the tuples are rows or columns.
        df = pl.DataFrame(sample_data, orient="row")
    except Exception as convert_to_polars_df_error:
        logger.error(
            "Error converting sample data to a Polars DataFrame: "
            + str(convert_to_polars_df_error)
        )
        raise

    logger.info("Successfully converted sample data to a Polars DataFrame.")
    # Lazy %-style argument: the frame is only rendered when DEBUG is enabled.
    logger.debug("Polars DataFrame: %s", df)
    return df