-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
79aaeef
commit 1cf462c
Showing
22 changed files
with
1,011 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
"""Generate one SQL reload script per CSV data file.

Each generated ``.sql`` file wraps a ``delete`` of the target table and one
``insert`` per CSV data row in a single ``begin; ... end;`` transaction.
"""
import csv
from pathlib import Path

table = "COVID"
csv_files = ["data1.csv", "data2.csv", "data3.csv", "data4.csv", "data5.csv"]


def sql_literal(value):
    """Return the CSV field *value* rendered as a SQL literal.

    Empty fields and the text ``NULL`` become the SQL ``NULL`` keyword.
    Everything else becomes a single-quoted string; embedded single quotes
    are doubled so that values such as ``O'Brien`` do not terminate the
    literal early and corrupt the statement.
    """
    if value in ("NULL", ""):
        return "NULL"
    return "'" + value.replace("'", "''") + "'"


def sql_filename_for(csv_filename):
    """Return the output path for *csv_filename*: same path, ``.sql`` suffix."""
    # with_suffix only replaces the final extension, so directories or
    # base names that contain dots are left intact.
    return str(Path(csv_filename).with_suffix(".sql"))


def convert_csv_to_sql(csv_filename, sql_filename, table_name=table):
    """Write a SQL script to *sql_filename* that reloads *table_name*.

    The first CSV row is treated as a header and skipped. Blank CSV lines
    are ignored because they would otherwise produce an invalid
    ``values ()`` statement. Both files are closed even if an error occurs.
    """
    # newline="" lets the csv module handle quoted fields with line breaks.
    with open(csv_filename, "r", newline="") as csv_file, \
            open(sql_filename, "w") as sql_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header; tolerate a completely empty file

        sql_file.write("begin;\n")
        sql_file.write("delete from " + table_name + ";\n")
        for row in reader:
            if not row:
                continue
            values = ", ".join(sql_literal(field) for field in row)
            sql_file.write(
                "insert into " + table_name + " values (" + values + ");\n"
            )
        sql_file.write("end;\n")


def main():
    """Convert every file listed in ``csv_files``."""
    for csv_filename in csv_files:
        convert_csv_to_sql(csv_filename, sql_filename_for(csv_filename))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"""Time bulk loading of each CSV file into PostgreSQL with ``COPY``.

For every file the table is emptied first (untimed); then connecting,
running ``COPY ... FROM STDIN``, committing and closing are timed together.
"""
import time

import psycopg2

import config

table = "COVID"
csv_files = ["data1.csv", "data2.csv", "data3.csv", "data4.csv", "data5.csv"]


def _connect():
    """Open a new database connection from the settings in ``config``."""
    return psycopg2.connect(
        database=config.name,
        user=config.user,
        password=config.pswd,
        host=config.host,
        port=config.port,
    )


def _clear_table():
    """Delete every row of ``table`` on its own connection and commit."""
    conn = _connect()
    try:
        # "with conn" commits on success and rolls back on error; it does
        # not close the connection, hence the explicit finally.
        with conn, conn.cursor() as cur:
            cur.execute("delete from " + table + ";")
    finally:
        conn.close()


for csv_filename in csv_files:
    _clear_table()

    # perf_counter is monotonic and high resolution, unlike wall-clock time.
    start = time.perf_counter()
    conn = _connect()
    try:
        # The CSV handle is closed by the with-block (it used to leak).
        with conn, conn.cursor() as cur, open(csv_filename, "r") as csv_file:
            cur.copy_expert("copy " + table + " from STDIN csv header;", csv_file)
    finally:
        conn.close()
    elapsed = time.perf_counter() - start
    print(csv_filename, " ", elapsed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
"""Time loading each CSV file with one INSERT per row on a single connection.

For every file the table is emptied first (untimed); then connecting,
inserting all rows, committing once and closing are timed together.
"""
import csv
import time

import psycopg2

import config

table = "COVID"
csv_files = ["data1.csv", "data2.csv", "data3.csv", "data4.csv", "data5.csv"]


def _connect():
    """Open a new database connection from the settings in ``config``."""
    return psycopg2.connect(
        database=config.name,
        user=config.user,
        password=config.pswd,
        host=config.host,
        port=config.port,
    )


def _clear_table():
    """Delete every row of ``table`` on its own connection and commit."""
    conn = _connect()
    try:
        # "with conn" commits on success and rolls back on error; it does
        # not close the connection, hence the explicit finally.
        with conn, conn.cursor() as cur:
            cur.execute("delete from " + table + ";")
    finally:
        conn.close()


for csv_filename in csv_files:
    _clear_table()

    # perf_counter is monotonic and high resolution, unlike wall-clock time.
    start = time.perf_counter()
    conn = _connect()
    try:
        with conn, conn.cursor() as cur, \
                open(csv_filename, "r", newline="") as csv_file:
            reader = csv.reader(csv_file)
            next(reader, None)  # skip the header; tolerate an empty file

            for row in reader:
                if not row:
                    # A blank line would produce an invalid "values ()".
                    continue
                # Let the driver quote the values: hand-built literals broke
                # on any field containing a single quote.
                placeholders = ", ".join(["%s"] * len(row))
                cur.execute(
                    "insert into " + table + " values (" + placeholders + ");",
                    row,
                )
    finally:
        conn.close()
    elapsed = time.perf_counter() - start
    print(csv_filename, " ", elapsed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
"""Time loading each CSV file with a fresh connection for every INSERT.

For every file the table is emptied first (untimed); then each data row is
inserted and committed on its own short-lived connection. Reconnecting per
row is deliberate: its overhead is what this benchmark measures.
"""
import csv
import time

import psycopg2

import config

table = "COVID"
csv_files = ["data6.csv", "data7.csv", "data8.csv", "data9.csv", "data10.csv"]


def _connect():
    """Open a new database connection from the settings in ``config``."""
    return psycopg2.connect(
        database=config.name,
        user=config.user,
        password=config.pswd,
        host=config.host,
        port=config.port,
    )


def _execute_and_commit(statement, params=None):
    """Run one statement on a dedicated connection, commit, and close it."""
    conn = _connect()
    try:
        # "with conn" commits on success and rolls back on error; it does
        # not close the connection, hence the explicit finally.
        with conn, conn.cursor() as cur:
            cur.execute(statement, params)
    finally:
        conn.close()


for csv_filename in csv_files:
    _execute_and_commit("delete from " + table + ";")

    # perf_counter is monotonic and high resolution, unlike wall-clock time.
    start = time.perf_counter()
    with open(csv_filename, "r", newline="") as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header; tolerate an empty file

        for row in reader:
            if not row:
                # A blank line would produce an invalid "values ()".
                continue
            # Let the driver quote the values: hand-built literals broke on
            # any field containing a single quote.
            placeholders = ", ".join(["%s"] * len(row))
            _execute_and_commit(
                "insert into " + table + " values (" + placeholders + ");",
                row,
            )
    elapsed = time.perf_counter() - start
    print(csv_filename, " ", elapsed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
"""Plot how long each LIMIT/OFFSET page query takes as the offset grows.

The table is read in pages of ``num_rows`` rows, one query per page. The
per-page timings are averaged in bins to smooth the curve, then plotted.
"""
import time

import psycopg2
from matplotlib import pyplot as plt

import config

table = "COVID"
num_rows = 100
total_rows = 100000  # number of rows paged through
bin_size = 40  # number of consecutive samples averaged into one plot point
x = []
y = []


def _bin_means(values, size):
    """Return the mean of each consecutive group of up to *size* values.

    The last group may be shorter than *size*; it is divided by its real
    length so a partial bin does not get an artificially low average.
    """
    means = []
    for begin in range(0, len(values), size):
        chunk = values[begin:begin + size]
        means.append(sum(chunk) / len(chunk))
    return means


conn = psycopg2.connect(
    database=config.name,
    user=config.user,
    password=config.pswd,
    host=config.host,
    port=config.port,
)
try:
    with conn.cursor() as cur:
        for i in range(total_rows // num_rows):
            # perf_counter is monotonic and high resolution.
            start = time.perf_counter()
            cur.execute(
                "select * from " + table + " limit %s offset %s;",
                (num_rows, i * num_rows),
            )
            cur.fetchall()  # fetch the page so transfer time is included
            elapsed = time.perf_counter() - start
            if elapsed > 0:
                x.append(i + 1)
                y.append(elapsed)
finally:
    conn.close()

x = _bin_means(x, bin_size)
y = _bin_means(y, bin_size)

plt.plot(x, y)
plt.xlabel("iteration")
plt.ylabel("time (s)")
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
"""Plot how long each ``fetchmany`` batch takes after a single SELECT.

The whole table is selected once; rows are then pulled from the cursor in
batches of ``num_rows``. The per-batch timings are averaged in bins to
smooth the curve, then plotted.
"""
import time

import psycopg2
from matplotlib import pyplot as plt

import config

table = "COVID"
num_rows = 100
total_rows = 100000  # number of rows fetched in batches
bin_size = 40  # number of consecutive samples averaged into one plot point
x = []
y = []


def _bin_means(values, size):
    """Return the mean of each consecutive group of up to *size* values.

    The last group may be shorter than *size*; it is divided by its real
    length so a partial bin does not get an artificially low average.
    """
    means = []
    for begin in range(0, len(values), size):
        chunk = values[begin:begin + size]
        means.append(sum(chunk) / len(chunk))
    return means


conn = psycopg2.connect(
    database=config.name,
    user=config.user,
    password=config.pswd,
    host=config.host,
    port=config.port,
)
try:
    # NOTE(review): an unnamed psycopg2 cursor is presumably client-side, so
    # execute() may already transfer the full result set - confirm whether a
    # named (server-side) cursor was intended for this measurement.
    with conn.cursor() as cur:
        cur.execute("select * from " + table + ";")

        for i in range(total_rows // num_rows):
            # perf_counter is monotonic and high resolution.
            start = time.perf_counter()
            cur.fetchmany(num_rows)
            elapsed = time.perf_counter() - start
            # Every sample is recorded, including zero-length timings.
            x.append(i + 1)
            y.append(elapsed)
finally:
    conn.close()

x = _bin_means(x, bin_size)
y = _bin_means(y, bin_size)

plt.plot(x, y)
plt.xlabel("iteration")
plt.ylabel("time (s)")
plt.show()
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Benchmarking different methods in PostgreSQL | ||
|
||
* used the psycopg2 library to connect to the database from Python | ||
* compared bulk loading with individual INSERT statements in terms of time taken | ||
* used cursors to reduce the fetch time of bulk queries |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
"""Database connection settings.

Each value can be overridden through the standard libpq environment
variable named beside it, which keeps real credentials out of version
control. The defaults are the local development values.
"""
import os

name = os.environ.get("PGDATABASE", "lab4db")  # database name
user = os.environ.get("PGUSER", "postgres")  # login role
pswd = os.environ.get("PGPASSWORD", "1234")  # password (development default only)
host = os.environ.get("PGHOST", "127.0.0.1")  # server address
port = os.environ.get("PGPORT", "5432")  # server port, kept as a string
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Graph Databases using Neo4j | ||
|
||
* loading a Twitter dataset (in CSV format) using Python | ||
* querying the database with the Cypher Query Language |
Oops, something went wrong.