-
Notifications
You must be signed in to change notification settings - Fork 0
/
neo4j_migrate_user_info.py
executable file
·107 lines (80 loc) · 4.43 KB
/
neo4j_migrate_user_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
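# Script to export the user, session, and query nodes from an HMP Neo4j
# instance (--export_file), or to replay such an export into a Neo4j
# instance (--import_file). The export is a file of Cypher statements, one
# per line, that can also be fed into the corresponding
# neo4j_import_user_info.py.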
#
# Author: James Matsumura
# Contact: [email protected]
import argparse,json,datetime,sys
from py2neo import Graph
def _cypher_string(value):
    """Return *value* escaped for use inside a single-quoted Cypher string.

    Backslashes, single quotes and line breaks are replaced by their Cypher
    escape sequences so that the value can neither terminate the literal
    early nor spill the statement onto a second line of the export file.
    """
    text = "{0}".format(value)
    # Backslash must be handled first, otherwise the backslashes introduced
    # by the later replacements would themselves be doubled.
    return (
        text.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


def _session_statement(user, session):
    """Build the one-line Cypher statement that recreates a user's session."""
    return (
        "MERGE (u:user {{ username:'{0}' }}) "
        "MERGE (s:session {{ id:'{1}', created_at:{2} }}) "
        "MERGE (u)-[:has_session]->(s)"
    ).format(
        _cypher_string(user['username']),
        _cypher_string(session['id']),
        session['created_at']
    )


def _query_statement(user, query):
    """Build the one-line Cypher statement that recreates a saved query."""
    return (
        "MERGE (u:user {{ username:'{0}' }}) "
        "MERGE (q:query {{ query_str:'{1}', url:'{2}', f_count:{3}, s_count:{4} }}) "
        "MERGE (u)-[:saved_query]->(q)"
    ).format(
        _cypher_string(user['username']),
        _cypher_string(query['query_str']),
        _cypher_string(query['url']),
        query['f_count'],
        query['s_count']
    )


def _session_is_recent(created_at_ms, now):
    """Return True when a session created at *created_at_ms* is under a day old."""
    if created_at_ms is None:
        # Without a creation time the session cannot be aged, so it is not
        # carried over; the user's saved queries are still exported.
        return False
    # Neo4j, by default, does milliseconds since epoch
    logged_in_time = datetime.datetime.fromtimestamp(created_at_ms / 1000.0)
    return now - logged_in_time < datetime.timedelta(days=1)


def _export_user_info(cy, export_path):
    """Write Cypher statements recreating users, sessions and queries to *export_path*."""
    extract_session_user_query_cypher = """
        MATCH (u:user)-[:saved_query]->(q:query)
        WITH u,q
        OPTIONAL MATCH (s:session)<-[:has_session]-(u)
        RETURN s,u,q
    """

    # Each element in this list will be a unique query attached to a
    # particular user (and potentially a session).
    relevant_nodes = cy.run(extract_session_user_query_cypher).data()

    # One reference point to measure every user's login against.
    now = datetime.datetime.now()
    session_statements = set()  # don't repeat session statements

    with open(export_path, 'w') as out:
        for res in relevant_nodes:
            user = res['u']
            session = res.get('s')

            # If within 24 hrs, leave the login as present. The session
            # statement is written before the query statement it belongs to.
            if session and _session_is_recent(session['created_at'], now):
                statement = _session_statement(user, session)
                if statement not in session_statements:
                    session_statements.add(statement)
                    out.write("{0}\n".format(statement))

            # No matter whether we stored a session or not, there's a query
            # if there's a result here.
            out.write("{0}\n".format(_query_statement(user, res['q'])))


def _import_user_info(cy, import_path):
    """Run every non-blank line of *import_path* as a Cypher statement."""
    # NOTE: each line is executed verbatim, so only feed this files that
    # were produced by the export mode of this script.
    with open(import_path, 'r') as inp:
        for line in inp:
            statement = line.strip()
            if statement:  # a blank line is not a statement
                cy.run(statement)


def main(argv=None):
    """Export user/session/query nodes from Neo4j, or import a prior export.

    Args:
        argv: Optional list of command-line arguments. When None (the
            default) argparse reads them from sys.argv.

    Exits with an error message when neither --export_file nor
    --import_file is given. When both are given only the export is done.
    """
    parser = argparse.ArgumentParser(description='Script to export the user, session, and query nodes of a Neo4j database to a file, or to import them from one.')
    parser.add_argument('--http_port', '-hp', type=int, help='Port to map the http port to, should not be 7474 to avoid conflict of live database.')
    parser.add_argument('--bolt_port', '-bp', type=int, help='Port to map the bolt port to, should not be 7687 to avoid conflict of live database.')
    parser.add_argument('--neo4j_password', '-np', type=str, help='Password for the Neo4j database')
    parser.add_argument('--export_file', '-ef', type=str, help='Name of an export file to WRITE to.')
    parser.add_argument('--import_file', '-if', type=str, help='Name of an import file to READ from and BUILD in Neo4j.')
    args = parser.parse_args(argv)

    if not args.export_file and not args.import_file:
        sys.exit("Must provide either an export or import file path.")

    # No matter what, talking to Neo4j. Just depends whether we're exporting
    # or importing the information.
    cy = Graph(password=args.neo4j_password, bolt_port=args.bolt_port, http_port=args.http_port)

    if args.export_file:
        _export_user_info(cy, args.export_file)
    elif args.import_file:
        _import_user_info(cy, args.import_file)
# Run the migration only when this file is executed as a script; importing
# it as a module has no side effects.
if __name__ == "__main__":
    main()