Skip to content
Snippets Groups Projects
Commit 81e6397e authored by Senft, Michael's avatar Senft, Michael
Browse files

Upload New File

parent 341d33df
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import getpass
import os
import time
import uuid

from neo4j import GraphDatabase
#Note: To install the latest stable release of the neo4j python driver use:
# python -m pip install neo4j
# neo4j module needed to connect to the neo4j database using the BOLT API
# uuid module needed to generate unique identifier for each set of Monterey Phoenix traces imported. Aside from the neo4j module, this is the only one essential.
# os module needed to generate a list of files in the neo4j import folder. The code using this module is not essential, rather it is for user convenience.
# time module needed to generate elapsed time for neo4j import. The code using this module is not essential, rather it is for user convenience.
# Connection settings are read interactively; pressing Enter at a prompt accepts the default shown in it.
uri = input('Please type in the URL of your Neo4j instance and port. Hit enter for Default - bolt://localhost:7687') or 'bolt://localhost:7687'
#Note this is only intended to be used on your local system because the username and password are being passed in cleartext
username = input('Please type in the name of your Neo4j instance. Hit enter for default - neo4j') or 'neo4j'
# getpass is used instead of input() so the password is not echoed to the screen while it is typed
password = getpass.getpass('Please type in the password for your Neo4j instance. Hit enter for default - password') or 'password'
# Module-level driver shared by load_data_into_neo4j()
driver = GraphDatabase.driver(uri, auth=(username, password))
# In[2]:
#Neo4j Note: You must have APOC Plugin installed for this to work - https://neo4j.com/docs/labs/apoc/current/
#Neo4j Note: You must enable file import in your neo4j config (windows) or apoc.conf file (linux) - apoc.import.file.enabled=true
#Neo4j Note: For Linux - If you don't have an apoc.conf file, you'll need to create it; the setting above (apoc.import.file.enabled=true) will be its only entry
#Neo4j Note: More information can be found at https://neo4j.com/docs/labs/apoc/current/import/load-json/
#Neo4j Note: For ingest of more than 1000 traces, you will need to increase the heap memory available in your neo4j config file
#Neo4j Note: Increasing heap memory available is done by changing 'dbms.memory.heap.max_size=1G' to larger than 1G of memory
def load_data_into_neo4j():
    """Import one Monterey Phoenix (MP) trace export (.gry file) into Neo4j.

    Prompts for the file to import, creates one ``MP_Node`` node per trace
    node and one relationship per trace edge, then renames the ``MP_Node``
    label of every node to ``ROOT``, ``ATOM``, ``COMPOSITE`` or ``SAY``
    according to its ``type`` property. Every node and relationship created
    by one call carries the same freshly generated ``traceUUID`` so that
    several exports can be imported side by side.

    Uses the module-level ``driver`` and requires the APOC plugin with file
    import enabled on the server. Prints the elapsed time when finished.

    Raises:
        ValueError: if no file name is entered and the import directory
            offers no default to fall back on.
    """
    # Unique identifier for this import; lets multiple trace files coexist in one database
    trace_uuid = str(uuid.uuid4())
    # Start time, used only for the elapsed-time message at the end
    start = time.time()

    # The context manager closes the session, so no explicit session.close() is needed
    with driver.session() as session:
        # Ask the server where its home directory is so the files in its import folder can be
        # offered to the user. This is a convenience only; any way of obtaining the file name works.
        home_rows = session.run(
            "CALL dbms.listConfig() YIELD name, value "
            "WHERE name = 'dbms.directories.neo4j_home' RETURN value"
        ).values()

        files = []
        if home_rows:
            import_dir = os.path.join(home_rows[0][0], 'import')
            try:
                # List the directory by path instead of os.chdir() so the caller's
                # working directory is left alone.
                files = os.listdir(import_dir)
            except OSError:
                # The directory is not readable from this machine (for example the server
                # is remote); fall back to asking for the file name without a listing.
                files = []

        # Files need to be in the import directory unless global file imports are enabled in neo4j
        prompt = "Please type in the filename for the Monterey Phoenix Trace (.gry) that you want to import. "
        if files:
            mpfile = input(prompt + "Files in your import directory are: " + str(files).strip('[]') + " Hit Enter for " + str(files[0])) or files[0]
        else:
            mpfile = input(prompt + "(No files could be listed from the import directory.) ")
            if not mpfile:
                raise ValueError("No trace file name was entered and the import directory offers no default.")

        # The file name and the UUID are passed as query parameters rather than being
        # concatenated into the Cypher text, so quotes in a file name cannot break the query.
        import_params = {'fileUrl': 'file:///' + mpfile, 'traceUUID': trace_uuid}

        # Index on the MP_Node properties used for matching; greatly speeds up edge creation and relabelling
        create_index = 'CREATE INDEX mpindex FOR (a:MP_Node) ON (a.id, a.traceIndex, a.traceUUID)'

        # Creates the nodes of the trace as "MP_Node" with properties traceIndex, label, id, type,
        # probability, traceUUID and traceMP_Code (substring of mp_code from offset 3, length 40)
        create_nodes = (
            "CALL apoc.load.json($fileUrl) YIELD value "
            "UNWIND value.graphs AS item UNWIND item.nodes AS test "
            "CALL apoc.merge.node(['MP_Node'],{traceIndex:item.index, label:test.label, id:toInteger(test.id), "
            "type:CASE WHEN test.type = 'R' THEN 'ROOT' WHEN test.type = 'A' THEN 'ATOM' "
            "WHEN test.type = 'C' THEN 'COMPOSITE' ELSE 'SAY' END, "
            "probability:item.probability, traceUUID:$traceUUID, "
            "traceMP_Code: substring(value.mp_code, 3,40)}) YIELD node RETURN node"
        )

        # Creates the relationships (edges) between the nodes of this import with properties
        # probability, label, traceUUID and traceMP_Code
        create_edges = (
            "CALL apoc.load.json($fileUrl) YIELD value "
            "UNWIND value.graphs AS item UNWIND item.edges AS test "
            "MATCH (a:MP_Node {traceIndex:item.index, id:toInteger(test.source), traceUUID:$traceUUID}), "
            "(b:MP_Node {traceIndex:item.index, id:toInteger(test.target), traceUUID:$traceUUID}) "
            "CALL apoc.merge.relationship(a,test.relation,{probability:item.probability, "
            "label:CASE WHEN item.label IS NULL THEN 'null' ELSE item.label END, "
            "traceUUID:$traceUUID, traceMP_Code: substring(value.mp_code, 3,40)},{},b) YIELD rel RETURN rel"
        )

        # Renames the "MP_Node" label to the node's own type; run once per type below
        relabel_nodes = (
            "match (n:MP_Node) WHERE n.type = $nodeType with collect(n) as nodes "
            "CALL apoc.refactor.rename.label('MP_Node',$nodeType,nodes) "
            "yield errorMessages as eMessages return eMessages"
        )

        # Neo4j Note: creating an index that already exists, or dropping one that does not, is an error
        drop_index = 'DROP INDEX mpindex'

        # To inspect a generated Cypher statement, print the query together with its parameters,
        # e.g. print(create_nodes, import_params)

        # consume() waits for each statement to finish and raises its errors right here
        session.run(create_index).consume()
        try:
            session.run(create_nodes, import_params).consume()
            session.run(create_edges, import_params).consume()
            for node_type in ('ROOT', 'ATOM', 'COMPOSITE', 'SAY'):
                session.run(relabel_nodes, {'nodeType': node_type}).consume()
        finally:
            # Always remove the temporary index, even when an import step failed;
            # otherwise the next call would fail on CREATE INDEX.
            session.run(drop_index).consume()

    # Timing for the import of traces
    end = time.time()
    print("Import into neo4j successful. Time elapsed was " + "%.2f" % (end-start) + " seconds")
# In[3]:
# Each call prompts for one trace file and imports it under a freshly generated traceUUID.
load_data_into_neo4j()
# In[5]:
# Second import: the function is simply called again for an additional trace file.
load_data_into_neo4j()
# In[ ]:
# In[ ]:
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment