Upload New File

81e6397e · Senft, Michael · 341d33df · 81e6397e
Commit 81e6397e authored 4 years ago by Senft, Michael
--- a/MPtoNeo4j_-_GitLab.py
+++ b/MPtoNeo4j_-_GitLab.py
+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+import getpass
+import os
+import time
+import uuid
+from neo4j import GraphDatabase
+#Note: To install the latest stable release of the neo4j python driver use:
+# python -m pip install neo4j
+# neo4j module needed to connect to the neo4j database using the BOLT API 
+# uuid module needed to generate unique identifier for each set of Monterey Phoenix traces imported. Aside from the neo4j module, this is the only one essential. 
+# os module needed to generate a list of files in the neo4j import folder. The code using this module is not essential, rather it is for user convenience.
+# time module needed to generate elapsed time for neo4j import. The code using this module is not essential, rather it is for user convenience.
+# Interactive connection setup: each prompt falls back to its default when the user just hits Enter.
+uri = input('Please type in the URL of your Neo4j instance and port. Hit enter for Default - bolt://localhost:7687') or 'bolt://localhost:7687'
+#Note this is only intended to be used on your local system because the username and password are held and passed as plain strings
+# The value entered here is used as the authentication username for the driver.
+username = input('Please type in the username for your Neo4j instance. Hit enter for default - neo4j') or 'neo4j'
+# getpass reads the password without echoing it to the terminal / notebook output.
+password = getpass.getpass('Please type in the password for your Neo4j instance. Hit enter for default - password') or 'password'
+# Module-level driver shared by load_data_into_neo4j().
+driver = GraphDatabase.driver(uri, auth=(username, password))
+# In[2]:
+#Neo4j Note: You must have APOC Plugin installed for this to work - https://neo4j.com/docs/labs/apoc/current/
+#Neo4j Note: You must enable file import in your neo4j config (windows) or apoc.conf file (linux) - apoc.import.file.enabled=true
+#Neo4j Note: For Linux - If you don't have an apoc.conf file, you'll need to create it. The command above will be your only entry
+#Neo4j Note: More information can be found at https://neo4j.com/docs/labs/apoc/current/import/load-json/
+#Neo4j Note: For ingest of more than 1000 traces, you will need to increase the heap memory available in your neo4j config file
+#Neo4j Note: Increasing heap memory available is done by changing 'dbms.memory.heap.max_size=1G' to larger than 1G of memory   
+def load_data_into_neo4j():
+    """Import one Monterey Phoenix trace export (.gry file) into Neo4j.
+
+    Lists the files in the Neo4j import folder, prompts for the file to
+    import, loads its nodes and edges through APOC, relabels the imported
+    MP_Node nodes to ROOT / ATOM / COMPOSITE / SAY according to their
+    ``type`` property, and prints the elapsed time.
+
+    Uses the module-level ``driver``. Every import is tagged with a freshly
+    generated UUID so that several trace files can be imported side by side.
+
+    Raises:
+        FileNotFoundError: if the import folder is empty and no file name
+            was typed at the prompt.
+    """
+    # Unique identifier for this import; stored on every node and relationship created below.
+    trace_uuid = str(uuid.uuid4())
+    # Start time, used only for the elapsed-time message at the end.
+    start = time.time()
+    with driver.session() as session:
+        # Ask Neo4j for its home directory so the import folder can be listed for the user.
+        neodir = session.run(
+            "CALL dbms.listConfig() YIELD name, value "
+            "WHERE name = 'dbms.directories.neo4j_home' RETURN value"
+        ).values()
+        import_dir = neodir[0][0] + '/import'
+        # List the folder directly instead of chdir-ing into it, so the
+        # process working directory is left untouched.
+        files = os.listdir(import_dir)
+        prompt = (
+            "Please type in the filename for the Monterey Phoenix Trace (.gry) that you want to import. "
+            "Files in your import directory are: " + str(files).strip('[]')
+        )
+        if files:
+            # The first listed file is offered as the default.
+            mpfile = input(prompt + "   Hit Enter for " + str(files[0])) or files[0]
+        else:
+            # Empty folder: there is no default to offer, so a name must be typed.
+            mpfile = input(prompt)
+            if not mpfile:
+                raise FileNotFoundError(
+                    "No files found in " + import_dir + " and no filename was entered"
+                )
+        # The file URL and UUID are passed as Cypher parameters rather than
+        # concatenated into the query text, so quotes or other special
+        # characters in the file name cannot alter the statement.
+        params = {'url': 'file:///' + mpfile, 'traceUUID': trace_uuid}
+        # Temporary index on MP_Node to speed up the edge MATCH below.
+        create_index = 'CREATE INDEX mpindex FOR (a:MP_Node) ON (a.id, a.traceIndex, a.traceUUID)'
+        # Creates one MP_Node per trace node with traceIndex, label, id, type,
+        # probability, traceUUID and traceMP_Code (up to 40 characters of
+        # mp_code starting at offset 3).
+        load_nodes = (
+            "CALL apoc.load.json($url) YIELD value "
+            "UNWIND value.graphs AS item UNWIND item.nodes AS test "
+            "CALL apoc.merge.node(['MP_Node'],{traceIndex:item.index, label:test.label, "
+            "id:toInteger(test.id), type:CASE WHEN test.type = 'R' THEN 'ROOT' "
+            "WHEN test.type = 'A' THEN 'ATOM' WHEN test.type = 'C' THEN 'COMPOSITE' "
+            "ELSE 'SAY' END, probability:item.probability, traceUUID:$traceUUID, "
+            "traceMP_Code: substring(value.mp_code, 3,40)}) YIELD node RETURN node"
+        )
+        # Creates the relationships between the nodes of this import, with
+        # probability, label, traceUUID and traceMP_Code properties.
+        load_edges = (
+            "CALL apoc.load.json($url) YIELD value "
+            "UNWIND value.graphs AS item UNWIND item.edges AS test  "
+            "MATCH (a:MP_Node {traceIndex:item.index, id:toInteger(test.source), traceUUID:$traceUUID}), "
+            "(b:MP_Node {traceIndex:item.index, id:toInteger(test.target), traceUUID:$traceUUID}) "
+            "CALL apoc.merge.relationship(a,test.relation,{probability:item.probability, "
+            "label:CASE WHEN item.label IS NULL THEN 'null' ELSE item.label END, "
+            "traceUUID:$traceUUID, traceMP_Code: substring(value.mp_code, 3,40)},{},b) "
+            "YIELD rel RETURN rel"
+        )
+        # Renames the MP_Node label to the label given in $label for all nodes of that type.
+        relabel = (
+            "match (n:MP_Node) WHERE n.type = $label with collect(n) as nodes "
+            "CALL apoc.refactor.rename.label('MP_Node',$label,nodes) "
+            "yield errorMessages as eMessages return eMessages"
+        )
+        # Neo4j Note: creating an index that already exists or dropping one that does not exist returns an error.
+        drop_index = 'DROP INDEX mpindex'
+        #To inspect the Cypher that is sent, print the desired query string together with params.
+        #print(load_nodes, params)
+        # consume() forces each statement to finish (and raise on failure)
+        # before the next one starts, so the success message below is only
+        # printed when every step actually succeeded.
+        session.run(create_index).consume()
+        try:
+            session.run(load_nodes, params).consume()
+            session.run(load_edges, params).consume()
+            for label in ('ROOT', 'ATOM', 'COMPOSITE', 'SAY'):
+                session.run(relabel, {'label': label}).consume()
+        finally:
+            # Always remove the temporary index, even if a step failed;
+            # otherwise the next call would fail on CREATE INDEX.
+            session.run(drop_index).consume()
+    # Timing for the import of traces.
+    end = time.time()
+    print("Import into neo4j successful. Time elapsed was " + "%.2f" % (end-start) + " seconds")
+# In[3]:
+# Runs one interactive import: the function prompts for a file name and tags the imported trace with a new UUID.
+load_data_into_neo4j()
+# In[5]:
+# Runs a second interactive import (presumably for another .gry file); it prompts again and generates its own UUID.
+load_data_into_neo4j()
+# In[ ]:
+# In[ ]: