#!/usr/bin/env python
# coding: utf-8
"""MPtoNeo4j: connect to a Neo4j instance for importing Monterey Phoenix traces.

This part of the script interactively asks for the Bolt URL, username and
password of a Neo4j instance and creates the module-level ``driver`` that the
import code uses.
"""

# In[1]:

import getpass
import os
import time
import uuid

from neo4j import GraphDatabase

# Note: To install the latest stable release of the neo4j python driver use:
#     python -m pip install neo4j
#
# neo4j   - needed to connect to the neo4j database using the BOLT API.
# uuid    - needed to generate a unique identifier for each set of Monterey
#           Phoenix traces imported. Aside from the neo4j module, this is the
#           only essential one.
# os      - needed to generate a list of files in the neo4j import folder.
#           Not essential; it is for user convenience.
# time    - needed to report the elapsed time of the neo4j import.
#           Not essential; it is for user convenience.
# getpass - reads the password without echoing it to the terminal.

# An empty answer (just hitting enter) selects the default shown in each prompt.
uri = input('Please type in the URL of your Neo4j instance and port. Hit enter for Default - bolt://localhost:7687') or 'bolt://localhost:7687'

# Note: this is only intended to be used on your local system because the
# username and password are being passed in cleartext.
username = input('Please type in the username for your Neo4j instance. Hit enter for default - neo4j') or 'neo4j'
# getpass keeps the typed password off the screen (input() would echo it).
password = getpass.getpass('Please type in the password for your Neo4j instance. Hit enter for default - password') or 'password'

# Shared driver object used by the import code for every session it opens.
driver = GraphDatabase.driver(uri, auth=(username, password))


# In[2]:

# Neo4j Note: You must have APOC Plugin installed for this to work - https://neo4j.com/docs/labs/apoc/current/
# Neo4j Note: You must enable file import in your neo4j config (windows) or apoc.conf file (linux) - apoc.import.file.enabled=true
# Neo4j Note: For Linux - If you don't have an apoc.conf file, you'll need to create it.
# Neo4j Note (apoc.conf, continued): in a newly created apoc.conf file, apoc.import.file.enabled=true will be your only entry
# Neo4j Note: More information can be found at https://neo4j.com/docs/labs/apoc/current/import/load-json/
# Neo4j Note: For ingest of more than 1000 traces, you will need to increase the heap memory available in your neo4j config file
# Neo4j Note: Increasing heap memory available is done by changing 'dbms.memory.heap.max_size=1G' to larger than 1G of memory

# To obtain a copy of a Cypher script below, just print the desired constant.
# The file URL and trace UUID are passed separately as the query parameters
# $url and $traceUUID rather than being pasted into the query text.

# Creates an index for the MP Gryphon trace graph nodes imported, to greatly
# increase the speed of refactoring the nodes to their type as found in MP.
_CREATE_INDEX_QUERY = 'CREATE INDEX mpindex FOR (a:MP_Node) ON (a.id, a.traceIndex, a.traceUUID)'

# Deletes that index again, because all "MP_Node" nodes have been refactored
# into their appropriate type by then.
# Neo4j Note: If you attempt to create an index that already exists or delete
# an index that does not exist, Neo4j will return an error.
_DROP_INDEX_QUERY = 'DROP INDEX mpindex'

# Creates the nodes from the MP Gryphon trace as "MP_Node" nodes with the
# properties "traceIndex", "label", "id", "type", "probability", "traceUUID"
# and "traceMP_Code" (an excerpt of the file's mp_code entry, taken with
# substring(value.mp_code, 3, 40)).
_LOAD_NODES_QUERY = (
    "CALL apoc.load.json($url) YIELD value "
    "UNWIND value.graphs AS item "
    "UNWIND item.nodes AS test "
    "CALL apoc.merge.node(['MP_Node'],{traceIndex:item.index, label:test.label, "
    "id:toInteger(test.id), "
    "type:CASE WHEN test.type = 'R' THEN 'ROOT' WHEN test.type = 'A' THEN 'ATOM' "
    "WHEN test.type = 'C' THEN 'COMPOSITE' ELSE 'SAY' END, "
    "probability:item.probability, traceUUID:$traceUUID, "
    "traceMP_Code: substring(value.mp_code, 3,40)}) "
    "YIELD node RETURN node"
)

# Creates the relationships (edges) between the MP Gryphon trace nodes with
# the properties "probability", "label", "traceUUID" and "traceMP_Code".
_LOAD_EDGES_QUERY = (
    "CALL apoc.load.json($url) YIELD value "
    "UNWIND value.graphs AS item "
    "UNWIND item.edges AS test "
    "MATCH (a:MP_Node {traceIndex:item.index, id:toInteger(test.source), traceUUID:$traceUUID}), "
    "(b:MP_Node {traceIndex:item.index, id:toInteger(test.target), traceUUID:$traceUUID}) "
    "CALL apoc.merge.relationship(a,test.relation,{probability:item.probability, "
    "label:CASE WHEN item.label IS NULL THEN 'null' ELSE item.label END, "
    "traceUUID:$traceUUID, traceMP_Code: substring(value.mp_code, 3,40)},{},b) "
    "YIELD rel RETURN rel"
)

# Refactors the "MP_Node" nodes whose type property equals $label so that they
# carry that label instead; run once per entry of _MP_NODE_TYPES.
_RELABEL_QUERY = (
    "match (n:MP_Node) WHERE n.type = $label "
    "with collect(n) as nodes "
    "CALL apoc.refactor.rename.label('MP_Node',$label,nodes) "
    "yield errorMessages as eMessages return eMessages"
)

# The node types assigned by _LOAD_NODES_QUERY.
_MP_NODE_TYPES = ('ROOT', 'ATOM', 'COMPOSITE', 'SAY')


def _prompt_for_trace_file(session):
    """Ask the user which file to import and return the name they chose.

    Looks up the Neo4j home directory through ``session``, lists the files in
    its ``import`` sub-directory as a convenience, and prompts for a file
    name. Hitting enter selects the first file listed. Files need to be in
    the import directory unless global file imports are enabled in neo4j.

    Raises:
        RuntimeError: if the server does not report
            ``dbms.directories.neo4j_home``.
        ValueError: if the import directory is empty and nothing was typed.
    """
    rows = session.run(
        "CALL dbms.listConfig() YIELD name, value "
        "WHERE name = 'dbms.directories.neo4j_home' RETURN value"
    ).values()
    if not rows:
        raise RuntimeError(
            "Neo4j did not return a value for 'dbms.directories.neo4j_home'; "
            "cannot locate the import directory."
        )

    # List the directory by path instead of chdir-ing into it, so the working
    # directory of the calling process is left untouched.
    import_dir = os.path.join(rows[0][0], 'import')
    files = os.listdir(import_dir)

    question = ("Please type in the filename for the Monterey Phoenix Trace "
                "(.gry) that you want to import. \n")
    if files:
        listing = ", ".join(repr(name) for name in files)
        return input(question + "Files in your import directory are: " + listing
                     + " Hit Enter for " + str(files[0])) or files[0]

    # Empty import directory: there is no default to fall back on.
    chosen = input(question + "Your import directory is empty, so there is no default. ")
    if not chosen:
        raise ValueError(
            "No file name was entered and the Neo4j import directory "
            + import_dir + " contains no files."
        )
    return chosen


def load_data_into_neo4j():
    """Import one Monterey Phoenix trace export (.gry file) into Neo4j.

    Uses the module-level ``driver``. Prompts for the file to import, loads
    its nodes and edges through APOC as ``MP_Node`` nodes tagged with a fresh
    trace UUID (so several exports can be imported side by side), renames the
    ``MP_Node`` label to ROOT, ATOM, COMPOSITE or SAY according to each node's
    type, and prints how long the import took.

    The temporary ``mpindex`` index is dropped even when an import step fails,
    so a failed run does not block the next one with an "index already
    exists" error.

    Raises:
        RuntimeError, ValueError: see ``_prompt_for_trace_file``.
    """
    # Unique identifier for this import, stored on every node and edge.
    trace_uuid = str(uuid.uuid4())

    with driver.session() as session:
        mpfile = _prompt_for_trace_file(session)
        # Passed as query parameters so the file name cannot alter the Cypher.
        params = {'url': 'file:///' + mpfile, 'traceUUID': trace_uuid}

        # Start timing after the prompt so only the import itself is measured.
        start = time.perf_counter()

        session.run(_CREATE_INDEX_QUERY).consume()
        try:
            session.run(_LOAD_NODES_QUERY, params).consume()
            session.run(_LOAD_EDGES_QUERY, params).consume()
            for label in _MP_NODE_TYPES:
                session.run(_RELABEL_QUERY, {'label': label}).consume()
        finally:
            session.run(_DROP_INDEX_QUERY).consume()

    # The session is closed by the with-block above.
    elapsed = time.perf_counter() - start
    print("Import into neo4j successful. Time elapsed was %.2f seconds" % elapsed)


# Each call below prompts for and imports one trace file.

# In[3]:


load_data_into_neo4j()


# In[5]:


load_data_into_neo4j()