# Source code for srlearn.boostsrl

# Copyright 2017, 2018, 2019 Alexander L. Hayes

"""
(Deprecated) boostsrl class for training and testing.

.. warning:: This module is deprecated, pending removal in 0.6.0.
    See :class:`srlearn.rdn` instead.
"""

import os
import re
import subprocess
import warnings


# Emitted as a side effect of importing this module. No ``category`` argument
# is passed, so ``warnings.warn`` falls back to its default category
# (``UserWarning``).
warnings.warn(
    "Deprecation Warning: "
    "'srlearn.boostsrl' is deprecated and will be removed in 0.6.0. "
    "'srlearn.rdn' will replace this functionality."
)

# Mode definitions and examples can be verified with regular expressions.
#
# ``mode_re`` accepts a predicate name followed by a parenthesized,
# comma-separated argument list in which every argument carries a ``+``, ``-``
# or ``#`` prefix, followed by a period, e.g. ``friends(+person, -person).``
#
# ``exam_re`` accepts the same shape without the prefixes, e.g.
# ``friends(alice, bob).``
#
# In both patterns names are restricted to ASCII letters and digits (no
# underscores), and commas may be followed by any number of spaces. Neither
# pattern is anchored, so whether the whole string must match depends on the
# method the caller uses (``search`` vs. ``fullmatch``).
mode_re = re.compile(
    r"[a-zA-Z0-9]*\(((\+|\-|\#)[a-zA-Z0-9]*,( )*)*(\+|\-|\#)[a-zA-Z0-9]*\)\."
)
exam_re = re.compile(r"[a-zA-Z0-9]*\(([a-zA-Z0-9]*,( )*)*[a-zA-Z0-9]*\)\.")


def example_data(example):
    """
    .. deprecated:: 0.5.0
       Use :class:`srlearn.example_data` instead.

    For demo purposes, include some sample data.

    .. code-block:: python

        from srlearn.boostsrl import example_data

        train_pos = example_data('train_pos')
        train_neg = example_data('train_neg')
        train_facts = example_data('train_facts')

    :param example: one of ``'train_pos'``, ``'train_neg'``,
        ``'train_facts'``, ``'test_pos'``, ``'test_neg'``, ``'test_facts'``
        or ``'background'``.
    :return: the matching attribute of the package's ``example_data`` module.
    :raises Exception: if ``example`` is not one of the names above.
    """
    print(
        "Deprecation Warning: "
        "'srlearn.boostsrl.example_data' will be removed in 0.6.0. "
        "'srlearn.example_data' will replace this functionality."
    )

    from . import example_data as ex_data

    # (requested name, attribute holding the split, attribute within the split)
    known_samples = (
        ("train_pos", "train", "pos"),
        ("train_neg", "train", "neg"),
        ("train_facts", "train", "facts"),
        ("test_pos", "test", "pos"),
        ("test_neg", "test", "neg"),
        ("test_facts", "test", "facts"),
        ("background", "test", "background"),
    )

    # Scan with ``==`` (rather than a dict lookup) so unhashable arguments
    # still end up at the "does not exist" error below.
    for name, split, field in known_samples:
        if example == name:
            return getattr(getattr(ex_data, split), field)

    raise Exception("Attempted to use sample data that does not exist.")
def call_process(call):
    """
    .. deprecated:: 0.5.0
       Not intended as a public method.

    Run ``call`` through the system shell and block until it has finished.

    The exit status of the command is not inspected: a command that runs and
    exits with a non-zero status does not raise.

    :param call: the command line, as a single string handed to the shell.
    :raises Exception: if the process could not be started or waited on. The
        underlying error is chained as ``__cause__``.
    """
    try:
        # NOTE(review): ``shell=True`` executes ``call`` verbatim in a shell.
        # The callers in this module rely on shell features (``cd``, ``;``,
        # redirection), so it is kept; never pass untrusted text here.
        subprocess.run(call, shell=True)
    except Exception as err:
        # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate instead of being re-labelled.
        raise Exception(
            "Encountered problems while running process: ", call
        ) from err
def inspect_mode_syntax(example):
    """
    .. deprecated:: 0.5.0
       Not intended as a public method.

    Check a single mode declaration against ``mode_re``.

    :param example: the mode string to check.
    :raises Exception: if ``mode_re`` finds no match anywhere in ``example``.
    """
    if mode_re.search(example):
        return

    message = (
        "Error when checking background knowledge; incorrect syntax: "
        + example
        + "\nBackground knowledge should only contain letters and numbers, "
        + "of the form: predicate(+var1, -var2)."
    )
    raise Exception(message)
def inspect_example_syntax(example):
    """
    .. deprecated:: 0.5.0
       Not intended as a public method.

    Check a single example (or fact) against ``exam_re``.

    :param example: the example string to check.
    :raises Exception: if ``exam_re`` finds no match anywhere in ``example``.
    """
    if exam_re.search(example):
        return

    message = "Error when checking example; incorrect syntax: " + example
    raise Exception(message)
def write_to_file(content, path):
    """
    .. deprecated:: 0.5.0
       Not intended as a public method.

    Write every item of ``content`` to ``path``, one item per line.

    The file is opened in ``"w"`` mode, so existing content is replaced. A
    newline is appended to every item, including the last one.

    :param content: iterable of strings.
    :param path: location of the file to write.
    """
    # The ``with`` block closes the file; no explicit ``close()`` is needed.
    with open(path, "w") as f:
        f.writelines(line + "\n" for line in content)
class modes(object):
    def __init__(
        self,
        background,
        target,
        bridgers=None,
        precomputes=None,
        loadAllLibraries=False,
        useStdLogicVariables=False,
        usePrologVariables=False,
        recursion=False,
        lineSearch=False,
        resampleNegs=False,
        treeDepth=None,
        maxTreeDepth=None,
        nodeSize=None,
        numOfClauses=None,
        numOfCycles=None,
        minLCTrees=None,
        incrLCTrees=None,
    ):
        """
        Validate the arguments, assemble the background knowledge and write
        it to ``background.txt`` in the current working directory.

        background: a list of mode strings; each one is checked with
            ``inspect_mode_syntax`` and emitted as ``mode: <string>``.
        target: a list of predicate heads that learning/inference will be
            performed on.
        bridgers: optional list; each item is emitted as ``bridger: <item>``.
        precomputes: optional dictionary; for each key the value is emitted,
            followed by ``mode: <key>``.

        The remaining keyword arguments are emitted as parameter lines when
        they are neither ``False`` nor ``None``.

        Raises ``TypeError`` if an argument has the wrong type.
        """
        print(
            "Deprecation Warning: "
            "'srlearn.boostsrl' is deprecated and will be removed in 0.6.0. "
            "'srlearn.rdn' will replace this functionality."
        )

        # NOTE: the order of these assignments is significant. The parameter
        # lines below are generated by walking ``self.__dict__``, which keeps
        # insertion order.
        self.target = target
        self.bridgers = bridgers
        self.precomputes = precomputes
        self.loadAllLibraries = loadAllLibraries
        self.useStdLogicVariables = useStdLogicVariables
        self.usePrologVariables = usePrologVariables
        self.treeDepth = treeDepth
        self.maxTreeDepth = maxTreeDepth
        self.nodeSize = nodeSize
        self.numOfClauses = numOfClauses
        self.numOfCycles = numOfCycles
        self.minLCTrees = minLCTrees
        self.incrLCTrees = incrLCTrees
        self.recursion = recursion
        self.lineSearch = lineSearch
        self.resampleNegs = resampleNegs

        # Input validation (Issue #5): error message -> "did the check pass?".
        # The first failing entry, in this order, is the one reported.
        checks = {
            "background should be a list.": isinstance(background, list),
            "target should be a list.": isinstance(target, list),
            "bridgers should be a list.": isinstance(bridgers, list)
            or bridgers is None,
            "precomputes should be a dictionary.": isinstance(precomputes, dict)
            or precomputes is None,
            "loadAllLibraries should be boolean.": isinstance(
                loadAllLibraries, bool
            ),
            "useStdLogicVariables should be boolean.": isinstance(
                useStdLogicVariables, bool
            ),
            "usePrologVariables should be boolean.": isinstance(
                usePrologVariables, bool
            ),
            "recursion should be boolean.": isinstance(recursion, bool),
            "lineSearch should be boolean.": isinstance(lineSearch, bool),
            "resampleNegs should be boolean.": isinstance(resampleNegs, bool),
            "treeDepth should be an int.": isinstance(treeDepth, int)
            or treeDepth is None,
            "maxTreeDepth should be an int.": isinstance(maxTreeDepth, int)
            or maxTreeDepth is None,
            "nodeSize should be an int.": isinstance(nodeSize, int)
            or nodeSize is None,
            "numOfClause should be an int.": isinstance(numOfClauses, int)
            or numOfClauses is None,
            "numOfCycles should be an int.": isinstance(numOfCycles, int)
            or numOfCycles is None,
            "minLCTrees should be an int.": isinstance(minLCTrees, int)
            or minLCTrees is None,
            "incrLCTrees should be an int.": isinstance(incrLCTrees, int)
            or incrLCTrees is None,
        }
        for message, passed in checks.items():
            if not passed:
                raise TypeError("Error when checking type: " + message)

        # Most arguments are optional; keep only the attributes that were
        # actually set (neither False nor None).
        relevant = [
            [name, setting]
            for name, setting in vars(self).items()
            if not (setting is False or setting is None)
        ]
        self.relevant = relevant

        logic_flags = ("useStdLogicVariables", "usePrologVariables")
        handled_separately = ("target", "bridgers", "precomputes")

        background_knowledge = []
        for name, setting in relevant:
            if name in handled_separately:
                continue
            if name in logic_flags and setting:
                entry = name + ": " + str(setting).lower() + "."
            elif setting:
                entry = "setParam: " + name + "=" + str(setting).lower() + "."
            else:
                entry = "setParam: " + name + "=" + str(setting) + "."
            background_knowledge.append(entry)

        for pred in background:
            inspect_mode_syntax(pred)
            background_knowledge.append("mode: " + pred)

        if self.bridgers is not None:
            for bridger in self.bridgers:
                background_knowledge.append("bridger: " + bridger)

        if self.precomputes is not None:
            for head, rule in self.precomputes.items():
                background_knowledge.append(rule)
                background_knowledge.append("mode: " + head)

        # Write the newly created background_knowledge to a file: background.txt
        self.background_knowledge = background_knowledge
        write_to_file(background_knowledge, "background.txt")
class train(object):
    """
    .. deprecated:: 0.5.0
       Use :class:`srlearn.rdn` instead.

    Constructing an instance checks the examples, writes them under
    ``srlearn/train/`` and runs ``v1-0.jar`` in learning mode through the
    shell, with output redirected to ``srlearn/train_output.txt``.
    """

    def __init__(
        self,
        background,
        train_pos,
        train_neg,
        train_facts,
        save=False,
        advice=False,
        softm=False,
        alpha=0.5,
        beta=-2,
        trees=10,
    ):
        """
        :param background: object whose ``target`` attribute lists the
            predicates to learn.
        :param train_pos: positive examples (iterable of strings).
        :param train_neg: negative examples (iterable of strings).
        :param train_facts: facts (iterable of strings).
        :param save: accepted but not used by this method.
        :param advice: stored on the instance; not passed to the learner here.
        :param softm: stored on the instance; not passed to the learner here.
        :param alpha: stored on the instance; not passed to the learner here.
        :param beta: stored on the instance; not passed to the learner here.
        :param trees: number of trees, passed as ``-trees``.
        :raises Exception: if an example fails the syntax check.
        """
        print(
            "Deprecation Warning: "
            "'srlearn.boostsrl' is deprecated and will be removed in 0.6.0. "
            "'srlearn.rdn' will replace this functionality."
        )

        self.target = background.target
        self.train_pos = train_pos
        self.train_neg = train_neg
        self.train_facts = train_facts
        self.advice = advice
        self.softm = softm
        self.alpha = alpha
        self.beta = beta
        self.trees = trees

        # Syntax checking for examples in each set. Everything is validated
        # before any file is written.
        for example in self.train_pos:
            inspect_example_syntax(example)
        for example in self.train_neg:
            inspect_example_syntax(example)
        for example in self.train_facts:
            inspect_example_syntax(example)

        write_to_file(self.train_pos, "srlearn/train/train_pos.txt")
        write_to_file(self.train_neg, "srlearn/train/train_neg.txt")
        write_to_file(self.train_facts, "srlearn/train/train_facts.txt")

        CALL = (
            "(cd srlearn; java -jar v1-0.jar -l -train train/ -target "
            + ",".join(self.target)
            + " -trees "
            + str(self.trees)
            + " > train_output.txt 2>&1)"
        )
        call_process(CALL)

    def tree(self, treenumber, target, image=False):
        """
        Return one learned tree.

        :param treenumber: index of the tree; valid values are
            ``0 .. self.trees - 1``.
        :param target: name of the target predicate, used to build the path.
        :param image: if true, read the ``.dot`` file and return it wrapped
            in a ``graphviz.Source``; otherwise return the text of the
            ``.tree`` file.
        :raises Exception: if ``treenumber`` is outside the valid range.
        """
        # Negative indices are rejected as well: no file exists for them, and
        # failing here gives the same clear error as an index that is too big.
        if treenumber < 0 or treenumber > (self.trees - 1):
            raise Exception("Tried to find a tree that does not exist.")

        if image:
            # Written with Jupyter notebooks in mind.
            from graphviz import Source

            tree_file = (
                "srlearn/train/models/bRDNs/dotFiles/WILLTreeFor_"
                + target
                + str(treenumber)
                + ".dot"
            )
            with open(tree_file, "r") as f:
                tree_output = "".join(f.read().splitlines())
            return Source(tree_output)

        tree_file = (
            "srlearn/train/models/bRDNs/Trees/"
            + target
            + "Tree"
            + str(treenumber)
            + ".tree"
        )
        with open(tree_file, "r") as f:
            return f.read()

    def _get_training_time(self):
        """
        Find the "Total learning time" line in ``srlearn/train_output.txt``
        and return it as a list of whitespace-separated tokens.

        :raises IndexError: if the output contains no such line.
        """
        with open("srlearn/train_output.txt", "r") as f:
            text = f.read()
        line = re.findall(r"% Total learning time \(\d* trees\):.*", text)
        # Remove the last character "." from the line and split it on spaces.
        return line[0][:-1].split()

    def _training_time_to_float(self, splitline):
        """
        Convert the tokens of the training-time line into a total number of
        seconds.

        For each unit word present in ``splitline`` the token directly before
        it is read as the amount.

        :param splitline: list of tokens, e.g. ``["3.5", "seconds"]``.
        :return: the summed duration in seconds (``0`` if no unit is present).
        """
        conversions = (
            ("milliseconds", lambda amount: amount / 1000),
            ("seconds", lambda amount: amount),
            ("minutes", lambda amount: amount * 60),
            ("hours", lambda amount: amount * 3600),
            ("days", lambda amount: amount * 86400),
        )
        seconds = [
            to_seconds(float(splitline[splitline.index(unit) - 1]))
            for unit, to_seconds in conversions
            if unit in splitline
        ]
        return sum(seconds)

    def traintime(self) -> float:
        """
        Return the total training time in seconds, read from
        ``srlearn/train_output.txt``.
        """
        splitline = self._get_training_time()
        return self._training_time_to_float(splitline)
[docs]class test(object): """ .. deprecated:: 0.5.0 Use :class:`srlearn.rdn` instead. """ # Possibly a partial fix to Issue #3: checking for the .aucTemp.txt.lock if os.path.isfile("srlearn/test/AUC/.aucTemp.txt.lock"): print("Found lock file srlearn/test/AUC/.aucTemp.txt.lock, removing it:") os.remove("srlearn/test/AUC/.aucTemp.txt.lock") def __init__(self, model, test_pos, test_neg, test_facts, trees=10): write_to_file(test_pos, "srlearn/test/test_pos.txt") write_to_file(test_neg, "srlearn/test/test_neg.txt") write_to_file(test_facts, "srlearn/test/test_facts.txt") print( "Deprecation Warning: " "'srlearn.boostsrl' is deprecated and will be removed in 0.6.0. " "'srlearn.rdn' will replace this functionality." ) self.target = model.target CALL = ( "(cd srlearn; java -jar v1-0.jar -i -model train/models/ " + "-test test/ -target " + ",".join(self.target) + " -trees " + str(trees) + " -aucJarPath . > test_output.txt 2>&1)" ) call_process(CALL) def summarize_results(self): with open("srlearn/test_output.txt", "r") as f: text = f.read() line = re.findall( r"% AUC ROC.*|% AUC PR.*|% CLL.*|% Precision.*|% Recall.*|% F1.*", text, ) line = [ word.replace(" ", "") .replace("\t", "") .replace("%", "") .replace("atthreshold=", ",") for word in line ] results = { "AUC ROC": line[0][line[0].index("=") + 1 :], "AUC PR": line[1][line[1].index("=") + 1 :], "CLL": line[2][line[2].index("=") + 1 :], "Precision": line[3][line[3].index("=") + 1 :], "Recall": line[4][line[4].index("=") + 1 :], "F1": line[5][line[5].index("=") + 1 :], } return results def _float_split(self, line): """Returns a list where the first item is a string and the second is a float. Used when returning inference results. Examples -------- >>> test._float_split('target(pred1, pred2, pred3). 0.85691') ['target(pred1, pred2, pred3).', 0.85691]""" intermediate = line.rsplit(None, 1) return [intermediate[0], float(intermediate[1])]
[docs] def inference_results(self, target): """Converts BoostSRL results into a Python dictionary.""" results_file = "srlearn/test/results_" + target + ".db" inference_dict = {} with open(results_file, "r") as f: for line in f.read().splitlines(): full = self._float_split(line) key_predicate = full[0] value_regression = full[1] inference_dict[key_predicate] = value_regression return inference_dict