Source code for srlearn.database

# Copyright 2017, 2018, 2019 Alexander L. Hayes

"""
database.py

A BoostSRL database consists of positive examples, negative examples, and facts;
all of which need to be stored as .txt files on a file system.

Use Cases
---------

- Creating an instance of the database through code (write to location)
- Files already stored on the filesystem (copy to location)
- Examples stored in a RDBMS?

Examples
--------

Create a new instance of a database, add examples, and write them to the filesystem.

>>> from srlearn.database import Database
>>> db = Database()
>>> db.pos.append("student(alexander).")
>>> db.neg.append("student(sriraam).")
>>> db.facts.append("advises(alexander, sriraam).")

Create an instance of the database from an existing set of files.

>>> from srlearn.database import Database
>>> db = Database.from_files(pos="pos.pl", neg="neg.pl", facts="facts.pl")
"""

from shutil import copyfile
import pathlib


class Database:
    """Database of examples and facts.

    Holds four attributes: ``pos``, ``neg``, ``facts``, and ``modes``.
    ``pos``, ``neg``, and ``facts`` are either lists of strings (one clause
    per entry) or path-like references to files that already hold the data.
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(self):
        """Initialize a Database object

        A database (in this respect) contains positive examples, negative
        examples, facts, and is augmented with background knowledge.

        The implementation is done with four attributes: ``pos``, ``neg``,
        ``facts``, and ``modes``. Each attribute is a list that may be set by
        mutating, or loaded from files with :func:`Database.from_files`.

        Examples
        --------

        This initializes a Database object, then sets the ``pos`` attribute.

        >>> from srlearn import Database
        >>> db = Database()
        >>> db.pos = ["student(alexander)."]
        """
        self.pos = []
        self.neg = []
        self.facts = []
        self.modes = []

    def write(self, filename="train", location=pathlib.Path("train")) -> None:
        """Write the database to disk

        Three files are produced inside ``location``:
        ``<filename>_pos.txt``, ``<filename>_neg.txt``, and
        ``<filename>_facts.txt``. ``self.modes`` is not written here.

        Parameters
        ----------
        filename : str
            Name of the file to write to: 'train' or 'test'
        location : :class:`pathlib.Path` or str
            Directory where data should be written to. It is created
            (including parents) when it does not exist yet.

        Notes
        -----

        This function has polymorphic behavior. When attributes (``self.pos``,
        ``self.neg``, ``self.facts``) are lists of strings, the lists are
        written to files, one entry per line. When the attributes are
        (path-like) strings or pathlib Paths (:class:`pathlib.Path`), the
        files are copied.
        """
        # Accept plain strings as well as Path objects; ``joinpath`` below
        # needs a Path.
        location = pathlib.Path(location)
        # Without this the default ``location`` fails unless the caller
        # happened to create the directory beforehand.
        location.mkdir(parents=True, exist_ok=True)

        def _write(_object, _type):
            target = location.joinpath("{0}_{1}.txt".format(filename, _type))
            if isinstance(_object, list):
                # In-memory data: one example per line.
                with open(target, "w") as _fh:
                    _fh.writelines(example + "\n" for example in _object)
            else:
                # Lazy-loaded data: the attribute is a path to an existing file.
                copyfile(str(_object), str(target))

        _write(self.pos, "pos")
        _write(self.neg, "neg")
        _write(self.facts, "facts")

    def __repr__(self) -> str:
        """Return the positive examples, negative examples, and facts as text."""
        return (
            "Positive Examples:\n"
            + str(self.pos)
            + "\nNegative Examples:\n"
            + str(self.neg)
            + "\nFacts:\n"
            + str(self.facts)
        )

    @classmethod
    def from_files(cls, pos="pos.pl", neg="neg.pl", facts="facts.pl", lazy_load=True):
        """Load files into a Database

        Return an instance of a Database with pos, neg, and facts set to the
        contents of files. By default this performs a "lazy load," where the
        files are not loaded into Python lists, but copied at learning time.

        Parameters
        ----------
        pos : str or pathlib.Path
            Location of positive examples
        neg : str or pathlib.Path
            Location of negative examples
        facts : str or pathlib.Path
            Location of facts
        lazy_load : bool (default: True)
            Skip loading the files into a list

        Returns
        -------
        db : srlearn.Database
            Instance of a Database object (or of the subclass this is
            called on)
        """
        # ``cls()`` rather than ``Database()`` so subclasses get their own type.
        _db = cls()

        if lazy_load:
            # Only remember where the files live; nothing is opened here.
            _db.pos = pos
            _db.neg = neg
            _db.facts = facts
            return _db

        with open(pos, "r") as _fh:
            _db.pos = _fh.read().splitlines()
        with open(neg, "r") as _fh:
            _db.neg = _fh.read().splitlines()
        with open(facts, "r") as _fh:
            _db.facts = _fh.read().splitlines()
        return _db