# Copyright © 2017, 2018, 2019 Alexander L. Hayes
"""Handler for file system operations on behalf of BoostSRL."""
import pathlib
import shutil
import os
__all__ = ["reset", "FileSystem"]
def reset(soft=False):
    """Reset the FileSystem

    A :class:`FileSystem` object may occasionally fail to clean up its
    temporary files when it is deallocated. This function offers a way to
    recover: a hard reset explicitly removes the whole data directory tree
    from the operating system.

    Parameters
    ----------
    soft : bool (Default: False)
        A *soft reset* reports the contents without removing anything.

    Returns
    -------
    results : list
        A "soft reset" returns a list of the contents.
        A "hard reset" returns an empty list.
        An empty list is also returned when the data directory is missing.

    Notes
    -----
    The FileSystem object reads and writes from a location alongside the
    package, so these files also disappear when the package is uninstalled.
    Uninstalling and reinstalling is overkill when this function is available.

    Examples
    --------
    This function can clean up the operating system after a failure, act as
    shorthand for branching on whether the data directory is empty, or simply
    guarantee that temporary files are gone.

    1. Use a "soft reset" to list contents without removing:

    .. code-block:: bash

        $ python -c "from srlearn import system_manager; print(system_manager.reset(soft=True))"
        ['data0', 'data1']

    2. Trigger conditional behavior. Here we report that the directory is empty.

    >>> from srlearn import system_manager
    >>> if not system_manager.reset(soft=True): # doctest: +SKIP
    ...     print("Currently Empty")
    Currently Empty

    3. Use a hard reset to remove any temporary files.

    >>> from srlearn import system_manager
    >>> system_manager.reset() # doctest: +SKIP
    []
    """
    # The data directory sits next to this module.
    data_root = pathlib.Path(__file__).parent / FileSystem.boostsrl_data_directory

    if data_root.exists():
        if soft:
            # Report only; leave everything on disk.
            return os.listdir(data_root)
        shutil.rmtree(data_root)

    # Either nothing existed or everything was just removed.
    return []
class BoostSRLFiles:
    """Collection of the file-system locations BoostSRL relies on.

    Every attribute is computed once in ``__init__`` and is meant to be
    treated as a constant afterwards.

    Parameters
    ----------
    directory : path-like object supporting ``/`` (e.g. :class:`pathlib.Path`)
        Working directory under which logs, data, and models are placed.
    here : path-like object supporting ``/`` (e.g. :class:`pathlib.Path`)
        Directory in which the backend jar files are looked up.
    """

    def __init__(self, directory, here) -> None:
        # Root of this allocation.
        self.DIRECTORY = directory

        # Backend jars are resolved relative to ``here``.
        self.BOOSTSRL_BACKEND = here / "BoostSRL.jar"
        self.SRLBOOST_BACKEND = here / "SRLBoost.jar"
        # Stored as the directory itself rather than a specific jar file.
        self.AUC_JAR = here

        # Log files written directly under the working directory.
        self.TRAIN_LOG = directory / "train_output.txt"
        self.TEST_LOG = directory / "test_output.txt"

        # Train/test data directories.
        self.TRAIN_DIR = directory / "train"
        self.TEST_DIR = directory / "test"

        # Model artifacts are nested beneath the training directory.
        self.MODELS_DIR = directory / "train/models/"
        self.BRDNS_DIR = directory / "train/models/bRDNs/"
        self.TREES_DIR = directory / "train/models/bRDNs/Trees"
        self.DOT_DIR = directory / "train/models/bRDNs/dotFiles"
class FileSystem:
    """BoostSRL File System

    BoostSRL has an implicit assumption that it has access to a file system. At runtime
    it needs to both read and write with files. This object provides a view into the
    files needed, requests files from the operating system, and (most importantly)
    prepares and cleans up the file system at allocation/de-allocation time.

    Examples
    --------
    This first example may not appear to do much, but behind the scenes it is
    creating directories for each instance of ``FileSystem``, and removing
    them when the instances are de-allocated.

    >>> from srlearn.system_manager import FileSystem
    >>> systems = []
    >>> for _ in range(5):
    ...     systems.append(FileSystem())

    Notes
    -----
    Ideally, each instance of a :class:`srlearn.rdn.BoostedRDN` should have its own directory
    where it can operate independently. But this can be problematic and will often
    lead to duplicated data and other problems if multiple models are learned in
    parallel on the same database.

    Another option (which may be more suited to parallel tree learning) would be to
    store data in a single location, but write the log files and models to separate
    locations.

    Attributes
    ----------
    files : BoostSRLFiles
        Paths to the jars, log files, and train/test/model directories that
        belong to the directory allocated for this instance.
    """

    # Prefix is the main directory that all databases will reside in.
    # In case of failure, this directory should be safe to delete.
    boostsrl_data_directory = "bsrl_data"

    def __init__(self):
        """Initialize a BoostSRL File System.

        This will create directories that are cleaned up when the instance
        is de-allocated.
        """
        _here = pathlib.Path(__file__).parent

        # Allocate a location where data can safely be stored.
        _data = _here.joinpath(FileSystem.boostsrl_data_directory)
        _allotment_number = self._allocate_space(_data)
        _directory = _data.joinpath("data" + str(_allotment_number))

        self.files = BoostSRLFiles(_directory, _here)
        self.files.TRAIN_DIR.mkdir()
        self.files.TEST_DIR.mkdir()

    def __del__(self):
        """Clean up the file system on object de-allocation.

        Cleanup is best-effort with respect to a directory that is already
        gone: nothing is raised if ``__init__`` never got as far as assigning
        ``self.files``, or if the directory was removed beforehand (for
        example by a hard reset of the data directory).
        """
        files = getattr(self, "files", None)
        if files is None:
            # __init__ failed before allocation finished; nothing to remove.
            return
        try:
            shutil.rmtree(files.DIRECTORY)
        except FileNotFoundError:
            # Already removed elsewhere; the desired end state is reached.
            pass

    @staticmethod
    def _allocate_space(current_directory) -> int:
        """Attempt to allocate directory `data{n}`, increment until success.

        The directory is claimed by creating it, rather than by checking for
        its existence first, so two callers racing for the same name cannot
        both succeed: the loser sees ``FileExistsError`` and moves on to the
        next number.

        Parameters
        ----------
        current_directory : pathlib.Path
            Parent directory in which ``data{n}`` is created. Missing parents
            are created as needed.

        Returns
        -------
        _postfix : int
            The number corresponding to the allocated directory.
        """
        _postfix = 0
        while True:
            _attempt = current_directory.joinpath("data" + str(_postfix))
            try:
                _attempt.mkdir(parents=True)
            except FileExistsError:
                # Name already taken (by a directory or a file); try the next.
                _postfix += 1
            else:
                return _postfix