Skip to content

References

API references

cli

Command Line Interface for py-file-attributes.

main()

Run the CLI application.

Source code in src/file_attributes/cli.py
def main() -> None:
    """Run the CLI application.

    Builds an argument parser with one positional ``file`` argument plus one
    ``--<attribute> BOOL`` option for every name returned by
    ``FileAttributes.get_property_fields`` that has a matching
    ``set_<attribute>`` member on ``FileAttributes``. Every option given on
    the command line is forwarded to its setter, then the attributes of the
    file are printed to stdout.

    Raises
    ------
    SystemExit
        With exit status 1 when reading or changing the attributes fails;
        the error message is written to stderr so that callers and shell
        scripts can detect the failure. ``argparse`` may additionally exit
        on ``--help`` or on invalid arguments.
    """
    import sys  # local import: only needed for error reporting / exit status

    parser = argparse.ArgumentParser(
        prog="file-attributes",
        description="A cross-platform library to manage file attributes on Windows, macOS, and Linux.",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    parser.add_argument("file", type=Path, help="The file to examine or manage attributes for.")

    # Only attributes exposing a ``set_<name>`` member can be changed from the
    # CLI, so these are the only ones that get an option (sorted for a stable
    # ``--help`` listing).
    settable = sorted(
        attr for attr in FileAttributes.get_property_fields(FileAttributes) if hasattr(FileAttributes, f"set_{attr}")
    )
    for attr in settable:
        doc = getattr(FileAttributes, attr).__doc__ or "No description available."
        # Collapse multi-line docstrings into a single help line.
        doc = doc.replace("\n", " ").strip()
        parser.add_argument(f"--{attr}", type=str2bool, metavar="BOOL", help=f"Set or unset: {doc}")

    args = parser.parse_args()

    try:
        file_attrs = FileAttributes(args.file)

        changed = False
        for attr in settable:
            value = getattr(args, attr, None)
            if value is None:
                # Option was not given on the command line.
                continue
            getattr(file_attrs, f"set_{attr}")(value)
            changed = True

        if changed:
            # Refresh to reflect updated attributes
            file_attrs = FileAttributes(args.file)

        print(file_attrs)  # noqa: T201
    except Exception as e:
        # Top-level boundary: report on stderr and signal failure through the
        # exit status instead of exiting "successfully".
        print(f"Error: {e}", file=sys.stderr)  # noqa: T201
        sys.exit(1)

str2bool(v)

Convert string to boolean.

Parameters

v : str The string to convert.

Returns

bool The converted boolean value.

Raises

argparse.ArgumentTypeError If the string cannot be converted to a boolean.

Source code in src/file_attributes/cli.py
def str2bool(v: str) -> bool:
    """Interpret a command-line string as a boolean.

    The comparison is case-insensitive. A value that already is a ``bool``
    is handed back unchanged.

    Parameters
    ----------
    v : str
        The text to interpret.

    Returns
    -------
    bool
        ``True`` for "yes", "true", "t", "y" or "1"; ``False`` for "no",
        "false", "f", "n" or "0".

    Raises
    ------
    argparse.ArgumentTypeError
        If the text is not one of the recognised spellings.
    """
    if isinstance(v, bool):
        return v

    lowered = v.lower()
    spellings_by_value = (
        (True, ("yes", "true", "t", "y", "1")),
        (False, ("no", "false", "f", "n", "0")),
    )
    for outcome, spellings in spellings_by_value:
        if lowered in spellings:
            return outcome

    raise argparse.ArgumentTypeError("Boolean value expected.")

utils

Utils using FileAttributes to retrieve data from cloud storage.

FileRecallManager

Context Manager to recall a cloud-stored file to local storage.

A context manager that checks the attributes of the passed filepath and, if the file is not available on the local hard-drive, performs a data access request to trigger its download.

As long as the data access request is not completed successfully (no OSError), there is a retry policy managed by RETRY_MAX and RETRY_DELAY.

The returned object is simply the filepath to not disrupt other classes.

See Also

FileAttributes

Attributes

filename : Path — The file we are accessing. fileattributes : FileAttributes — Class that retrieves all FileAttributes from the OS. Only works with Windows environment.

Examples

import pandas as pd

with FileRecallManager("test.xlsx") as f: display(pd.read_excel(f, engine="calamine"))

Source code in src/file_attributes/utils.py
class FileRecallManager:
    """Context Manager to recall a cloud-stored file to local storage.

    A context manager that checks the attributes of the passed filepath
    and, if the file is not available on the local hard-drive, performs
    a data access request to trigger it's download.

    As long as the data access request is not completed successfully
    (no OSError), there is a retry policy managed by RETRY_MAX and
    RETRY_DELAY.

    The returned object is simply the filepath to not disrupt other classes.

    See Also
    --------
    FileAttributes

    Attributes
    ----------
    filename : Path
        The file we are accessing
    fileattributes : FileAttributes
        Class that retrieves all FileAttributes from the OS.
        Only works with Windows environment.

    Examples
    --------
    >>> import pandas as pd
    >>>
    >>> with FileRecallManager(test.xlsx) as f:
    >>>    display(pd.read_excel(f, engine="calamine"))
    """

    RETRY_MAX = 5
    RETRY_DELAY = 10
    READ_MODE = "r+b"

    def __init__(
        self: Self,
        filename: Path | str,
    ):
        self.filename = Path(filename)
        self.fileattributes = FileAttributes(self.filename)

    def __enter__(self: Self) -> Path:
        download_offline_file(
            self.filename,
            self.RETRY_MAX,
            self.RETRY_DELAY,
            self.READ_MODE,
        )

        return self.filename

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

download_offline_file(file, RETRY_MAX=5, RETRY_DELAY=10, READ_MODE='r+b')

Trigger download from cloud storage for a single file.

Source code in src/file_attributes/utils.py
def download_offline_file(
    file: Path | str,
    RETRY_MAX: int = 5,
    RETRY_DELAY: int = 10,
    READ_MODE: str = "r+b",
) -> None:
    """Trigger download from cloud storage for a single file.

    When ``FileAttributes(file).in_cloud`` is truthy, the file is opened (and
    immediately closed) to trigger the data access. Failed attempts are
    retried up to ``RETRY_MAX`` times. Nothing is done when ``in_cloud`` is
    falsy.

    Parameters
    ----------
    file : Path | str
        The file to make available locally.
    RETRY_MAX : int, optional
        Maximum number of attempts to open the file, by default 5.
        With a value of 0 or less no attempt is made at all.
    RETRY_DELAY : int, optional
        Seconds to sleep between two attempts, by default 10.
    READ_MODE : str, optional
        Mode passed to ``open`` to trigger the data access, by default "r+b".

    Raises
    ------
    OSError
        If the file still cannot be opened on the last attempt. The
        ``OSError`` of that last attempt is attached as ``__cause__`` so the
        underlying reason (e.g. a permission problem) is not lost.
    """
    # Accept plain strings as well, like the multi-file helpers do; the
    # error message below relies on ``Path.as_posix``.
    path = Path(file)

    if not FileAttributes(path).in_cloud:
        return

    # Check if file is available on the drive, otherwise trigger its download.
    # Retry several times
    for attempt in range(1, RETRY_MAX + 1):
        try:
            with builtins.open(path, READ_MODE):
                return
        except OSError as err:  # noqa: PERF203
            if attempt == RETRY_MAX:
                raise OSError(
                    f"Unable to retrieve {path.as_posix()} from cloud storage. Retry policy exceeded.",
                ) from err
            time.sleep(RETRY_DELAY)

download_offline_files_parallel(to_download, RETRY_MAX=5, RETRY_DELAY=10, READ_MODE='r+b', max_workers=4)

Trigger download from cloud storage for all provided files in parallel.

Parameters

to_download : list[str | Path] List of files to ensure are available on HDD. RETRY_MAX : int, optional Amount of times to try and trigger the download, by default 5. RETRY_DELAY : int, optional Amount of time to wait between two tries, by default 10 seconds. READ_MODE : str, optional Read mode to be used by open(file, READ_MODE) to trigger the data access, by default "r+b". max_workers : int, optional Maximum number of threads to use for parallel processing, by default 4.

Raises

OSError If FileAttributes.in_cloud does not shift to False (= Available on HDD) after the amount of tries is larger than RETRY_MAX, then fail.

Source code in src/file_attributes/utils.py
def download_offline_files_parallel(
    to_download: list[str | Path],
    RETRY_MAX: int = 5,
    RETRY_DELAY: int = 10,
    READ_MODE: str = "r+b",
    max_workers: int = 4,
) -> None:
    """Trigger download from cloud storage for all provided files in parallel.

    Parameters
    ----------
    to_download : list[str | Path]
        List of files to ensure are available on HDD.
    RETRY_MAX : int, optional
        Amount of times to try and trigger the download, by default 5.
    RETRY_DELAY : int, optional
        Amount of time to wait between two tries, by default 10 seconds.
    READ_MODE : str, optional
        Read mode to be used by open(file, READ_MODE) to trigger the data access, by default "r+b".
    max_workers : int, optional
        Maximum number of threads to use for parallel processing, by default 4.

    Raises
    ------
    OSError
        If FileAttributes.in_cloud does not shift to False (= Available on HDD)
        after the amount of tries is larger than RETRY_MAX, then fail.
    """

    if isinstance(to_download, (str, Path)):
        download_offline_file(Path(to_download), RETRY_MAX, RETRY_DELAY, READ_MODE)

    elif isinstance(to_download, list) and len(to_download) == 1:
        download_offline_file(Path(to_download[0]), RETRY_MAX, RETRY_DELAY, READ_MODE)

    elif isinstance(to_download, list) and len(to_download) > 1:
        _paths = [Path(x) for x in to_download]
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(download_offline_file, file, RETRY_MAX, RETRY_DELAY, READ_MODE) for file in _paths
            ]
            for future in as_completed(futures):
                future.result()  # This will raise an exception if the future raised one

    else:
        raise ValueError(f"Invalid type for to_download: {type(to_download)}")

download_offline_files_sequential(to_download, RETRY_MAX=5, RETRY_DELAY=10, READ_MODE='r+b')

Trigger download from cloud storage for all provided files.

Parameters

to_download : list[str | Path] List of files to ensure are available on HDD RETRY_MAX : int, by default = 5 Amount of times to try and trigger the download RETRY_DELAY : int, by default = 10 Amount of time to wait between two tries READ_MODE : str, by default = r+b Read mode to be used by open(file, READ_MODE) to trigger the data_access It should not matter, but just in case.

See Also

FileAttributes

Raises

OSError If FileAttributes.in_cloud does not shift to False (= Available on HDD) after the amount of tries is larger than RETRY_MAX, then fail.

Source code in src/file_attributes/utils.py
def download_offline_files_sequential(
    to_download: list[str | Path],
    RETRY_MAX: int = 5,
    RETRY_DELAY: int = 10,
    READ_MODE: str = "r+b",
) -> None:
    """Trigger download from cloud storage for all provided files.

    Parameters
    ----------
    to_download : list[str | Path]
        List of files to ensure are available on HDD
    RETRY_MAX : int, by default = 5
        Amount of times to try and trigger the download
    RETRY_DELAY : int, by default = 10
        Amount of time to wait between two tries
    READ_MODE : str, by default = r+b
        Read mode to be used by open(file, READ_MODE) to trigger the data_access
        It should not matter, but just in case.

    See Also
    --------
    FileAttributes

    Raises
    ------
    OSError
        If FileAttributes.in_cloud does not shift to False (= Available on HDD)
        after amount of tries is larger then RETRY_MAX, then fail.
    """

    for file in [Path(x) for x in to_download]:
        download_offline_file(file, RETRY_MAX, RETRY_DELAY, READ_MODE)