#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Utility Functions
"""
import os
import logging
from collections.abc import Iterable
from typing import Generator, Iterator, List, Set, Tuple
from pdf2image import convert_from_path
from pdf2image.generators import threadsafe
###############################################################################
# Module-level logger, named after this module via ``__name__``.
LOGGER = logging.getLogger(__name__)
###############################################################################
def get_files(topdir: str, extn: str) -> Generator[str, None, None]:
    """
    Search :code:`topdir` recursively for all files with extension :code:`extn`.

    The extension is checked case-insensitively with :code:`str.endswith()`,
    instead of the supposedly better :code:`os.path.splitext()`, in order to
    facilitate the search with multiple dots in the :code:`extn`, i.e.

    :code:`>>> get_files(topdir, ".xyz.txt")`

    wouldn't have worked as expected if :code:`splitext()` was used.

    Parameters
    ----------
    topdir : str
        Path of the directory to search files in
    extn : str
        Extension to look for

    Returns
    -------
    Generator[str, None, None]
        Matching file paths, each one joined onto the directory it was
        found in (so paths are relative or absolute exactly as
        :code:`topdir` is)
    """
    # Lower-case the wanted suffix once rather than once per candidate file.
    suffix = extn.lower()
    return (
        os.path.join(dirpath, name)
        for dirpath, _dirnames, files in os.walk(topdir)
        for name in files
        if name.lower().endswith(suffix)
    )
###############################################################################
# PDF Utils
def list_to_range(list_of_int: List[int]) -> List[Tuple[int, int]]:
    """Convert a list of integers into a list of ranges.

    A range is a tuple :code:`(start, end)` with both ends inclusive.
    The input is de-duplicated and sorted first, so the order and
    multiplicity of the input values do not matter. Values that are not
    integral (e.g. :code:`2.5`) are ignored; integral floats such as
    :code:`2.0` are treated as the corresponding :code:`int`.

    Parameters
    ----------
    list_of_int : List[int]
        List of integers

    Returns
    -------
    List[Tuple[int, int]]
        List of ranges, in ascending order. Empty when the input contains
        no integral values.
    """
    ranges: List[Tuple[int, int]] = []
    start = None
    last = None
    for current in sorted(set(list_of_int)):
        # Ignore anything with a fractional part.
        if current != int(current):
            continue
        current = int(current)
        if last is None:
            # First usable value opens the first range.
            start = current
        elif current != last + 1:
            # Gap found: close the running range and open a new one.
            ranges.append((start, last))
            start = current
        last = current
    # Close the trailing range, but only if at least one value was seen;
    # otherwise a bogus (None, None) entry would be emitted.
    if last is not None:
        ranges.append((start, last))
    return ranges
# Static Name Generator
@threadsafe
def static_generator(prefix):
    """Yield the same :code:`prefix` value forever.

    The generator never terminates on its own; every :code:`next()` call
    produces :code:`prefix` unchanged.

    NOTE(review): wrapped with :code:`pdf2image.generators.threadsafe`,
    presumably so one generator instance can be shared between threads as
    a fixed output-name source -- confirm against the pdf2image docs.

    Parameters
    ----------
    prefix
        Value to yield on every iteration

    Yields
    ------
    The :code:`prefix` object, unmodified
    """
    while True:
        yield prefix
###############################################################################