|
23 | 23 | """
|
24 | 24 | import inspect
|
25 | 25 | import math
|
| 26 | +import os |
26 | 27 | import random
|
27 | 28 | import re
|
28 | 29 | import shlex
|
|
35 | 36 | from decimal import Decimal, getcontext
|
36 | 37 | from os import getenv as env
|
37 | 38 | from subprocess import PIPE, STDOUT
|
38 |
| -from typing import Sequence, List, Union, Tuple, Type, Dict, Any, Iterable, Optional |
| 39 | +from typing import Sequence, List, Union, Tuple, Type, Dict, Any, Iterable, Optional, BinaryIO, Generator |
39 | 40 |
|
40 | 41 | from privex.helpers import settings
|
41 | 42 |
|
|
51 | 52 | """All characters from a-z, A-Z, and 0-9 - for random strings where there's no risk of user font confusion"""
|
52 | 53 |
|
53 | 54 |
|
54 |
| - |
55 |
| - |
56 | 55 | def random_str(size: int = 50, chars: Sequence = SAFE_CHARS) -> str:
|
57 | 56 | """
|
58 | 57 | Generate a random string of arbitrary length using a given character set (string / list / tuple). Uses Python's
|
@@ -118,9 +117,6 @@ def empty(v, zero: bool = False, itr: bool = False) -> bool:
|
118 | 117 | return False
|
119 | 118 |
|
120 | 119 |
|
121 |
| - |
122 |
| - |
123 |
| - |
124 | 120 | def empty_if(v: V, is_empty: K = None, not_empty: T = USE_ORIG_VAR, **kwargs) -> Union[T, K, V]:
|
125 | 121 | """
|
126 | 122 | Syntactic sugar for ``x if empty(y) else z``. If ``not_empty`` isn't specified, then the original value ``v``
|
@@ -757,8 +753,6 @@ def human_name(class_name: Union[str, bytes, callable, Type[object]]) -> str:
|
757 | 753 | return ''.join(new_name).strip()
|
758 | 754 |
|
759 | 755 |
|
760 |
| - |
761 |
| - |
762 | 756 | def shell_quote(*args: str) -> str:
|
763 | 757 | """
|
764 | 758 | Takes command line arguments as positional args, and properly quotes each argument to make it safe to
|
@@ -830,6 +824,146 @@ def call_sys(proc, *args, write: STRBYTES = None, **kwargs) -> Tuple[bytes, byte
|
830 | 824 | return stdout, stderr
|
831 | 825 |
|
832 | 826 |
|
def reverse_io(f: BinaryIO, blocksize: int = 4096) -> Generator[bytes, None, None]:
    """
    Read a binary file handle as a series of blocks, from the end of the file to the start.

    The data inside each block is in normal order, only the order of the blocks is reversed,
    e.g. ``b"hello world"`` with ``blocksize=3`` yields ``b"ld"``, ``b"wor"``, ``b"lo "``, ``b"hel"``.

    The size of the data is measured by seeking on the handle itself, so any seekable binary
    file-like object (including :class:`io.BytesIO`) can be used - not only files which have a
    path on disk. The position of ``f`` is changed by this function.

    Original source: https://stackoverflow.com/a/136354

    :param BinaryIO f: A seekable file handle opened in **binary mode** (e.g. ``rb``)
    :param int blocksize: Maximum number of bytes to yield per block (must be at least 1)
    :raises Exception: When ``f`` reports a text mode (a string ``mode`` attribute without ``b``)
    :raises ValueError: When ``blocksize`` is less than 1
    :return Generator blocks: Blocks of ``bytes``, last block of the file first. Nothing is yielded for an empty file.
    """
    # Only string modes can be inspected; handles without one (e.g. BytesIO) are accepted as-is.
    mode = getattr(f, 'mode', None)
    if isinstance(mode, str) and 'b' not in mode.lower():
        raise Exception("File must be opened using binary mode.")

    blocksize = int(blocksize)
    if blocksize < 1:
        raise ValueError("blocksize must be a positive integer")

    # Measure the handle rather than the path, so the size always matches what we can actually read.
    f.seek(0, os.SEEK_END)
    size = f.tell()
    fullblocks, lastblock = divmod(size, blocksize)

    # The first (end of file) block is the short one, since this leaves the rest aligned on a
    # blocksize boundary. Absolute offsets are used so data appended meanwhile can't shift the blocks.
    if lastblock:
        f.seek(fullblocks * blocksize)
        yield f.read(lastblock)

    for i in range(fullblocks - 1, -1, -1):
        f.seek(i * blocksize)
        yield f.read(blocksize)
| 851 | + |
| 852 | + |
def io_tail(f: BinaryIO, nlines: int = 20, bsz: int = 4096) -> Generator[List[str], None, None]:
    """
    NOTE: If you're only loading a small number of lines, consider using the much easier :func:`.tail`
    function - it only requires one call and returns the lines as a single, correctly ordered list.

    This is a generator function which works similarly to ``tail`` on UNIX systems. It retrieves lines in
    reverse order from the passed file handle ``f``, reading ``bsz`` bytes at a time via :func:`.reverse_io`.

    WARNING: This generator yields "chunks" of lines - while the lines within each chunk are in the correct
    order, the chunks themselves are backwards, i.e. each chunk contains lines which come BEFORE the lines
    of the previously yielded chunk.

    It was designed as a generator to allow for **memory efficient handling of large files**: you can process
    each chunk of lines as it's read, instead of having to hold all ``nlines`` lines in memory at once.

    To ensure your retrieved lines are in the correct order, with each iteration you must PREPEND the
    yielded chunk to your final result, rather than APPEND. Example::

        >>> from privex.helpers import io_tail
        >>> lines = []
        >>> with open('/tmp/example', 'rb') as fp:
        ...     # We prepend each chunk from 'io_tail' to our result variable 'lines'
        ...     for chunk in io_tail(fp, nlines=10):
        ...         lines = chunk + lines
        >>> print('\\n'.join(lines))

    Blocks are buffered as raw bytes and only whole lines are decoded, so a multi-byte character which
    straddles a block boundary is never decoded in halves, and empty lines which fall exactly on a
    block boundary are preserved.

    Originally based on this SO code snippet: https://stackoverflow.com/a/136354

    :param BinaryIO f: An open file handle for the file to tail, must be in **binary mode** (e.g. ``rb``)
    :param int nlines: Total number of lines to retrieve from the end of the file. Nothing is yielded if less than 1.
    :param int bsz: Block size (in bytes) to load with each iteration (default: 4096 bytes). DON'T CHANGE UNLESS YOU
                    UNDERSTAND WHAT THIS MEANS.
    :return Generator chunks: Generates chunks (in reverse order) of correctly ordered lines as ``List[str]``
    """
    nlines = int(nlines)
    # Zero / negative requests yield nothing (a slice such as result[-0:] would return EVERY line).
    if nlines < 1:
        return

    # Raw bytes of the earliest line seen so far, INCLUDING its line terminator. That line may
    # continue into the next (earlier) block, so it can't be decoded or yielded yet.
    pending = b''
    lines_read = 0

    # Load 'bsz' bytes at a time from file handle 'f', moving from the end of the file towards the start
    for block in reverse_io(f, blocksize=int(bsz)):
        pending = block + pending
        cut = pending.find(b'\n')
        if cut < 0:
            # No newline yet - everything buffered so far belongs to a single, possibly partial, line.
            continue

        # Everything after the first newline consists of whole lines. The head (plus its terminator)
        # stays buffered until an earlier block shows us where that line really begins.
        complete, pending = pending[cut + 1:], pending[:cut + 1]
        if not complete:
            continue

        result = stringify(complete).splitlines()
        remaining = nlines - lines_read

        # Once we have enough lines to satisfy 'nlines', yield only what's still needed and finish.
        if len(result) >= remaining:
            yield result[-remaining:]
            return

        if result:
            lines_read += len(result)
            yield result

    # We've reached the start of the file without collecting 'nlines' lines, so whatever is still
    # buffered is the very first line(s) of the file. An empty file leaves nothing to yield.
    if pending:
        head = stringify(pending).splitlines()
        if head:
            yield head[-(nlines - lines_read):]
| 921 | + |
| 922 | + |
def tail(filename: str, nlines: int = 20, bsz: int = 4096) -> List[str]:
    """
    Pure python equivalent of the UNIX ``tail`` command. Pass the path of a file plus the number of lines you want
    from the end of it, and those lines are returned as a ``List[str]`` in forward (top to bottom) order.

    This is a convenience wrapper around the generator :func:`.io_tail`. Because the lines have to be put back
    into forward order, every requested line is held in memory before this function returns - so it's best
    suited to tailing a modest number of lines.

    When tailing a large amount of data (e.g. 10,000+ lines of a logfile), consider calling :func:`.io_tail`
    directly and processing each chunk of lines as it's generated.

    Example file ``/tmp/testing``::

        this is an example 1
        this is an example 2
        this is an example 3
        this is an example 4
        this is an example 5
        this is an example 6

    Example usage::

        >>> from privex.helpers import tail
        >>> lines = tail('/tmp/testing', nlines=3)
        >>> print("\\n".join(lines))
        this is an example 4
        this is an example 5
        this is an example 6


    :param str filename: Path to file to tail. Relative or absolute path. Absolute path is recommended for safety.
    :param int nlines: Total number of lines to retrieve from the end of the file
    :param int bsz: Block size (in bytes) to load with each iteration (default: 4096 bytes). DON'T CHANGE UNLESS YOU
                    UNDERSTAND WHAT THIS MEANS.
    :return List[str] lines: The last 'nlines' lines of the file 'filename' - in forward order.
    """
    # Drain the generator while the file is still open; chunks arrive last-lines-first.
    with open(filename, 'rb') as handle:
        chunks = list(io_tail(f=handle, nlines=nlines, bsz=bsz))

    # Walk the chunks backwards so the earliest lines of the file end up at the front of the result.
    ordered = []
    for piece in reversed(chunks):
        ordered = ordered + piece
    return ordered
| 965 | + |
| 966 | + |
# Raw strings are used so that ``\*`` reaches the regex engine as-is, instead of being an invalid
# string escape sequence (which emits a warning on modern Python versions).
IS_XARGS = re.compile(r'^\*([a-zA-Z0-9_])+$')
"""Pre-compiled regex for matching catch-all positional argument parameter names like ``*args``"""
IS_XKWARGS = re.compile(r'^\*\*([a-zA-Z0-9_])+$')
|
|
0 commit comments