diff options
Diffstat (limited to 'third_party/python/aiohttp/aiohttp/streams.py')
-rw-r--r-- | third_party/python/aiohttp/aiohttp/streams.py | 647 |
1 files changed, 647 insertions, 0 deletions
diff --git a/third_party/python/aiohttp/aiohttp/streams.py b/third_party/python/aiohttp/aiohttp/streams.py new file mode 100644 index 0000000000..42970b531d --- /dev/null +++ b/third_party/python/aiohttp/aiohttp/streams.py @@ -0,0 +1,647 @@ +import asyncio +import collections +import warnings +from typing import Awaitable, Callable, Generic, List, Optional, Tuple, TypeVar + +from .base_protocol import BaseProtocol +from .helpers import BaseTimerContext, set_exception, set_result +from .log import internal_logger + +try: # pragma: no cover + from typing import Deque +except ImportError: + from typing_extensions import Deque + +__all__ = ( + "EMPTY_PAYLOAD", + "EofStream", + "StreamReader", + "DataQueue", + "FlowControlDataQueue", +) + +_T = TypeVar("_T") + + +class EofStream(Exception): + """eof stream indication.""" + + +class AsyncStreamIterator(Generic[_T]): + def __init__(self, read_func: Callable[[], Awaitable[_T]]) -> None: + self.read_func = read_func + + def __aiter__(self) -> "AsyncStreamIterator[_T]": + return self + + async def __anext__(self) -> _T: + try: + rv = await self.read_func() + except EofStream: + raise StopAsyncIteration + if rv == b"": + raise StopAsyncIteration + return rv + + +class ChunkTupleAsyncStreamIterator: + def __init__(self, stream: "StreamReader") -> None: + self._stream = stream + + def __aiter__(self) -> "ChunkTupleAsyncStreamIterator": + return self + + async def __anext__(self) -> Tuple[bytes, bool]: + rv = await self._stream.readchunk() + if rv == (b"", False): + raise StopAsyncIteration + return rv + + +class AsyncStreamReaderMixin: + def __aiter__(self) -> AsyncStreamIterator[bytes]: + return AsyncStreamIterator(self.readline) # type: ignore + + def iter_chunked(self, n: int) -> AsyncStreamIterator[bytes]: + """Returns an asynchronous iterator that yields chunks of size n. + + Python-3.5 available for Python 3.5+ only + """ + return AsyncStreamIterator(lambda: self.read(n)) # type: ignore + + def iter_any(self) -> AsyncStreamIterator[bytes]: + """Returns an asynchronous iterator that yields all the available + data as soon as it is received + + Python-3.5 available for Python 3.5+ only + """ + return AsyncStreamIterator(self.readany) # type: ignore + + def iter_chunks(self) -> ChunkTupleAsyncStreamIterator: + """Returns an asynchronous iterator that yields chunks of data + as they are received by the server. The yielded objects are tuples + of (bytes, bool) as returned by the StreamReader.readchunk method. + + Python-3.5 available for Python 3.5+ only + """ + return ChunkTupleAsyncStreamIterator(self) # type: ignore + + +class StreamReader(AsyncStreamReaderMixin): + """An enhancement of asyncio.StreamReader. + + Supports asynchronous iteration by line, chunk or as available:: + + async for line in reader: + ... + async for chunk in reader.iter_chunked(1024): + ... + async for slice in reader.iter_any(): + ... + + """ + + total_bytes = 0 + + def __init__( + self, + protocol: BaseProtocol, + limit: int, + *, + timer: Optional[BaseTimerContext] = None, + loop: Optional[asyncio.AbstractEventLoop] = None + ) -> None: + self._protocol = protocol + self._low_water = limit + self._high_water = limit * 2 + if loop is None: + loop = asyncio.get_event_loop() + self._loop = loop + self._size = 0 + self._cursor = 0 + self._http_chunk_splits = None # type: Optional[List[int]] + self._buffer = collections.deque() # type: Deque[bytes] + self._buffer_offset = 0 + self._eof = False + self._waiter = None # type: Optional[asyncio.Future[None]] + self._eof_waiter = None # type: Optional[asyncio.Future[None]] + self._exception = None # type: Optional[BaseException] + self._timer = timer + self._eof_callbacks = [] # type: List[Callable[[], None]] + + def __repr__(self) -> str: + info = [self.__class__.__name__] + if self._size: + info.append("%d bytes" % self._size) + if self._eof: + info.append("eof") + if self._low_water != 2 ** 16: # default limit + info.append("low=%d high=%d" % (self._low_water, self._high_water)) + if self._waiter: + info.append("w=%r" % self._waiter) + if self._exception: + info.append("e=%r" % self._exception) + return "<%s>" % " ".join(info) + + def get_read_buffer_limits(self) -> Tuple[int, int]: + return (self._low_water, self._high_water) + + def exception(self) -> Optional[BaseException]: + return self._exception + + def set_exception(self, exc: BaseException) -> None: + self._exception = exc + self._eof_callbacks.clear() + + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_exception(waiter, exc) + + waiter = self._eof_waiter + if waiter is not None: + self._eof_waiter = None + set_exception(waiter, exc) + + def on_eof(self, callback: Callable[[], None]) -> None: + if self._eof: + try: + callback() + except Exception: + internal_logger.exception("Exception in eof callback") + else: + self._eof_callbacks.append(callback) + + def feed_eof(self) -> None: + self._eof = True + + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_result(waiter, None) + + waiter = self._eof_waiter + if waiter is not None: + self._eof_waiter = None + set_result(waiter, None) + + for cb in self._eof_callbacks: + try: + cb() + except Exception: + internal_logger.exception("Exception in eof callback") + + self._eof_callbacks.clear() + + def is_eof(self) -> bool: + """Return True if 'feed_eof' was called.""" + return self._eof + + def at_eof(self) -> bool: + """Return True if the buffer is empty and 'feed_eof' was called.""" + return self._eof and not self._buffer + + async def wait_eof(self) -> None: + if self._eof: + return + + assert self._eof_waiter is None + self._eof_waiter = self._loop.create_future() + try: + await self._eof_waiter + finally: + self._eof_waiter = None + + def unread_data(self, data: bytes) -> None: + """rollback reading some data from stream, inserting it to buffer head.""" + warnings.warn( + "unread_data() is deprecated " + "and will be removed in future releases (#3260)", + DeprecationWarning, + stacklevel=2, + ) + if not data: + return + + if self._buffer_offset: + self._buffer[0] = self._buffer[0][self._buffer_offset :] + self._buffer_offset = 0 + self._size += len(data) + self._cursor -= len(data) + self._buffer.appendleft(data) + self._eof_counter = 0 + + # TODO: size is ignored, remove the param later + def feed_data(self, data: bytes, size: int = 0) -> None: + assert not self._eof, "feed_data after feed_eof" + + if not data: + return + + self._size += len(data) + self._buffer.append(data) + self.total_bytes += len(data) + + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_result(waiter, None) + + if self._size > self._high_water and not self._protocol._reading_paused: + self._protocol.pause_reading() + + def begin_http_chunk_receiving(self) -> None: + if self._http_chunk_splits is None: + if self.total_bytes: + raise RuntimeError( + "Called begin_http_chunk_receiving when" "some data was already fed" + ) + self._http_chunk_splits = [] + + def end_http_chunk_receiving(self) -> None: + if self._http_chunk_splits is None: + raise RuntimeError( + "Called end_chunk_receiving without calling " + "begin_chunk_receiving first" + ) + + # self._http_chunk_splits contains logical byte offsets from start of + # the body transfer. Each offset is the offset of the end of a chunk. + # "Logical" means bytes, accessible for a user. + # If no chunks containig logical data were received, current position + # is difinitely zero. + pos = self._http_chunk_splits[-1] if self._http_chunk_splits else 0 + + if self.total_bytes == pos: + # We should not add empty chunks here. So we check for that. + # Note, when chunked + gzip is used, we can receive a chunk + # of compressed data, but that data may not be enough for gzip FSM + # to yield any uncompressed data. That's why current position may + # not change after receiving a chunk. + return + + self._http_chunk_splits.append(self.total_bytes) + + # wake up readchunk when end of http chunk received + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_result(waiter, None) + + async def _wait(self, func_name: str) -> None: + # StreamReader uses a future to link the protocol feed_data() method + # to a read coroutine. Running two read coroutines at the same time + # would have an unexpected behaviour. It would not possible to know + # which coroutine would get the next data. + if self._waiter is not None: + raise RuntimeError( + "%s() called while another coroutine is " + "already waiting for incoming data" % func_name + ) + + waiter = self._waiter = self._loop.create_future() + try: + if self._timer: + with self._timer: + await waiter + else: + await waiter + finally: + self._waiter = None + + async def readline(self) -> bytes: + if self._exception is not None: + raise self._exception + + line = [] + line_size = 0 + not_enough = True + + while not_enough: + while self._buffer and not_enough: + offset = self._buffer_offset + ichar = self._buffer[0].find(b"\n", offset) + 1 + # Read from current offset to found b'\n' or to the end. + data = self._read_nowait_chunk(ichar - offset if ichar else -1) + line.append(data) + line_size += len(data) + if ichar: + not_enough = False + + if line_size > self._high_water: + raise ValueError("Line is too long") + + if self._eof: + break + + if not_enough: + await self._wait("readline") + + return b"".join(line) + + async def read(self, n: int = -1) -> bytes: + if self._exception is not None: + raise self._exception + + # migration problem; with DataQueue you have to catch + # EofStream exception, so common way is to run payload.read() inside + # infinite loop. what can cause real infinite loop with StreamReader + # lets keep this code one major release. + if __debug__: + if self._eof and not self._buffer: + self._eof_counter = getattr(self, "_eof_counter", 0) + 1 + if self._eof_counter > 5: + internal_logger.warning( + "Multiple access to StreamReader in eof state, " + "might be infinite loop.", + stack_info=True, + ) + + if not n: + return b"" + + if n < 0: + # This used to just loop creating a new waiter hoping to + # collect everything in self._buffer, but that would + # deadlock if the subprocess sends more than self.limit + # bytes. So just call self.readany() until EOF. + blocks = [] + while True: + block = await self.readany() + if not block: + break + blocks.append(block) + return b"".join(blocks) + + # TODO: should be `if` instead of `while` + # because waiter maybe triggered on chunk end, + # without feeding any data + while not self._buffer and not self._eof: + await self._wait("read") + + return self._read_nowait(n) + + async def readany(self) -> bytes: + if self._exception is not None: + raise self._exception + + # TODO: should be `if` instead of `while` + # because waiter maybe triggered on chunk end, + # without feeding any data + while not self._buffer and not self._eof: + await self._wait("readany") + + return self._read_nowait(-1) + + async def readchunk(self) -> Tuple[bytes, bool]: + """Returns a tuple of (data, end_of_http_chunk). When chunked transfer + encoding is used, end_of_http_chunk is a boolean indicating if the end + of the data corresponds to the end of a HTTP chunk , otherwise it is + always False. + """ + while True: + if self._exception is not None: + raise self._exception + + while self._http_chunk_splits: + pos = self._http_chunk_splits.pop(0) + if pos == self._cursor: + return (b"", True) + if pos > self._cursor: + return (self._read_nowait(pos - self._cursor), True) + internal_logger.warning( + "Skipping HTTP chunk end due to data " + "consumption beyond chunk boundary" + ) + + if self._buffer: + return (self._read_nowait_chunk(-1), False) + # return (self._read_nowait(-1), False) + + if self._eof: + # Special case for signifying EOF. + # (b'', True) is not a final return value actually. + return (b"", False) + + await self._wait("readchunk") + + async def readexactly(self, n: int) -> bytes: + if self._exception is not None: + raise self._exception + + blocks = [] # type: List[bytes] + while n > 0: + block = await self.read(n) + if not block: + partial = b"".join(blocks) + raise asyncio.IncompleteReadError(partial, len(partial) + n) + blocks.append(block) + n -= len(block) + + return b"".join(blocks) + + def read_nowait(self, n: int = -1) -> bytes: + # default was changed to be consistent with .read(-1) + # + # I believe the most users don't know about the method and + # they are not affected. + if self._exception is not None: + raise self._exception + + if self._waiter and not self._waiter.done(): + raise RuntimeError( + "Called while some coroutine is waiting for incoming data." + ) + + return self._read_nowait(n) + + def _read_nowait_chunk(self, n: int) -> bytes: + first_buffer = self._buffer[0] + offset = self._buffer_offset + if n != -1 and len(first_buffer) - offset > n: + data = first_buffer[offset : offset + n] + self._buffer_offset += n + + elif offset: + self._buffer.popleft() + data = first_buffer[offset:] + self._buffer_offset = 0 + + else: + data = self._buffer.popleft() + + self._size -= len(data) + self._cursor += len(data) + + chunk_splits = self._http_chunk_splits + # Prevent memory leak: drop useless chunk splits + while chunk_splits and chunk_splits[0] < self._cursor: + chunk_splits.pop(0) + + if self._size < self._low_water and self._protocol._reading_paused: + self._protocol.resume_reading() + return data + + def _read_nowait(self, n: int) -> bytes: + """ Read not more than n bytes, or whole buffer if n == -1 """ + chunks = [] + + while self._buffer: + chunk = self._read_nowait_chunk(n) + chunks.append(chunk) + if n != -1: + n -= len(chunk) + if n == 0: + break + + return b"".join(chunks) if chunks else b"" + + +class EmptyStreamReader(AsyncStreamReaderMixin): + def exception(self) -> Optional[BaseException]: + return None + + def set_exception(self, exc: BaseException) -> None: + pass + + def on_eof(self, callback: Callable[[], None]) -> None: + try: + callback() + except Exception: + internal_logger.exception("Exception in eof callback") + + def feed_eof(self) -> None: + pass + + def is_eof(self) -> bool: + return True + + def at_eof(self) -> bool: + return True + + async def wait_eof(self) -> None: + return + + def feed_data(self, data: bytes, n: int = 0) -> None: + pass + + async def readline(self) -> bytes: + return b"" + + async def read(self, n: int = -1) -> bytes: + return b"" + + async def readany(self) -> bytes: + return b"" + + async def readchunk(self) -> Tuple[bytes, bool]: + return (b"", True) + + async def readexactly(self, n: int) -> bytes: + raise asyncio.IncompleteReadError(b"", n) + + def read_nowait(self) -> bytes: + return b"" + + +EMPTY_PAYLOAD = EmptyStreamReader() + + +class DataQueue(Generic[_T]): + """DataQueue is a general-purpose blocking queue with one reader.""" + + def __init__(self, loop: asyncio.AbstractEventLoop) -> None: + self._loop = loop + self._eof = False + self._waiter = None # type: Optional[asyncio.Future[None]] + self._exception = None # type: Optional[BaseException] + self._size = 0 + self._buffer = collections.deque() # type: Deque[Tuple[_T, int]] + + def __len__(self) -> int: + return len(self._buffer) + + def is_eof(self) -> bool: + return self._eof + + def at_eof(self) -> bool: + return self._eof and not self._buffer + + def exception(self) -> Optional[BaseException]: + return self._exception + + def set_exception(self, exc: BaseException) -> None: + self._eof = True + self._exception = exc + + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_exception(waiter, exc) + + def feed_data(self, data: _T, size: int = 0) -> None: + self._size += size + self._buffer.append((data, size)) + + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_result(waiter, None) + + def feed_eof(self) -> None: + self._eof = True + + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_result(waiter, None) + + async def read(self) -> _T: + if not self._buffer and not self._eof: + assert not self._waiter + self._waiter = self._loop.create_future() + try: + await self._waiter + except (asyncio.CancelledError, asyncio.TimeoutError): + self._waiter = None + raise + + if self._buffer: + data, size = self._buffer.popleft() + self._size -= size + return data + else: + if self._exception is not None: + raise self._exception + else: + raise EofStream + + def __aiter__(self) -> AsyncStreamIterator[_T]: + return AsyncStreamIterator(self.read) + + +class FlowControlDataQueue(DataQueue[_T]): + """FlowControlDataQueue resumes and pauses an underlying stream. + + It is a destination for parsed data.""" + + def __init__( + self, protocol: BaseProtocol, limit: int, *, loop: asyncio.AbstractEventLoop + ) -> None: + super().__init__(loop=loop) + + self._protocol = protocol + self._limit = limit * 2 + + def feed_data(self, data: _T, size: int = 0) -> None: + super().feed_data(data, size) + + if self._size > self._limit and not self._protocol._reading_paused: + self._protocol.pause_reading() + + async def read(self) -> _T: + try: + return await super().read() + finally: + if self._size < self._limit and self._protocol._reading_paused: + self._protocol.resume_reading() |