#!/usr/bin/env python3
# Provenance: this module was originally committed from the ELITR project
# (patch "line packet commited originally from ELITR" by Dominik Macháček,
# 2023-06-02).

r"""Functions for sending and receiving individual lines of text over a socket.

In the ELITR project this module was used by marian-server-server.py to
communicate with the Marian worker.

A line is transmitted using one or more fixed-size packets of UTF-8 bytes
containing:

  - Zero or more bytes of UTF-8, excluding \n and \0, followed by

  - A terminating \n and \0, followed by

  - Zero or more \0 bytes as required to pad the packet to PACKET_SIZE

"""

# Size in bytes of every packet written by send_one_line(); also the maximum
# number of bytes requested from the socket by a single recv() call.
PACKET_SIZE = 65536


def send_one_line(socket, text):
    r"""Send a single line of text over the given socket.

    The 'text' argument should contain a single line of text (line break
    characters are optional). Line boundaries are determined by Python's
    str.splitlines() function [1]. '\0' is also counted as a line
    terminator. If 'text' contains multiple lines then only the first will
    be sent.

    The line is encoded as UTF-8 (unencodable characters are replaced),
    terminated with b'\n\0' and written in chunks of exactly PACKET_SIZE
    bytes; the final chunk is padded with b'\0'.

    If the send fails then the exception raised by socket.sendall()
    propagates to the caller.

    [1] https://docs.python.org/3.5/library/stdtypes.html#str.splitlines

    Args:
        socket: a socket object (anything providing sendall(bytes)).
        text: string containing a line of text for transmission.
    """
    # str.replace() returns a new string; the result must be kept, otherwise
    # an embedded '\0' would reach the wire and truncate the line on receipt.
    lines = text.replace('\0', '\n').splitlines()
    first_line = lines[0] if lines else ''
    # TODO Is there a better way of handling bad input than 'replace'?
    data = first_line.encode('utf-8', errors='replace') + b'\n\0'
    for offset in range(0, len(data), PACKET_SIZE):
        # ljust() pads only the final, short chunk up to PACKET_SIZE.
        packet = data[offset:offset + PACKET_SIZE].ljust(PACKET_SIZE, b'\0')
        socket.sendall(packet)


def receive_one_line(socket):
    r"""Receive a single line of text from the given socket.

    This function will (attempt to) receive a single line of text. It keeps
    calling socket.recv() until a chunk containing a '\0' byte arrives, so
    on a blocking socket it waits until data becomes available or the
    sender has closed the connection.

    The received text should not contain more than one newline character,
    but if it does then only the first line will be returned.

    NOTE(review): recv() may return fewer than PACKET_SIZE bytes, so padding
    left over from a partially read packet could be seen by the next call as
    an empty line -- confirm whether callers need exact packet framing.

    Args:
        socket: a socket object (anything providing recv(bufsize)).

    Returns:
        A string representing a single line with a terminating newline, or
        None if the connection has been closed.
    """
    chunks = []
    while True:
        packet = socket.recv(PACKET_SIZE)
        if not packet:  # Connection has been closed.
            return None
        chunks.append(packet)
        if b'\0' in packet:  # Terminator / padding reached: line complete.
            break
    # TODO Is there a better way of handling bad input than 'replace'?
    text = b''.join(chunks).decode('utf-8', errors='replace').strip('\0')
    return text.partition('\n')[0] + '\n'


def receive_lines(socket):
    r"""Receive whatever lines are currently available on the socket.

    Performs a single socket.recv() of at most PACKET_SIZE bytes, strips
    leading and trailing '\0' padding and splits the decoded text on '\n'.
    Because the text is split on '\n', a chunk ending in a newline yields a
    trailing empty string in the result.

    Args:
        socket: a socket object (anything providing recv(bufsize)).

    Returns:
        - a list of strings when text was received;
        - an empty list when no text is available (recv() raised
          BlockingIOError, or the chunk consisted solely of '\0' padding);
        - None if the connection has been closed.
    """
    try:
        data = socket.recv(PACKET_SIZE)
    except BlockingIOError:
        return []
    # recv() signals a closed connection with an empty bytes object; it
    # never returns None.
    if not data:
        return None
    # TODO Is there a better way of handling bad input than 'replace'?
    text = data.decode('utf-8', errors='replace').strip('\0')
    if not text:
        # Only padding arrived: the peer is still connected, there is just
        # no text in this chunk.
        return []
    return text.split('\n')