Python: Structures

Posted on Jul 16, 2022

Struct: Options in Python

Structs are commonly used to format data sent between services over a network.
A struct contains typed fields laid out in contiguous memory.
In C/C++, structs can be created directly using the ‘struct’ keyword.

Structs can be transformed to/from their contiguous byte representations.
This allows for a concise representation in code.

Python does not contain a built-in ‘struct’ object.
There are a number of ways to replicate the functionality of structs in python.

Example c struct.

1
2
3
4
5
6
#include <stdint.h>

/*
 * Fixed-layout message exchanged between services.
 * NOTE: without a packing directive the compiler may insert padding before
 * message_number, so sizeof(struct Message) can exceed the 55 bytes of raw
 * field data.
 */
struct Message {
    uint8_t message_type;            /* 1-byte message kind */
    char message_description[50];    /* fixed-width, NUL-padded text */
    uint32_t message_number;         /* 4-byte unsigned counter */
};
/* Positional initializer: type, description, number. */
struct Message myMessage = {7, "useless message", 500};

Method 1: Struct Module

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
import struct
from io import BytesIO

# Encode: pack all three fields into one big-endian, unpadded byte string.
myMessage = struct.pack('>B50sI', 7, b"useless message", 500)

# Decode: consume the buffer one field at a time; each unpack call returns a
# one-element tuple, which is destructured directly into the target name.
stream = BytesIO(myMessage)
(message_type,) = struct.unpack('>B', stream.read(struct.calcsize('>B')))
(message_description,) = struct.unpack('>50s', stream.read(struct.calcsize('>50s')))
(message_number,) = struct.unpack('>I', stream.read(struct.calcsize('>I')))

Pros: Single line, fast.
Cons: No syntax highlighting, poor readability (requires format character knowledge),
Requires double definition for encoder and decoder


Method 2: Ctypes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
from ctypes import BigEndianStructure, c_ubyte, c_uint32, c_char

# BigEndianStructure matches struct '>' symbol for endianness
class Message(BigEndianStructure):
    """Packed, big-endian message: 1-byte type, 50-byte text, 4-byte number."""

    # _pack_ = 1 disables alignment padding so the layout is exactly 55 bytes.
    _pack_ = 1
    _fields_ = [('message_type', c_ubyte),
                ('message_description', c_char * 50),
                ('message_number', c_uint32)]

# from python types to bytes
myMessage = Message(7, b"useless message", 500)
message_bytes = bytearray(myMessage)

# from bytes to python types
# from_buffer shares memory with message_bytes (no copy), so the source
# buffer must be writable -- hence the bytearray above.
myMessageNew = Message.from_buffer(message_bytes)
myMessageNew.message_type
myMessageNew.message_description
myMessageNew.message_number

Pros: Single class defines encode/decode, fast, readable types
Cons: No syntax property autocomplete, poor readability


Method 3: Dataclass (With Added Methods)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
from ctypes import c_ubyte, c_uint32, c_char
# "Struct" class defined below

# Field order below is the wire order: to_bytes()/from_bytes() walk the
# dataclass fields in declaration order.
@dataclass
class Message(Struct):
    """Message layout declared with ctypes annotations on a Struct dataclass."""
    message_type: c_ubyte
    message_description: c_char * 50
    message_number: c_uint32

# from python types to bytes
# Plain Python values are converted to the annotated ctypes types by
# Struct.__setattr__ when the generated __init__ assigns each field.
myMessage = Message(7, b"useless message", 500)
message_bytes = myMessage.to_bytes()

# from bytes to python types
# from_bytes raises ValueError unless len(message_bytes) == Message.size.
decode = Message.from_bytes(message_bytes)
# Each attribute holds a ctypes object; the bare expressions below only
# display a value when run in an interactive session.
decode.message_type
decode.message_description
decode.message_number

Pros: Single class defines encode/decode, readable types, good readability, syntax property autocomplete
Cons: Slower than C-based solutions


Custom Struct Class: Used in Method 3

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""python STRUCT like class for encoding / decoding structs with minimal boiler plate code """  

from io import BytesIO
from functools import lru_cache
from collections.abc import Iterable
from dataclasses import Field, dataclass, fields

from typing import get_type_hints
from ctypes import sizeof

class _ClassProperty:
    """Read-only attribute computed from the owning class.

    Stands in for stacking ``@classmethod`` on ``@property``, which was
    deprecated in Python 3.11 and stopped working in Python 3.13.
    """

    def __init__(self, fget):
        self.fget = fget
        self.__doc__ = fget.__doc__

    def __get__(self, instance, owner=None):
        # Works for both ``Cls.attr`` and ``instance.attr`` access.
        return self.fget(owner if owner is not None else type(instance))


@lru_cache(maxsize=None)
def _ctype_hints(cls):
    """Return the resolved field annotations of *cls* (computed once per class)."""
    return get_type_hints(cls)


@lru_cache(maxsize=None)
def _packed_size(cls):
    """Return the summed ``sizeof`` of every field type of *cls*."""
    return sum(sizeof(field.type) for field in fields(cls))


def _slice_step(ctype, endian):
    """Return the byte-slice step for *ctype*: char arrays are never reordered."""
    return 1 if "c_char_Array" in ctype.__name__ else endian


@dataclass
class Struct:
    """Base class for dataclasses whose fields are annotated with ctypes types.

    Subclasses declare fields such as ``number: c_uint32``; values assigned to
    them are converted to the annotated ctypes type, and the instance can be
    serialized to / parsed from an unpadded byte string in field order.
    """

    def __setattr__(self, name, value):
        """Assign *value* to field *name*, converting it to the annotated ctype.

        Values that are already instances of the annotated type are stored
        unchanged; iterables are unpacked into the constructor (array types),
        anything else is passed as a single constructor argument.

        Raises:
            KeyError: if *name* is not an annotated field of the class.
        """
        _type = _ctype_hints(type(self))[name]
        if not isinstance(value, _type):
            value = _type(*value) if isinstance(value, Iterable) else _type(value)
        super().__setattr__(name, value)

    def to_bytes(self, endian=-1):
        """Serialize all fields, in declaration order, to one byte string.

        Args:
            endian: slice step applied to each field's native-order bytes;
                ``-1`` (default) reverses them, ``1`` keeps them as-is.
                Character arrays are always kept in their original order.

        Returns:
            bytes: the concatenated field bytes, without padding.
        """
        _values = []
        for field in fields(self):
            order = _slice_step(field.type, endian)
            _values.append(bytearray(getattr(self, field.name))[::order])

        return b''.join(_values)

    @classmethod
    def from_bytes(cls, data, endian=-1):
        """Build an instance from *data*, the inverse of :meth:`to_bytes`.

        Args:
            data: bytes-like object of exactly ``cls.size`` bytes.
            endian: slice step applied to each field's bytes before decoding
                (same meaning as in :meth:`to_bytes`).

        Raises:
            ValueError: if ``len(data)`` differs from ``cls.size``.
        """
        if len(data) != cls.size:
            raise ValueError(f"invalid bytes size: class: {cls} input: {len(data)} expected: {cls.size}")

        kwargs = {}
        with BytesIO(data) as stream:
            for field in fields(cls):
                # don't swap the order of character arrays
                order = _slice_step(field.type, endian)
                kwargs[field.name] = field.type.from_buffer_copy(stream.read(sizeof(field.type))[::order])

        return cls(**kwargs)

    @classmethod
    def default(cls):
        """Return an instance with every field default-constructed.

        Assumes each field's ctype can be called with no arguments.
        """
        return cls(**{field.name: field.type() for field in fields(cls)})

    @_ClassProperty
    def size(cls) -> int:
        """size in bytes"""
        return _packed_size(cls)

    def fields(self) -> tuple[Field, ...]:
        """Return the dataclass fields of this instance."""
        return fields(self)

    def __str__(self) -> str:
        """Return a multi-line ``name : value`` listing headed by the class name."""
        data = "\n  ".join([f"{field.name} : {getattr(self, field.name)}" for field in fields(self)])
        return f"{type(self).__name__}\n  {data}\n"

    def line_log(self) -> str:
        """Return the ``.value`` of every field joined by ``", "`` on one line."""
        return ", ".join([f"{getattr(self, field.name).value}" for field in fields(self)])