Struct: Options in Python
Structs are commonly used to format data sent between services over a network.
A struct groups typed fields in contiguous memory.
In C/C++, structs can be created directly using the ‘struct’ keyword.
Structs can be transformed to/from their contiguous byte representations.
This allows for a concise representation in code.
Python does not contain a built-in ‘struct’ object.
There are a number of ways to replicate the functionality of structs in python.
Example c struct.
1
2
3
4
5
6
|
#include <stdint.h>

/* Example message: a one-byte type tag, a fixed 50-byte description buffer,
 * and a 32-bit sequence number. Fixed-width types from <stdint.h> keep the
 * field sizes explicit. */
struct Message {
    uint8_t message_type;
    char message_description[50];
    uint32_t message_number;
};

/* Aggregate initialisation, in field order; the rest of the description
 * buffer is zero-filled. */
struct Message myMessage = {7, "useless message", 500};
|
Method 1: Struct Module
1
2
3
4
5
6
7
8
9
10
11
|
import struct
from io import BytesIO

# Encode: '>' = big-endian, 'B' = unsigned byte, '50s' = 50-byte string
# (zero-padded), 'I' = unsigned 32-bit integer.
myMessage = struct.pack('>B50sI', 7, b"useless message", 500)

# Decode: consume the buffer field by field; each unpack call returns a
# one-element tuple, which is destructured directly into the target name.
stream = BytesIO(myMessage)
(message_type,) = struct.unpack('>B', stream.read(1))
(message_description,) = struct.unpack('>50s', stream.read(50))
(message_number,) = struct.unpack('>I', stream.read(4))
|
Pros: Single line, fast.
Cons: No syntax highlighting, poor readability (requires knowledge of format characters),
requires double definition for encoder and decoder.
Method 2: Ctypes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
from ctypes import BigEndianStructure, c_ubyte, c_uint32, c_char


# BigEndianStructure matches struct '>' symbol for endianness
class Message(BigEndianStructure):
    """Example message: type byte, 50-byte description, 32-bit number."""

    # Pack fields back to back (no alignment padding): 1 + 50 + 4 = 55 bytes.
    _pack_ = 1
    _fields_ = [
        ('message_type', c_ubyte),
        ('message_description', c_char * 50),
        ('message_number', c_uint32),
    ]


# from python types to bytes
myMessage = Message(7, b"useless message", 500)
message_bytes = bytearray(myMessage)

# from bytes to python types
# from_buffer shares memory with its source, so it needs a writable buffer
# (a bytearray here) rather than immutable bytes.
myMessageNew = Message.from_buffer(message_bytes)
myMessageNew.message_type
myMessageNew.message_description
myMessageNew.message_number
|
Pros: Single class defines encode/decode, fast, readable types
Cons: No syntax property autocomplete, poor readability
Method 3: Dataclass (With Added Methods)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
from ctypes import c_ubyte, c_uint32, c_char
from dataclasses import dataclass


# "Struct" class defined below; it must be defined (or imported) before
# this class statement runs.
@dataclass
class Message(Struct):
    """Example message whose fields are annotated with ctypes types."""

    message_type: c_ubyte
    message_description: c_char * 50
    message_number: c_uint32


# from python types to bytes
myMessage = Message(7, b"useless message", 500)
message_bytes = myMessage.to_bytes()

# from bytes to python types
decode = Message.from_bytes(message_bytes)
decode.message_type
decode.message_description
decode.message_number
|
Pros: Single class defines encode/decode, readable types, good readability, syntax property autocomplete
Cons: Slower than C-based solutions
Custom Struct Class: Used in Method 3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
"""python STRUCT like class for encoding / decoding structs with minimal boiler plate code """
from io import BytesIO
from functools import lru_cache
from collections.abc import Iterable
from dataclasses import Field, dataclass, fields
from typing import get_type_hints
from ctypes import sizeof
class _ClassProperty:
    """Read-only attribute computed from the owning class.

    Stacking ``@classmethod`` on ``@property`` is only supported on
    Python 3.9-3.12; this descriptor provides the same ``cls.attr`` /
    ``obj.attr`` access on every version.
    """

    def __init__(self, getter):
        self._getter = getter

    def __get__(self, instance, owner=None):
        # Whether looked up on the class or on an instance, the getter
        # always receives the class.
        return self._getter(type(instance) if owner is None else owner)


@dataclass
class Struct:
    """Base class for dataclasses whose fields are annotated with ctypes types.

    Subclasses are decorated with ``@dataclass`` and gain ``to_bytes`` /
    ``from_bytes`` for converting between field values and a flat byte
    string (fields are concatenated in declaration order, without padding).
    """

    def __setattr__(self, name, value):
        """Assign ``value`` to ``name``, converting it to the annotated type.

        A value that is already an instance of the annotated type is stored
        unchanged; an iterable is unpacked into the type's constructor; any
        other value is passed to the constructor as a single argument.

        Raises:
            KeyError: if ``name`` has no type annotation on the class.
        """
        _type = get_type_hints(type(self))[name]  # dictionary lookup
        if isinstance(value, _type):
            converted = value
        elif isinstance(value, Iterable):
            converted = _type(*value)
        else:
            converted = _type(value)
        super().__setattr__(name, converted)

    def to_bytes(self, endian=-1):
        """Convert the Struct to bytes.

        Args:
            endian: slice step applied to the raw bytes of every field that
                is not a character array; ``-1`` (the default) reverses
                them, ``1`` keeps them in their in-memory order.

        Returns:
            The concatenated bytes of all fields, in declaration order.
        """
        _values = []
        for field in fields(self):
            # don't reorder strings
            order = 1 if "c_char_Array" in field.type.__name__ else endian
            _values.append(bytearray(getattr(self, field.name))[::order])
        return b''.join(_values)

    @classmethod
    def from_bytes(cls, data, endian=-1):
        """Convert bytes to a Struct.

        Args:
            data: bytes-like object holding exactly ``cls.size`` bytes.
            endian: slice step applied to the bytes of every field that is
                not a character array (same meaning as in ``to_bytes``).

        Returns:
            A new instance of ``cls`` populated from ``data``.

        Raises:
            ValueError: if ``len(data)`` differs from ``cls.size``.
        """
        if len(data) != cls.size:
            raise ValueError(f"invalid bytes size: class: {cls} input: {len(data)} expected: {cls.size}")
        kwargs = {}
        with BytesIO(data) as stream:
            for field in fields(cls):
                # don't swap the order of character arrays
                order = 1 if "c_char_Array" in field.type.__name__ else endian
                kwargs[field.name] = field.type.from_buffer_copy(stream.read(sizeof(field.type))[::order])
        return cls(**kwargs)

    @classmethod
    def default(cls):
        """Build an instance with every field default-constructed.

        Assumes each field's ctypes type can be called with no arguments.
        """
        return cls(**{field.name: field.type() for field in fields(cls)})

    @_ClassProperty
    @lru_cache(maxsize=None)
    def size(cls) -> int:
        """Size in bytes: the sum of ``sizeof`` over all field types."""
        return sum(sizeof(field.type) for field in fields(cls))

    def fields(self) -> tuple[Field, ...]:
        """Return the dataclass fields of this instance."""
        return fields(self)

    def __str__(self) -> str:
        """Formatted multi-line representation: class name, then one field per line."""
        data = "\n ".join([f"{field.name} : {getattr(self, field.name)}" for field in fields(self)])
        return f"{type(self).__name__}\n {data}\n"

    def line_log(self) -> str:
        """Return the ``.value`` of every field, comma-separated, on one line."""
        return ", ".join([f"{getattr(self, field.name).value}" for field in fields(self)])
|