Untitled

 avatar
unknown
python
a month ago
3.2 kB
2
Indexable
def __fast_extract_field(data: bytes, path: str) -> str | int | bool | bytes | None:
    pos  = 0
    path = [ f"\"{field}\"" for field in path.split(".") ]

    def next_byte(cur_pos: int) -> bytes:
        return data[cur_pos:cur_pos + 1]

    def jump_padding(cur_pos: int) -> int:
        while cur_pos < len(data) and next_byte(cur_pos).isspace():
            cur_pos += 1
        
        if next_byte(cur_pos) == b":":
            cur_pos += 1

        while cur_pos < len(data) and next_byte(cur_pos).isspace():
            cur_pos += 1

        return cur_pos

    try:
        for field in path:
            # traverse
            pos  = data.index(field.encode(), pos)
            pos += len(field.encode())
            pos  = jump_padding(pos)

            if field != path[-1]:
                continue
            
            # get value
            match next_byte(pos):
                case b"\"": # str
                    start = pos + 1
                    end   = data.index(b"\"", start)

                    return data[start:end].decode("utf-8")
                case b"{" | b"[": # dict, list (as bytes)
                    start = pos
                    end   = pos + 1

                    stack = [ next_byte(start) ]

                    # traverse to closing bracket
                    while stack:
                        if next_byte(end) in ( b"{", b"[" ):
                            if stack[-1] != b"\"": # don't append opening brackets within strings
                                stack.append(next_byte(end))
                        elif next_byte(end) == b"}" and stack[-1] == b"{":
                            stack.pop()
                        elif next_byte(end) == b"]" and stack[-1] == b"[":
                            stack.pop()
                        elif next_byte(end) == b"\"":
                            if stack[-1] == b"\"":
                                stack.pop()
                            else:
                                stack.append(next_byte(end))

                        end += 1

                    return data[start:end]
                case _: # numerical, boolean, nonetype
                    start = pos
                    end   = pos

                    # while not the end, a space, or closing
                    while end < len(data) and not next_byte(end).isspace() and next_byte not in ( b",", b"}", b"]" ):
                        end += 1

                    value = data[start:end].decode("utf-8")

                    match value:
                        case "null":
                            return None
                        case "true":
                            return True
                        case "false":
                            return False
                        case _: # numerical
                            try:
                                return int(value)
                            except ValueError:
                                try: 
                                    return float(value)
                                except ValueError:
                                    return value
    except (ValueError, IndexError):
        return None
Leave a Comment