import zlib

try:
    from striprtf.striprtf import rtf_to_text
except ImportError:
    # striprtf is only needed by Text.to_text(); keep the compression helpers
    # importable without it. to_text() then returns RTF input unconverted.
    rtf_to_text = None


class Text:
    """Helpers for zlib-compressed text/blob fields and RTF-to-text conversion."""

    # Valid second header bytes (FLG) after CMF 0x78, per RFC 1950:
    # 0x01 = fastest, 0x5E = fast (zlib levels 2-5), 0x9C = default,
    # 0xDA = maximum compression.
    _ZLIB_FLG_BYTES = (0x01, 0x5E, 0x9C, 0xDA)

    @staticmethod
    def _looks_like_zlib(data):
        """Return True if *data* (bytes) starts with a recognised zlib header."""
        return (
            len(data) > 2
            and data[0] == 0x78
            and data[1] in Text._ZLIB_FLG_BYTES
        )

    @staticmethod
    def decompress_bytes(vf_string):
        """Return the payload of *vf_string* as bytes, inflating it if needed.

        No text decoding is performed, so the result is suitable for binary
        payloads such as RTF/DOCX.

        Args:
            vf_string: ``None``, a ``str``, a bytes-like object, or an object
                with a ``read()`` method (e.g. a BLOB stream).

        Returns:
            The decompressed bytes when the data starts with a zlib header
            and inflates successfully; otherwise the original bytes.
            ``b""`` for ``None``, empty input, a failing ``read()`` or a
            value that cannot be converted to bytes.
        """
        if vf_string is None:
            return b""

        # Stream-like input (BLOB): pull the whole content first.
        if hasattr(vf_string, "read"):
            try:
                vf_string = vf_string.read()
            except Exception:
                # Best effort: an unreadable stream is treated as empty.
                return b""

        if not vf_string:
            return b""

        # Normalise to bytes; latin1 maps code points 0-255 one-to-one, and
        # characters outside that range are dropped.
        if isinstance(vf_string, str):
            vf_bytes = vf_string.encode("latin1", errors="ignore")
        else:
            try:
                vf_bytes = bytes(vf_string)
            except Exception:
                return b""

        if Text._looks_like_zlib(vf_bytes):
            try:
                return zlib.decompress(vf_bytes)
            except Exception:
                # The header matched by coincidence or the stream is
                # corrupt: fall back to the untouched bytes.
                return vf_bytes

        return vf_bytes

    @staticmethod
    def decompress(vf_string):
        """Return the (possibly compressed) value as ``str``.

        Kept for backward compatibility. For binary payloads such as
        RTF/DOCX use :meth:`decompress_bytes` instead.

        Args:
            vf_string: Same inputs accepted by :meth:`decompress_bytes`.

        Returns:
            The payload decoded as ISO-8859-1, or ``""`` when it is empty.
        """
        raw = Text.decompress_bytes(vf_string)
        if not raw:
            return ""
        # ISO-8859-1 maps every byte value, so this decode cannot fail.
        return raw.decode("iso-8859-1", errors="ignore")

    @staticmethod
    def compress(text, *, encoding: str = "iso-8859-1"):
        """Compress *text* with zlib.

        Args:
            text: ``None``, a ``str``, a bytes-like object, or an object
                with a ``read()`` method.
            encoding: Encoding used when the content is a ``str``;
                characters it cannot represent are dropped.

        Returns:
            The zlib-compressed bytes, or ``b""`` when *text* is ``None``,
            the empty string, or a stream whose ``read()`` returns ``None``.
        """
        if text is None or text == "":
            return b""

        raw = text.read() if hasattr(text, "read") else text
        if raw is None:
            # Some streams return None instead of empty data.
            return b""

        if isinstance(raw, str):
            raw_bytes = raw.encode(encoding, errors="ignore")
        else:
            raw_bytes = bytes(raw)

        return zlib.compress(raw_bytes)

    @staticmethod
    def to_text(raw_text: str) -> str:
        """Convert RTF content to plain text; pass other text through stripped.

        Args:
            raw_text: Text that may be an RTF document (starts with
                ``{\\rtf`` after leading whitespace).

        Returns:
            ``""`` for empty input. For RTF input, the converted text with
            surrounding whitespace removed, or *raw_text* unchanged when the
            conversion fails or striprtf is not installed. For any other
            input, *raw_text* with surrounding whitespace removed.
        """
        if not raw_text:
            return ""

        stripped = raw_text.strip()
        if not stripped.startswith("{\\rtf"):
            return stripped

        if rtf_to_text is None:
            return raw_text

        try:
            return rtf_to_text(raw_text).strip()
        except Exception:
            # Best effort: malformed RTF is returned as-is.
            return raw_text