97 lines
2.5 KiB
Python
97 lines
2.5 KiB
Python
import zlib
|
|
from striprtf.striprtf import rtf_to_text
|
|
|
|
|
|
class Text:
    """Helpers for zlib-packed text blobs and RTF-to-plain-text conversion.

    All members are static; the class only serves as a namespace.
    """

    @staticmethod
    def decompress_bytes(vf_string):
        """Return the content of *vf_string* as raw bytes, inflating zlib data.

        No text decoding is applied to the result, which makes this the
        right entry point for binary-sensitive payloads such as RTF/DOCX.

        Rules:
            * zlib-compressed input -> the decompressed bytes.
            * anything else -> the original bytes, untouched.

        Args:
            vf_string: ``None``, ``str``, a bytes-like object, or any object
                exposing ``read()`` (e.g. a BLOB stream).

        Returns:
            bytes: the payload, or ``b""`` when the input is ``None``, empty,
            unreadable, or cannot be converted to bytes.
        """
        if vf_string is None:
            return b""

        # 1) Stream (BLOB): pull the whole content out first.
        if hasattr(vf_string, "read"):
            try:
                vf_string = vf_string.read()
            except Exception:
                # The stream type (and thus its exception types) is unknown
                # here, so this stays a deliberate best-effort catch-all.
                return b""

        if not vf_string:
            return b""

        # 2) Normalise to bytes.
        if isinstance(vf_string, str):
            vf_bytes = vf_string.encode("latin1", errors="ignore")
        elif isinstance(vf_string, int):
            # bytes(n) would silently build n NUL bytes instead of failing.
            return b""
        else:
            try:
                vf_bytes = bytes(vf_string)
            except (TypeError, ValueError):
                return b""

        # 3) Detect a zlib stream via the RFC 1950 header rules rather than a
        #    fixed list of byte pairs: compression method 8 (deflate), window
        #    size exponent <= 7, and the 16-bit header a multiple of 31.
        #    This also accepts 0x78 0x5E (levels 2-5) and smaller windows.
        looks_like_zlib = False
        if len(vf_bytes) > 2:
            cmf, flg = vf_bytes[0], vf_bytes[1]
            looks_like_zlib = (
                (cmf & 0x0F) == 8
                and (cmf >> 4) <= 7
                and ((cmf << 8) | flg) % 31 == 0
            )

        # 4) Inflate when it looks compressed (result stays as bytes).
        if looks_like_zlib:
            try:
                return zlib.decompress(vf_bytes)
            except zlib.error:
                # Header matched by coincidence or data is corrupt:
                # fall back to the original bytes.
                return vf_bytes

        return vf_bytes

    @staticmethod
    def decompress(vf_string):
        """Return the content of *vf_string* as ``str`` (general-purpose use).

        Kept for backward compatibility. The bytes produced by
        ``decompress_bytes`` are decoded as ISO-8859-1. For RTF/DOCX
        payloads call ``decompress_bytes`` instead.

        Returns:
            str: the decoded text, or ``""`` when there is no content.
        """
        raw = Text.decompress_bytes(vf_string)
        # ISO-8859-1 maps every byte value to a code point, so this decode
        # cannot fail; an empty payload simply decodes to "".
        return raw.decode("iso-8859-1", errors="ignore")

    @staticmethod
    def compress(text, *, encoding: str = "iso-8859-1"):
        """Compress *text* with zlib and return the compressed bytes.

        Args:
            text: ``None``, ``str``, a bytes-like object, or any object
                exposing ``read()``.
            encoding: codec used when the content is ``str``. Characters the
                codec cannot represent are dropped (``errors="ignore"``).

        Returns:
            bytes: the zlib stream, or ``b""`` when there is nothing to
            compress (``None``, empty ``str``/bytes, or an empty stream).

        Raises:
            TypeError: if the content is an integer or cannot be converted
                to bytes.
        """
        if text is None:
            return b""

        raw = text.read() if hasattr(text, "read") else text

        if isinstance(raw, int):
            # bytes(n) would compress n NUL bytes, which is never intended.
            raise TypeError("compress() expects str or bytes-like data, not int")

        # Checked after read() so empty streams behave like empty strings.
        if not raw:
            return b""

        if isinstance(raw, str):
            raw_bytes = raw.encode(encoding, errors="ignore")
        else:
            raw_bytes = bytes(raw)

        return zlib.compress(raw_bytes)

    @staticmethod
    def to_text(raw_text: str) -> str:
        """Return *raw_text* as stripped plain text, converting RTF if present.

        Input whose stripped form starts with ``{\\rtf`` is passed through
        ``rtf_to_text``; if that conversion fails, the stripped original is
        returned. Any other input is returned stripped.

        Returns:
            str: the plain text, or ``""`` for empty/``None`` input.
        """
        if not raw_text:
            return ""

        stripped = raw_text.strip()
        if not stripped.startswith("{\\rtf"):
            return stripped

        try:
            return rtf_to_text(raw_text).strip()
        except Exception:
            # Third-party parser boundary: on any failure hand back the
            # original content, stripped like every other return path.
            return stripped