saas_api/actions/data/text.py

97 lines
2.5 KiB
Python

import zlib
from striprtf.striprtf import rtf_to_text
class Text:
    """Static helpers to compress, decompress and normalise stored text.

    Payloads may arrive as ``str``, bytes-like objects or file-like objects
    exposing ``read()``; compression uses zlib.
    """

    # Second header byte (FLG) of a zlib stream whose first byte (CMF) is
    # 0x78 and which has no preset dictionary. zlib emits 0x01 for level 0-1,
    # 0x5E for levels 2-5, 0x9C for level 6 (default) and 0xDA for levels 7-9.
    _ZLIB_FLG_BYTES = (0x01, 0x5E, 0x9C, 0xDA)

    @staticmethod
    def decompress_bytes(vf_string) -> bytes:
        """Return the payload as BYTES, inflating it when it is zlib data.

        No decoding is applied, which makes this the right entry point for
        binary-sensitive formats such as RTF/DOCX.

        Rules:
        - zlib-compressed input  -> the decompressed bytes
        - anything else          -> the original bytes, untouched

        Args:
            vf_string: ``None``, ``str``, a bytes-like object, or a file-like
                object exposing ``read()``.

        Returns:
            The (possibly decompressed) bytes. ``b""`` is returned for
            ``None``, empty input, an unreadable stream, or a value that
            cannot be converted to bytes.
        """
        if vf_string is None:
            return b""

        # 1) File-like values (e.g. BLOB handles) are read in full first.
        if hasattr(vf_string, "read"):
            try:
                vf_string = vf_string.read()
            except Exception:
                # Best effort: an unreadable stream is treated as empty.
                return b""

        if not vf_string:
            return b""

        # 2) Normalise to bytes. latin1 maps code points 0-255 one-to-one;
        #    characters outside that range are dropped.
        if isinstance(vf_string, str):
            vf_bytes = vf_string.encode("latin1", errors="ignore")
        else:
            try:
                vf_bytes = bytes(vf_string)
            except Exception:
                return b""

        # 3) Detect a zlib stream by its two-byte header.
        is_zlib = (
            len(vf_bytes) > 2
            and vf_bytes[0] == 0x78
            and vf_bytes[1] in Text._ZLIB_FLG_BYTES
        )
        if not is_zlib:
            return vf_bytes

        # 4) Inflate. Data that merely looks like zlib (e.g. plain text that
        #    starts with "x^") fails here and is returned unchanged.
        try:
            return zlib.decompress(vf_bytes)
        except zlib.error:
            return vf_bytes

    @staticmethod
    def decompress(vf_string) -> str:
        """Return the payload as ``str`` (kept for backward compatibility).

        The bytes produced by ``decompress_bytes`` are decoded as ISO-8859-1.
        For RTF/DOCX content prefer ``decompress_bytes``.

        Args:
            vf_string: Same accepted inputs as ``decompress_bytes``.

        Returns:
            The decoded text, or ``""`` when there is no content.
        """
        raw = Text.decompress_bytes(vf_string)
        # ISO-8859-1 assigns a character to every byte value, so this decode
        # cannot fail and an empty payload naturally yields "".
        return raw.decode("iso-8859-1")

    @staticmethod
    def compress(text, *, encoding: str = "iso-8859-1") -> bytes:
        """Compress text or bytes with zlib.

        Args:
            text: ``None``, ``str``, a bytes-like object, or a file-like
                object exposing ``read()``.
            encoding: Encoding applied to ``str`` input; characters that the
                encoding cannot represent are dropped.

        Returns:
            The zlib-compressed bytes, or ``b""`` when there is nothing to
            compress (``None``, empty text/bytes, or an empty stream).
        """
        if text is None:
            return b""

        raw = text.read() if hasattr(text, "read") else text
        if raw is None:
            return b""

        if isinstance(raw, str):
            raw_bytes = raw.encode(encoding, errors="ignore")
        else:
            raw_bytes = bytes(raw)

        # Check emptiness after normalisation so that "", b"" and an empty
        # stream all produce the same result.
        if not raw_bytes:
            return b""
        return zlib.compress(raw_bytes)

    @staticmethod
    def to_text(raw_text: str) -> str:
        """Convert stored content to plain text.

        Content that starts with ``{\\rtf`` (ignoring surrounding whitespace)
        is passed through ``rtf_to_text``; anything else is only stripped.

        Args:
            raw_text: The stored text, possibly RTF markup.

        Returns:
            The stripped plain text; ``""`` for empty input. If the RTF
            conversion raises, the input is returned unmodified.
        """
        if not raw_text:
            return ""

        stripped = raw_text.strip()
        if not stripped.startswith("{\\rtf"):
            return stripped

        try:
            return rtf_to_text(raw_text).strip()
        except Exception:
            # Best effort: keep the original markup rather than lose content.
            return raw_text