Spaces:
Sleeping
Sleeping
Update cluster_insight.py
Browse files- cluster_insight.py +3 -63
cluster_insight.py
CHANGED
|
@@ -23,73 +23,13 @@ import pickle
|
|
| 23 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 24 |
import jieba
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
import matplotlib.font_manager as fm
|
| 28 |
-
|
| 29 |
-
# __file__ 就是当前执行的 app.py 文件本身
|
| 30 |
-
APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 31 |
-
print(f"✅ app.py 所在目录:{APP_DIR}") # 输出类似 /app(Spaces 中)
|
| 32 |
-
|
| 33 |
-
# 1. 明确字体文件的绝对路径(根据你的文件位置填写)
|
| 34 |
-
# 2. 动态拼接字体文件路径(字体和 app.py 同级,直接拼文件名)
|
| 35 |
-
FONT_FILE_NAME = "SourceHanSansCN-Light.otf" # 你的字体文件名(必须和实际一致)
|
| 36 |
-
FONT_PATH = os.path.join(APP_DIR, FONT_FILE_NAME)
|
| 37 |
-
print(f"✅ 字体文件完整路径:{FONT_PATH}") # 输出 /app/SourceHanSansCN-Light.otf(正确路径)
|
| 38 |
-
|
| 39 |
-
# 2. 检查文件是否存在(调试用,确认路径是否正确)
|
| 40 |
-
if not os.path.exists(FONT_PATH):
|
| 41 |
-
print(f"❌ 字体文件不存在!检查路径:{FONT_PATH}")
|
| 42 |
-
else:
|
| 43 |
-
print(f"✅ 找到字体文件:{FONT_PATH}")
|
| 44 |
-
|
| 45 |
-
# 3. 手动注册字体到系统(强制让 Plotly 识别)
|
| 46 |
-
try:
|
| 47 |
-
fm.fontManager.addfont(FONT_PATH) # 关键:手动添加字体到 matplotlib 管理器
|
| 48 |
-
# 获取字体名称(用于 Plotly 配置)
|
| 49 |
-
font_prop = fm.FontProperties(fname=FONT_PATH)
|
| 50 |
-
CHINESE_FONT = font_prop.get_name()
|
| 51 |
-
print(f"✅ 字体注册成功!名称:{CHINESE_FONT}")
|
| 52 |
-
except Exception as e:
|
| 53 |
-
print(f"❌ 字体注册失败:{str(e)}")
|
| 54 |
-
CHINESE_FONT = "Source Han Sans CN Light" # 兜底:直接用字体名(otf 可能识别为这个)
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
# 第一步:在文件开头导入 plotly.io
|
| 59 |
import plotly.io as pio
|
| 60 |
-
|
| 61 |
-
pio.kaleido.scope.fonts = {
|
| 62 |
-
"Source Han Sans CN": FONT_PATH # 字体名+绝对路径
|
| 63 |
-
}
|
| 64 |
-
print("✅ 已给 kaleido 显式指定字体路径")
|
| 65 |
-
# 第二步:设置默认引擎为 kaleido(旧版本 Plotly 也支持)
|
| 66 |
-
pio.kaleido.scope.default_format = "png"
|
| 67 |
-
print("✅ 已设置 kaleido 为默认图片引擎")
|
| 68 |
-
|
| 69 |
-
# 在服务器端代码中,新增字体检测逻辑(放在生成 fig 之前)
|
| 70 |
-
COMPATIBLE_FONTS = [
|
| 71 |
-
"WenQuanYi Zen Hei", # Linux 首选
|
| 72 |
-
"Source Han Sans CN Light", # 跨平台备选
|
| 73 |
-
"SimHei" # Windows Server 备选
|
| 74 |
-
]
|
| 75 |
|
| 76 |
-
def get_available_font():
|
| 77 |
-
import matplotlib.font_manager as fm
|
| 78 |
-
for font in COMPATIBLE_FONTS:
|
| 79 |
-
if any(font.lower() in f.lower() for f in fm.findSystemFonts()):
|
| 80 |
-
return font
|
| 81 |
-
return "Arial" # 最后兜底(中文可能方块,但不会报错)
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
# ========== 核心配置(和之前一致) ==========
|
| 85 |
FONT_FILE_PATH = "./SourceHanSansCN-Light.otf" # 字体文件在根目录
|
| 86 |
# CHINESE_FONT = "Source Han Sans CN Light"
|
| 87 |
-
|
| 88 |
-
print(f"✅ 服务器端使用字体:{CHINESE_FONT}")
|
| 89 |
-
# ========== 关键:设置环境变量,让 Plotly/Kaleido 找到字体 ==========
|
| 90 |
-
os.environ["KALEIDO_FONT_SEARCH_PATH"] = os.getcwd() # 字体搜索路径 = 当前目录
|
| 91 |
-
print(f"🔧 字体搜索路径:{os.getcwd()}")
|
| 92 |
-
print(f"🔧 字体文件是否存在:{os.path.exists(FONT_FILE_PATH)}")
|
| 93 |
|
| 94 |
# CHINESE_FONT = "Noto Sans SC" # 思源黑体(跨平台兼容,Plotly 自带)
|
| 95 |
|
|
|
|
| 23 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 24 |
import jieba
|
| 25 |
|
| 26 |
+
# ========== 核心配置(和之前一致) ==========
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
import plotly.io as pio
|
| 28 |
+
pio.kaleido.scope.default_font = "Noto Sans CJK SC"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
FONT_FILE_PATH = "./SourceHanSansCN-Light.otf" # 字体文件在根目录
|
| 31 |
# CHINESE_FONT = "Source Han Sans CN Light"
|
| 32 |
+
CHINESE_FONT = "Noto Sans CJK SC"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# CHINESE_FONT = "Noto Sans SC" # 思源黑体(跨平台兼容,Plotly 自带)
|
| 35 |
|