diff --git a/docs/advanced_usage/custom_translations.md b/docs/advanced_usage/custom_translations.md
new file mode 100644
index 000000000..99291a436
--- /dev/null
+++ b/docs/advanced_usage/custom_translations.md
@@ -0,0 +1,75 @@
+# 自定义翻译
+
+ydata-profiling 支持用户自定义翻译文件,让您可以添加新的语言支持或修改现有翻译。
+
+## 快速开始
+### 1. 安装 ydata-profiling
+```bash
+pip install ydata-profiling
+```
+
+### 2. 导出翻译模板
+#### 方法A:使用命令行工具生成模板
+```bash
+ydata-profiling-translate create-template -l en -o ./
+# 这会生成 en_template.json 文件
+```
+#### 方法B:使用Python代码生成模板
+```python
+from ydata_profiling.i18n import export_translation_template
+
+# 导出英文模板作为翻译基础
+export_translation_template('en', 'translation_template.json')
+```
+
+### 3. 编辑翻译文件
+#### 将模板文件重命名并翻译,例如创建 french.json:
+```json
+{
+ "report": {
+ "title": "Rapport d'Analyse YData",
+ "overview": "Aperçu"
+ }
+}
+```
+
+### 4. 在您的项目代码中使用
+```python
+# 您的项目文件,例如 data_analysis.py
+import pandas as pd
+from ydata_profiling import ProfileReport
+from ydata_profiling.i18n import load_translation_file, set_locale
+
+# 加载您创建的翻译文件
+load_translation_file('./french.json', 'fr')
+
+# 设置语言并生成报告
+set_locale('fr')
+df = pd.read_csv('your_data.csv')
+profile = ProfileReport(df, title="Mon Analyse de Données")
+profile.to_file("rapport_francais.html")
+```
+
+### 项目结构示例
+#### 您的项目结构可能如下:
+```text
+my_data_project/
+├── data/
+│ └── dataset.csv
+├── translations/ # 您的翻译文件目录
+│ ├── zh.json
+│ ├── french.json
+│ └── german.json
+├── analysis.py # 您的分析脚本
+└── requirements.txt
+```
+#### 在 analysis.py 中:
+```python
+from ydata_profiling.i18n import add_translation_directory, set_locale
+
+# 加载整个翻译目录
+add_translation_directory('./translations/')
+
+# 现在可以使用任何语言
+set_locale('zh') # 使用中文
+```
\ No newline at end of file
diff --git a/docs/features/internationalization.md b/docs/features/internationalization.md
new file mode 100644
index 000000000..3a0e1d64c
--- /dev/null
+++ b/docs/features/internationalization.md
@@ -0,0 +1,40 @@
+# 国际化支持
+
+ydata-profiling 现在支持多语言报告生成。
+
+## 支持的语言
+
+- 英文 (en) - 默认
+- 中文 (zh)
+
+## 使用方法
+
+### 方法1:全局设置语言
+
+```python
+from ydata_profiling import ProfileReport
+from ydata_profiling.i18n import set_locale
+
+# 设置中文
+set_locale('zh')
+profile = ProfileReport(df)
+profile.to_file('中文报告.html')
+```
+
+### 方法2:在初始化时指定语言
+```python
+# 直接指定语言
+profile = ProfileReport(df, locale='zh', title='我的数据报告')
+profile.to_file('报告.html')
+```
+### 方法3:通过配置文件
+```yaml
+# config.yaml
+i18n:
+ locale: "zh"
+ auto_detect: false
+ fallback_locale: "en"
+```
+```python
+profile = ProfileReport(df, config_file='config.yaml')
+```
diff --git a/examples/i18n/advanced_usage_example.py b/examples/i18n/advanced_usage_example.py
new file mode 100644
index 000000000..3925940be
--- /dev/null
+++ b/examples/i18n/advanced_usage_example.py
@@ -0,0 +1,1025 @@
+"""
+Complete workflow example for custom translations in ydata-profiling
+演示如何使用 ydata-profiling 的自定义翻译功能的完整工作流程
+"""
+import pandas as pd
+import json
+import tempfile
+import shutil
+from pathlib import Path
+from ydata_profiling import ProfileReport
+from ydata_profiling.i18n import (
+ export_translation_template,
+ load_translation_file,
+ add_translation_directory,
+ set_locale,
+ get_available_locales,
+ get_locale
+)
+from ydata_profiling.i18n import _
+
+
+def create_sample_data():
+ """Build and return a small in-memory product DataFrame used by the examples."""
+ print("📊 Creating sample dataset...")
+
+ data = {
+ 'product_name': ['iPhone 14', 'Samsung Galaxy', 'Google Pixel', 'iPhone 14', 'OnePlus 10'],
+ 'price': [999, 899, 799, 999, 649],
+ 'category': ['Electronics', 'Electronics', 'Electronics', 'Electronics', 'Electronics'],
+ 'rating': [4.5, 4.2, 4.0, 4.5, 3.8],
+ 'in_stock': [True, True, False, True, True],
+ 'release_date': ['2022-09-16', '2022-02-25', '2022-10-13', '2022-09-16', '2022-01-11']
+ }
+
+ df = pd.DataFrame(data)
+ print(f"✅ Sample dataset created with {len(df)} rows and {len(df.columns)} columns")
+ return df
+
+
+def step1_export_template():
+ """Step 1: export the English translation template, preview it, and return its file name."""
+ print("\n🔧 Step 1: Exporting translation template...")
+
+ # Export the English template to use as the basis for new translations
+ template_file = "en_translation_template.json"
+ export_translation_template('en', template_file)
+
+ print(f"✅ Translation template exported to: {template_file}")
+
+ # Read the exported file back to show a preview of its contents
+ with open(template_file, 'r', encoding='utf-8') as f:
+ template_data = json.load(f)
+
+ print("📋 Template preview (first few keys):")
+ print(json.dumps({k: v for k, v in list(template_data.items())[:2]}, indent=2))
+
+ return template_file
+
+
+def step2_create_custom_translations(template_file):
+ """Step 2: write French and Spanish translation JSON files and return both file names."""
+ print(f"\n🌍 Step 2: Creating custom translations based on {template_file}...")
+
+ # Read the template (NOTE(review): `template` is loaded but never used below)
+ with open(template_file, 'r', encoding='utf-8') as f:
+ template = json.load(f)
+
+ # Build the French translation
+ french_translation = {
+ "report": {
+ "title": "Rapport de profilage YData",
+ "overview": "Aperçu",
+ "variables": "Variables",
+ "interactions": "Interactions",
+ "correlations": "Corrélations",
+ "missing_values": "Valeurs manquantes",
+ "sample": "Échantillon",
+ "duplicates": "Lignes dupliquées",
+ "footer_text": "Rapport généré par YData.",
+ "most_frequently_occurring": "Les plus fréquemment observés",
+ "columns": "Colonnes",
+ "more_details": "Plus de détails"
+ },
+ "overview": {
+ "dataset_info": "Informations sur l'ensemble de données",
+ "variable_types": "Types de variables",
+ "dataset_statistics": "Statistiques de l'ensemble de données",
+ "number_of_variables": "Nombre de variables",
+ "number_of_observations": "Nombre d'observations",
+ "missing_cells": "Cellules manquantes",
+ "missing_cells_percentage": "Cellules manquantes (%)",
+ "duplicate_rows": "Lignes dupliquées",
+ "duplicate_rows_percentage": "Lignes dupliquées (%)",
+ "average_record_size": "Taille moyenne d'un enregistrement en mémoire"
+ },
+ "variables": {
+ "numeric": "Numérique",
+ "categorical": "Catégorique",
+ "boolean": "Booléen",
+ "date": "Date",
+ "text": "Texte",
+ "url": "URL",
+ "path": "Chemin",
+ "image": "Image",
+ "distinct": "Distinct",
+ "distinct_percentage": "Distinct (%)",
+ "missing": "Manquant",
+ "missing_percentage": "Manquant (%)",
+ "statistics": "Statistiques",
+ "quantile_statistics": "Statistiques de quantiles",
+ "common_values": "Valeurs courantes",
+ "histogram": "Histogramme",
+ "mode": "Mode",
+ "standard_deviation": "Écart-type",
+ "sum": "Somme",
+ "mad": "Déviation absolue médiane (DAM)",
+ "coefficient_of_variation": "Coefficient de variation (CV)",
+ "kurtosis": "Kurtosis",
+ "skewness": "Asymétrie",
+ "range": "Étendue",
+ "interquartile_range": "Écart interquartile (IQR)",
+ "length": "Longueur",
+ "sample": "Échantillon"
+ },
+ "correlations": {
+ "pearson": "Corrélation de Pearson (r)",
+ "spearman": "Corrélation de Spearman (ρ)",
+ "kendall": "Corrélation de Kendall (τ)",
+ "phi_k": "Phik (φk)",
+ "cramers": "V de Cramér (φc)",
+ "auto": "Automatique"
+ },
+ "interactions": {
+ "scatter_plot": "Nuage de points",
+ "variable": "Variable"
+ },
+ "missing_values": {
+ "matrix": "Matrice",
+ "bar_chart": "Graphique à barres",
+ "heatmap": "Carte thermique",
+ "dendrogram": "Dendrogramme"
+ },
+ "alerts": {
+ "high_correlation": "Corrélation élevée",
+ "high_cardinality": "Cardinalité élevée",
+ "constant": "Constante",
+ "zeros": "Zéros",
+ "missing": "Manquant",
+ "skewed": "Asymétrique",
+ "type_date": "Date",
+ "uniform": "Uniforme",
+ "unique": "Unique",
+ "constant_length": "Longueur constante",
+ "duplicates": "Duplicatas",
+ "empty": "Vide",
+ "imbalance": "Déséquilibre",
+ "near_duplicates": "Quasi-duplicatas",
+ "non_stationary": "Non stationnaire",
+ "seasonal": "Saisonnier",
+ "truncated": "Tronqué",
+ "unsupported": "Non supporté",
+ "dirty_category": "Catégorie non propre"
+ },
+ "formatting": {
+ "bytes": "{value} octets",
+ "kb": "{value} Ko",
+ "mb": "{value} Mo",
+ "gb": "{value} Go",
+ "percentage": "{value} %"
+ },
+ "rendering": {
+ "generate_structure": "Générer la structure du rapport",
+ "html_progress": "Rendu HTML",
+ "json_progress": "Rendu JSON",
+ "widgets_progress": "Rendu des widgets",
+ "other_values_count": "Autres valeurs ({other_count})",
+ "missing": "(Manquant)"
+ },
+ "core": {
+ "unknown": "inconnu",
+ "alerts": "Alertes",
+ "collapse": "Réduire",
+ "container": "Conteneur",
+ "correlationTable": "Tableau de corrélation",
+ "dropdown": "Menu déroulant",
+ "duplicate": "Duplicata",
+ "frequencyTable": "Tableau de fréquence",
+ "frequencyTableSmall": "Tableau de fréquence réduit",
+ "html": "HTML",
+ "image": "Image",
+ "sample": "Échantillon",
+ "scores": "Scores",
+ "table": "Tableau",
+ "toggle_button": "Bouton de bascule",
+ "variable": "Variable",
+ "variable_info": "Informations sur la variable",
+ "model": {
+ "bar_count": "Compte",
+ "bar_caption": "Une visualisation simple des valeurs nulles par colonne.",
+ "matrix": "Matrice",
+ "matrix_caption": "La matrice de nullité est une représentation dense des données qui permet de repérer rapidement visuellement les modèles de complétude des données.",
+ "heatmap": "Carte thermique",
+ "heatmap_caption": "La carte thermique de corrélation mesure la corrélation de nullité : à quel point la présence ou l'absence d'une variable affecte la présence d'une autre.",
+ "first_rows": "Premières lignes",
+ "last_rows": "Dernières lignes",
+ "random_sample": "Échantillon aléatoire"
+ },
+ "structure": {
+ "correlations": "Corrélations",
+ "heatmap": "Carte thermique",
+ "table": "Tableau",
+ "overview": {
+ "values": "valeurs",
+ "number_variables": "Nombre de variables",
+ "number_observations": "Nombre d'observations",
+ "missing_cells": "Cellules manquantes",
+ "missing_cells_percentage": "Cellules manquantes (%)",
+ "duplicate_rows": "Lignes dupliquées",
+ "duplicate_rows_percentage": "Lignes dupliquées (%)",
+ "total_size_memory": "Taille totale en mémoire",
+ "average_record_memory": "Taille moyenne d'un enregistrement en mémoire",
+ "dataset_statistics": "Statistiques de l'ensemble de données",
+ "variable_types": "Types de variables",
+ "overview": "Aperçu",
+ "url": "URL",
+ "copyright": "Droits d'auteur",
+ "dataset": "Ensemble de données",
+ "analysis_started": "Analyse commencée",
+ "analysis_finished": "Analyse terminée",
+ "duration": "Durée",
+ "software_version": "Version du logiciel",
+ "download_configuration": "Télécharger la configuration",
+ "reproduction": "Reproduction",
+ "variable_descriptions": "Descriptions des variables",
+ "variables": "Variables",
+ "alerts_count": "Alertes ({count})",
+ "number_of_series": "Nombre de séries",
+ "timeseries_length": "Longueur de la série temporelle",
+ "starting_point": "Point de départ",
+ "ending_point": "Point de fin",
+ "period": "Période",
+ "timeseries_statistics": "Statistiques des séries temporelles",
+ "original": "Original",
+ "scaled": "Échelonné",
+ "time_series": "Séries temporelles",
+ "interactions": "Interactions",
+ "distinct": "Distinct",
+ "distinct_percentage": "Distinct (%)",
+ "missing": "Manquant",
+ "missing_percentage": "Manquant (%)",
+ "memory_size": "Taille en mémoire",
+ "file": "Fichier",
+ "size": "Taille",
+ "file_size": "Taille du fichier",
+ "file_size_caption": "Histogramme avec des intervalles fixes de tailles de fichiers (en octets)",
+ "unique": "Unique",
+ "unique_help": "Le nombre de valeurs uniques (toutes les valeurs qui n'apparaissent qu'une seule fois dans l'ensemble de données).",
+ "unique_percentage": "Unique (%)",
+ "max_length": "Longueur maximale",
+ "median_length": "Longueur médiane",
+ "mean_length": "Longueur moyenne",
+ "min_length": "Longueur minimale",
+ "length": "Longueur",
+ "length_histogram": "Histogramme de longueur",
+ "histogram_lengths_category": "Histogramme des longueurs de la catégorie",
+ "most_occurring_categories": "Catégories les plus fréquentes",
+ "most_frequent_character_per_category": "Caractère le plus fréquent par catégorie",
+ "most_occurring_scripts": "Scripts les plus fréquents",
+ "most_frequent_character_per_script": "Caractère le plus fréquent par script",
+ "most_occurring_blocks": "Blocs les plus fréquents",
+ "most_frequent_character_per_block": "Caractère le plus fréquent par bloc",
+ "total_characters": "Nombre total de caractères",
+ "distinct_characters": "Caractères distincts",
+ "distinct_categories": "Catégories distinctes",
+ "unicode_categories": "Catégories Unicode (cliquez pour plus d'informations)",
+ "distinct_scripts": "Scripts distincts",
+ "unicode_scripts": "Scripts Unicode (cliquez pour plus d'informations)",
+ "distinct_blocks": "Blocs distincts",
+ "unicode_blocks": "Blocs Unicode (cliquez pour plus d'informations)",
+ "characters_unicode": "Caractères et Unicode",
+ "characters_unicode_caption": "La norme Unicode attribue des propriétés à chaque point de code, qui peuvent être utilisées pour analyser des variables textuelles.",
+ "most_occurring_characters": "Caractères les plus fréquents",
+ "characters": "Caractères",
+ "categories": "Catégories",
+ "scripts": "Scripts",
+ "blocks": "Blocs",
+ "unicode": "Unicode",
+ "common_values": "Valeurs courantes",
+ "common_values_table": "Valeurs courantes (Tableau)",
+ "1st_row": "1ère ligne",
+ "2nd_row": "2ème ligne",
+ "3rd_row": "3ème ligne",
+ "4th_row": "4ème ligne",
+ "5th_row": "5ème ligne",
+ "categories_passes_threshold": "Nombre de catégories de variables dépassant le seuil (config.plot.cat_freq.max_unique)",
+ "common_values_plot": "Valeurs courantes (Graphique)",
+ "common_words": "Mots courants",
+ "wordcloud": "Nuage de mots",
+ "words": "Mots",
+ "mean": "Moyenne",
+ "min": "Minimum",
+ "max": "Maximum",
+ "zeros": "Zéros",
+ "zeros_percentage": "Zéros (%)",
+ "scatter": "Nuage",
+ "scatterplot": "Nuage de points",
+ "scatterplot_caption": "Nuage de points dans le plan complexe",
+ "mini_histogram": "Mini-histogramme",
+ "histogram": "Histogramme",
+ "histogram_caption": "Histogramme avec des intervalles fixes",
+ "extreme_values": "Valeurs extrêmes",
+ "histogram_s": "Histogramme(s)",
+ "invalid_dates": "Dates invalides",
+ "invalid_dates_percentage": "Dates invalides (%)",
+ "created": "Créé",
+ "accessed": "Accédé",
+ "modified": "Modifié",
+ "min_width": "Largeur minimale",
+ "median_width": "Largeur médiane",
+ "max_width": "Largeur maximale",
+ "min_height": "Hauteur minimale",
+ "median_height": "Hauteur médiane",
+ "max_height": "Hauteur maximale",
+ "min_area": "Surface minimale",
+ "median_area": "Surface médiane",
+ "max_area": "Surface maximale",
+ "scatter_plot_image_sizes": "Nuage de points des tailles d'image",
+ "scatter_plot": "Nuage de points",
+ "dimensions": "Dimensions",
+ "exif_keys": "Clés EXIF",
+ "exif_data": "Données EXIF",
+ "image": "Image",
+ "common_prefix": "Préfixe commun",
+ "unique_stems": "Racines uniques",
+ "unique_names": "Noms uniques",
+ "unique_extensions": "Extensions uniques",
+ "unique_directories": "Répertoires uniques",
+ "unique_anchors": "Ancres uniques",
+ "full": "Complet",
+ "stem": "Racine",
+ "name": "Nom",
+ "extension": "Extension",
+ "parent": "Parent",
+ "anchor": "Ancre",
+ "path": "Chemin",
+ "infinite": "Infini",
+ "infinite_percentage": "Infini (%)",
+ "Negative": "Négatif",
+ "Negative_percentage": "Négatif (%)",
+ "5_th_percentile": "5e centile",
+ "q1": "Q1",
+ "median": "Médiane",
+ "q3": "Q3",
+ "95_th_percentile": "95e centile",
+ "range": "Étendue",
+ "iqr": "Écart interquartile (IQR)",
+ "quantile_statistics": "Statistiques de quantiles",
+ "standard_deviation": "Écart-type",
+ "cv": "Coefficient de variation (CV)",
+ "kurtosis": "Kurtosis",
+ "mad": "Déviation absolue médiane (DAM)",
+ "skewness": "Asymétrie",
+ "sum": "Somme",
+ "variance": "Variance",
+ "monotonicity": "Monotonie",
+ "descriptive_statistics": "Statistiques descriptives",
+ "statistics": "Statistiques",
+ "augmented_dickey_fuller_test_value": "Valeur p du test de Dickey-Fuller augmenté",
+ "autocorrelation": "Autocorrélation",
+ "autocorrelation_caption": "ACF et PACF",
+ "timeseries": "Série temporelle",
+ "timeseries_plot": "Graphique de série temporelle",
+ "scheme": "Schéma",
+ "netloc": "Emplacement réseau",
+ "query": "Requête",
+ "fragment": "Fragment",
+ "heatmap": "Carte thermique"
+ }
+ }
+ },
+ "html": {
+ "alerts": {
+ "title": "Alertes",
+ "not_present": "Aucune alerte présente dans cet ensemble de données",
+ "has_constant_value": "a une valeur constante",
+ "has_constant_length": "a une longueur constante",
+ "has_dirty_categories": "a des catégories non propres",
+ "has_high_cardinality": "a une cardinalité élevée",
+ "distinct_values": "valeurs distinctes",
+ "dataset_has": "L'ensemble de données a",
+ "duplicate_rows": "lignes dupliquées",
+ "dataset_is_empty": "L'ensemble de données est vide",
+ "is_highly": "est fortement",
+ "correlated_with": "corrélé avec",
+ "and": "et",
+ "other_fields": "autres champs",
+ "highly_imbalanced": "est fortement déséquilibré",
+ "has": "a",
+ "infinite_values": "valeurs infinies",
+ "missing_values": "valeurs manquantes",
+ "near_duplicate_rows": "lignes quasi-dupliquées",
+ "non_stationary": "est non stationnaire",
+ "seasonal": "est saisonnier",
+ "highly_skewed": "est fortement asymétrique",
+ "truncated_files": "fichiers tronqués",
+ "alert_type_date": "contient uniquement des valeurs datetime, mais est catégorique. Envisagez d'appliquer",
+ "uniformly_distributed": "est uniformément distribué",
+ "unique_values": "a des valeurs uniques",
+ "alert_unsupported": "est un type non supporté, vérifiez s'il nécessite un nettoyage ou une analyse supplémentaire",
+ "zeros": "zéros"
+ },
+ "sequence": {
+ "overview_tabs": {
+ "brought_to_you_by": "Présenté par YData"
+ }
+ },
+ "dropdown": "Sélectionner les colonnes",
+ "frequency_table": {
+ "value": "Valeur",
+ "count": "Compte",
+ "frequency_percentage": "Fréquence (%)",
+ "redacted_value": "Valeur masquée",
+ "no_values_found": "Aucune valeur trouvée"
+ },
+ "scores": {
+ "overall_data_quality": "Score global de la qualité des données"
+ },
+ "variable_info": {
+ "no_alerts": "Aucune alerte"
+ }
+ }
+ }
+
+ # Build the Spanish translation
+ spanish_translation = {
+ "report": {
+ "title": "Informe de Perfilado de YData",
+ "overview": "Resumen",
+ "variables": "Variables",
+ "interactions": "Interacciones",
+ "correlations": "Correlaciones",
+ "missing_values": "Valores faltantes",
+ "sample": "Muestra",
+ "duplicates": "Filas duplicadas",
+ "footer_text": "Informe generado por YData.",
+ "most_frequently_occurring": "Los más frecuentes",
+ "columns": "Columnas",
+ "more_details": "Más detalles"
+ },
+ "overview": {
+ "dataset_info": "Información del conjunto de datos",
+ "variable_types": "Tipos de variables",
+ "dataset_statistics": "Estadísticas del conjunto de datos",
+ "number_of_variables": "Número de variables",
+ "number_of_observations": "Número de observaciones",
+ "missing_cells": "Celdas faltantes",
+ "missing_cells_percentage": "Celdas faltantes (%)",
+ "duplicate_rows": "Filas duplicadas",
+ "duplicate_rows_percentage": "Filas duplicadas (%)",
+ "average_record_size": "Tamaño promedio de registro en memoria"
+ },
+ "variables": {
+ "numeric": "Numérico",
+ "categorical": "Categórico",
+ "boolean": "Booleano",
+ "date": "Fecha",
+ "text": "Texto",
+ "url": "URL",
+ "path": "Ruta",
+ "image": "Imagen",
+ "distinct": "Distinto",
+ "distinct_percentage": "Distinto (%)",
+ "missing": "Faltante",
+ "missing_percentage": "Faltante (%)",
+ "statistics": "Estadísticas",
+ "quantile_statistics": "Estadísticas de cuantiles",
+ "common_values": "Valores comunes",
+ "histogram": "Histograma",
+ "mode": "Moda",
+ "standard_deviation": "Desviación estándar",
+ "sum": "Suma",
+ "mad": "Desviación absoluta mediana (DAM)",
+ "coefficient_of_variation": "Coeficiente de variación (CV)",
+ "kurtosis": "Curtosis",
+ "skewness": "Asimetría",
+ "range": "Rango",
+ "interquartile_range": "Rango intercuartílico (IQR)",
+ "length": "Longitud",
+ "sample": "Muestra"
+ },
+ "correlations": {
+ "pearson": "Correlación de Pearson (r)",
+ "spearman": "Correlación de Spearman (ρ)",
+ "kendall": "Correlación de Kendall (τ)",
+ "phi_k": "Phik (φk)",
+ "cramers": "V de Cramér (φc)",
+ "auto": "Automático"
+ },
+ "interactions": {
+ "scatter_plot": "Gráfico de dispersión",
+ "variable": "Variable"
+ },
+ "missing_values": {
+ "matrix": "Matriz",
+ "bar_chart": "Gráfico de barras",
+ "heatmap": "Mapa de calor",
+ "dendrogram": "Dendrograma"
+ },
+ "alerts": {
+ "high_correlation": "Correlación alta",
+ "high_cardinality": "Alta cardinalidad",
+ "constant": "Constante",
+ "zeros": "Ceros",
+ "missing": "Faltante",
+ "skewed": "Asimétrico",
+ "type_date": "Fecha",
+ "uniform": "Uniforme",
+ "unique": "Único",
+ "constant_length": "Longitud constante",
+ "duplicates": "Duplicados",
+ "empty": "Vacío",
+ "imbalance": "Desequilibrio",
+ "near_duplicates": "Casi duplicados",
+ "non_stationary": "No estacionario",
+ "seasonal": "Estacional",
+ "truncated": "Truncado",
+ "unsupported": "No soportado",
+ "dirty_category": "Categoría sucia"
+ },
+ "formatting": {
+ "bytes": "{value} bytes",
+ "kb": "{value} KB",
+ "mb": "{value} MB",
+ "gb": "{value} GB",
+ "percentage": "{value}%"
+ },
+ "rendering": {
+ "generate_structure": "Generar estructura del informe",
+ "html_progress": "Renderizar HTML",
+ "json_progress": "Renderizar JSON",
+ "widgets_progress": "Renderizar widgets",
+ "other_values_count": "Otros valores ({other_count})",
+ "missing": "(Faltante)"
+ },
+ "core": {
+ "unknown": "desconocido",
+ "alerts": "Alertas",
+ "collapse": "Colapsar",
+ "container": "Contenedor",
+ "correlationTable": "Tabla de correlación",
+ "dropdown": "Menú desplegable",
+ "duplicate": "Duplicado",
+ "frequencyTable": "Tabla de frecuencia",
+ "frequencyTableSmall": "Tabla de frecuencia pequeña",
+ "html": "HTML",
+ "image": "Imagen",
+ "sample": "Muestra",
+ "scores": "Puntuaciones",
+ "table": "Tabla",
+ "toggle_button": "Botón de alternancia",
+ "variable": "Variable",
+ "variable_info": "Información de la variable",
+ "model": {
+ "bar_count": "Conteo",
+ "bar_caption": "Una visualización simple de la nulidad por columna.",
+ "matrix": "Matriz",
+ "matrix_caption": "La matriz de nulidad es una representación densa de datos que permite identificar rápidamente patrones visuales en la completitud de los datos.",
+ "heatmap": "Mapa de calor",
+ "heatmap_caption": "El mapa de calor de correlación mide la correlación de nulidad: cómo la presencia o ausencia de una variable afecta la presencia de otra.",
+ "first_rows": "Primeras filas",
+ "last_rows": "Últimas filas",
+ "random_sample": "Muestra aleatoria"
+ },
+ "structure": {
+ "correlations": "Correlaciones",
+ "heatmap": "Mapa de calor",
+ "table": "Tabla",
+ "overview": {
+ "values": "valores",
+ "number_variables": "Número de variables",
+ "number_observations": "Número de observaciones",
+ "missing_cells": "Celdas faltantes",
+ "missing_cells_percentage": "Celdas faltantes (%)",
+ "duplicate_rows": "Filas duplicadas",
+ "duplicate_rows_percentage": "Filas duplicadas (%)",
+ "total_size_memory": "Tamaño total en memoria",
+ "average_record_memory": "Tamaño promedio de registro en memoria",
+ "dataset_statistics": "Estadísticas del conjunto de datos",
+ "variable_types": "Tipos de variables",
+ "overview": "Resumen",
+ "url": "URL",
+ "copyright": "Derechos de autor",
+ "dataset": "Conjunto de datos",
+ "analysis_started": "Análisis iniciado",
+ "analysis_finished": "Análisis finalizado",
+ "duration": "Duración",
+ "software_version": "Versión del software",
+ "download_configuration": "Descargar configuración",
+ "reproduction": "Reproducción",
+ "variable_descriptions": "Descripciones de variables",
+ "variables": "Variables",
+ "alerts_count": "Alertas ({count})",
+ "number_of_series": "Número de series",
+ "timeseries_length": "Longitud de la serie temporal",
+ "starting_point": "Punto de inicio",
+ "ending_point": "Punto final",
+ "period": "Período",
+ "timeseries_statistics": "Estadísticas de series temporales",
+ "original": "Original",
+ "scaled": "Escalado",
+ "time_series": "Series temporales",
+ "interactions": "Interacciones",
+ "distinct": "Distinto",
+ "distinct_percentage": "Distinto (%)",
+ "missing": "Faltante",
+ "missing_percentage": "Faltante (%)",
+ "memory_size": "Tamaño en memoria",
+ "file": "Archivo",
+ "size": "Tamaño",
+ "file_size": "Tamaño del archivo",
+ "file_size_caption": "Histograma con intervalos fijos de tamaños de archivo (en bytes)",
+ "unique": "Único",
+ "unique_help": "El número de valores únicos (todos los valores que aparecen exactamente una vez en el conjunto de datos).",
+ "unique_percentage": "Único (%)",
+ "max_length": "Longitud máxima",
+ "median_length": "Longitud mediana",
+ "mean_length": "Longitud media",
+ "min_length": "Longitud mínima",
+ "length": "Longitud",
+ "length_histogram": "Histograma de longitud",
+ "histogram_lengths_category": "Histograma de longitudes de la categoría",
+ "most_occurring_categories": "Categorías más frecuentes",
+ "most_frequent_character_per_category": "Carácter más frecuente por categoría",
+ "most_occurring_scripts": "Scripts más frecuentes",
+ "most_frequent_character_per_script": "Carácter más frecuente por script",
+ "most_occurring_blocks": "Bloques más frecuentes",
+ "most_frequent_character_per_block": "Carácter más frecuente por bloque",
+ "total_characters": "Total de caracteres",
+ "distinct_characters": "Caracteres distintos",
+ "distinct_categories": "Categorías distintas",
+ "unicode_categories": "Categorías Unicode (haga clic para más información)",
+ "distinct_scripts": "Scripts distintos",
+ "unicode_scripts": "Scripts Unicode (haga clic para más información)",
+ "distinct_blocks": "Bloques distintos",
+ "unicode_blocks": "Bloques Unicode (haga clic para más información)",
+ "characters_unicode": "Caracteres y Unicode",
+ "characters_unicode_caption": "El estándar Unicode asigna propiedades a cada punto de código, que pueden usarse para analizar variables textuales.",
+ "most_occurring_characters": "Caracteres más frecuentes",
+ "characters": "Caracteres",
+ "categories": "Categorías",
+ "scripts": "Scripts",
+ "blocks": "Bloques",
+ "unicode": "Unicode",
+ "common_values": "Valores comunes",
+ "common_values_table": "Valores comunes (Tabla)",
+ "1st_row": "1ª fila",
+ "2nd_row": "2ª fila",
+ "3rd_row": "3ª fila",
+ "4th_row": "4ª fila",
+ "5th_row": "5ª fila",
+ "categories_passes_threshold": "Número de categorías de variables que superan el umbral (config.plot.cat_freq.max_unique)",
+ "common_values_plot": "Valores comunes (Gráfico)",
+ "common_words": "Palabras comunes",
+ "wordcloud": "Nube de palabras",
+ "words": "Palabras",
+ "mean": "Media",
+ "min": "Mínimo",
+ "max": "Máximo",
+ "zeros": "Ceros",
+ "zeros_percentage": "Ceros (%)",
+ "scatter": "Dispersión",
+ "scatterplot": "Gráfico de dispersión",
+ "scatterplot_caption": "Gráfico de dispersión en el plano complejo",
+ "mini_histogram": "Mini-histograma",
+ "histogram": "Histograma",
+ "histogram_caption": "Histograma con intervalos fijos",
+ "extreme_values": "Valores extremos",
+ "histogram_s": "Histograma(s)",
+ "invalid_dates": "Fechas inválidas",
+ "invalid_dates_percentage": "Fechas inválidas (%)",
+ "created": "Creado",
+ "accessed": "Accedido",
+ "modified": "Modificado",
+ "min_width": "Ancho mínimo",
+ "median_width": "Ancho mediano",
+ "max_width": "Ancho máximo",
+ "min_height": "Altura mínima",
+ "median_height": "Altura mediana",
+ "max_height": "Altura máxima",
+ "min_area": "Área mínima",
+ "median_area": "Área mediana",
+ "max_area": "Área máxima",
+ "scatter_plot_image_sizes": "Gráfico de dispersión de tamaños de imagen",
+ "scatter_plot": "Gráfico de dispersión",
+ "dimensions": "Dimensiones",
+ "exif_keys": "Claves EXIF",
+ "exif_data": "Datos EXIF",
+ "image": "Imagen",
+ "common_prefix": "Prefijo común",
+ "unique_stems": "Raíces únicas",
+ "unique_names": "Nombres únicos",
+ "unique_extensions": "Extensiones únicas",
+ "unique_directories": "Directorios únicos",
+ "unique_anchors": "Anclas únicas",
+ "full": "Completo",
+ "stem": "Raíz",
+ "name": "Nombre",
+ "extension": "Extensión",
+ "parent": "Padre",
+ "anchor": "Ancla",
+ "path": "Ruta",
+ "infinite": "Infinito",
+ "infinite_percentage": "Infinito (%)",
+ "Negative": "Negativo",
+ "Negative_percentage": "Negativo (%)",
+ "5_th_percentile": "Percentil 5",
+ "q1": "Q1",
+ "median": "Mediana",
+ "q3": "Q3",
+ "95_th_percentile": "Percentil 95",
+ "range": "Rango",
+ "iqr": "Rango intercuartílico (IQR)",
+ "quantile_statistics": "Estadísticas de cuantiles",
+ "standard_deviation": "Desviación estándar",
+ "cv": "Coeficiente de variación (CV)",
+ "kurtosis": "Curtosis",
+ "mad": "Desviación absoluta mediana (DAM)",
+ "skewness": "Asimetría",
+ "sum": "Suma",
+ "variance": "Varianza",
+ "monotonicity": "Monotonía",
+ "descriptive_statistics": "Estadísticas descriptivas",
+ "statistics": "Estadísticas",
+ "augmented_dickey_fuller_test_value": "Valor p del test de Dickey-Fuller aumentado",
+ "autocorrelation": "Autocorrelación",
+ "autocorrelation_caption": "ACF y PACF",
+ "timeseries": "Serie temporal",
+ "timeseries_plot": "Gráfico de serie temporal",
+ "scheme": "Esquema",
+ "netloc": "Ubicación de red",
+ "query": "Consulta",
+ "fragment": "Fragmento",
+ "heatmap": "Mapa de calor"
+ }
+ }
+ },
+ "html": {
+ "alerts": {
+ "title": "Alertas",
+ "not_present": "No hay alertas presentes en este conjunto de datos",
+ "has_constant_value": "tiene un valor constante",
+ "has_constant_length": "tiene una longitud constante",
+ "has_dirty_categories": "tiene categorías sucias",
+ "has_high_cardinality": "tiene una alta cardinalidad",
+ "distinct_values": "valores distintos",
+ "dataset_has": "El conjunto de datos tiene",
+ "duplicate_rows": "filas duplicadas",
+ "dataset_is_empty": "El conjunto de datos está vacío",
+ "is_highly": "está altamente",
+ "correlated_with": "correlacionado con",
+ "and": "y",
+ "other_fields": "otros campos",
+ "highly_imbalanced": "está altamente desequilibrado",
+ "has": "tiene",
+ "infinite_values": "valores infinitos",
+ "missing_values": "valores faltantes",
+ "near_duplicate_rows": "filas casi duplicadas",
+ "non_stationary": "es no estacionario",
+ "seasonal": "es estacional",
+ "highly_skewed": "es altamente asimétrico",
+ "truncated_files": "archivos truncados",
+ "alert_type_date": "solo contiene valores de fecha y hora, pero es categórico. Considere aplicar",
+ "uniformly_distributed": "está uniformemente distribuido",
+ "unique_values": "tiene valores únicos",
+ "alert_unsupported": "es un tipo no soportado, verifique si necesita limpieza o análisis adicional",
+ "zeros": "ceros"
+ },
+ "sequence": {
+ "overview_tabs": {
+ "brought_to_you_by": "Presentado por YData"
+ }
+ },
+ "dropdown": "Seleccionar columnas",
+ "frequency_table": {
+ "value": "Valor",
+ "count": "Conteo",
+ "frequency_percentage": "Frecuencia (%)",
+ "redacted_value": "Valor redactado",
+ "no_values_found": "No se encontraron valores"
+ },
+ "scores": {
+ "overall_data_quality": "Puntuación general de calidad de datos"
+ },
+ "variable_info": {
+ "no_alerts": "Sin alertas"
+ }
+ }
+ }
+
+ # Save both translations as UTF-8 JSON (ensure_ascii=False keeps accented characters readable)
+ french_file = "french_translation.json"
+ spanish_file = "spanish_translation.json"
+
+ with open(french_file, 'w', encoding='utf-8') as f:
+ json.dump(french_translation, f, indent=2, ensure_ascii=False)
+
+ with open(spanish_file, 'w', encoding='utf-8') as f:
+ json.dump(spanish_translation, f, indent=2, ensure_ascii=False)
+
+ print(f"✅ French translation saved to: {french_file}")
+ print(f"✅ Spanish translation saved to: {spanish_file}")
+
+ return french_file, spanish_file
+
+
+def step3_single_file_loading(df, french_file):
+ """Step 3: load one translation file as locale 'fr', write a French report, and return its file name."""
+ print(f"\n📁 Step 3: Loading single translation file - {french_file}")
+
+ # Register the French translation file under the 'fr' locale code
+ load_translation_file(french_file, 'fr')
+
+ print(f"📋 Available locales after loading: {get_available_locales()}")
+
+ # Switch to French before building the report
+ set_locale('fr')
+ print(f"🌍 Current locale set to: {get_locale()}")
+
+ profile = ProfileReport(df, title="Rapport d'Analyse des Produits")
+ output_file = "product_analysis_french.html"
+
+ # Write the report; on any exception the handler below cleans up and retries once
+ try:
+ profile.to_file(output_file)
+ print(f"✅ French report generated: {output_file}")
+ except Exception as e:
+ print(f"⚠️ Warning generating French report: {e}")
+ # If generation failed, delete any existing output file and retry
+ if Path(output_file).exists():
+ Path(output_file).unlink()
+ profile.to_file(output_file)
+ print(f"✅ French report generated (after cleanup): {output_file}")
+
+ return output_file
+
+
+def step4_directory_loading(df, french_file, spanish_file):
+    """Step 4: load a whole translation directory and render a Spanish report."""
+    print(f"\n📂 Step 4: Loading translation directory")
+
+    # Create the translation directory (no error if it already exists)
+    translations_dir = Path("custom_translations")
+    translations_dir.mkdir(exist_ok=True)
+
+    # Destination paths for the translation files inside the directory
+    french_target = translations_dir / "fr.json"
+    spanish_target = translations_dir / "es.json"
+
+    # Copy instead of moving, to avoid errors when the target file already exists
+    try:
+        shutil.copy2(french_file, french_target)
+        print(f"📄 Copied {french_file} to {french_target}")
+    except Exception as e:
+        print(f"⚠️ Warning copying French file: {e}")
+        # If the copy failed, fall back to overwriting with copyfile
+        shutil.copyfile(french_file, french_target)
+
+    try:
+        shutil.copy2(spanish_file, spanish_target)
+        print(f"📄 Copied {spanish_file} to {spanish_target}")
+    except Exception as e:
+        print(f"⚠️ Warning copying Spanish file: {e}")
+        # If the copy failed, fall back to overwriting with copyfile
+        shutil.copyfile(spanish_file, spanish_target)
+
+    print(f"📁 Created translation directory: {translations_dir}")
+    print(f"📄 Files in directory: {list(translations_dir.glob('*.json'))}")
+
+    # Load the whole translation directory in one call
+    add_translation_directory(translations_dir)
+
+    print(f"📋 Available locales after directory loading: {get_available_locales()}")
+
+    # Generate the Spanish report
+    set_locale('es')
+    print(f"🌍 Current locale set to: {get_locale()}")
+
+    profile = ProfileReport(df, title="Informe de Análisis de Productos")
+    output_file = "product_analysis_spanish.html"
+
+    # Generate the report, overwriting any existing output
+    try:
+        profile.to_file(output_file)
+        print(f"✅ Spanish report generated: {output_file}")
+    except Exception as e:
+        print(f"⚠️ Warning generating Spanish report: {e}")
+        # If generation failed, delete the existing file (if any) and retry once
+        if Path(output_file).exists():
+            Path(output_file).unlink()
+        profile.to_file(output_file)
+        print(f"✅ Spanish report generated (after cleanup): {output_file}")
+
+    return output_file, translations_dir
+
+
+def step5_using_locale_parameter(df):
+    """Step 5: choose the report language through ProfileReport's ``locale`` parameter."""
+    print(f"\n⚙️ Step 5: Using ProfileReport locale parameter")
+
+    # Specify the language directly on ProfileReport
+    print("🔄 Generating report with locale='zh' parameter...")
+    profile_zh = ProfileReport(df, title="产品分析报告", locale='zh')
+    output_file = "product_analysis_chinese.html"
+
+    # Generate the report, overwriting any existing output
+    try:
+        profile_zh.to_file(output_file)
+        print(f"✅ Chinese report generated: {output_file}")
+    except Exception as e:
+        print(f"⚠️ Warning generating Chinese report: {e}")
+        # If generation failed, delete the existing file (if any) and retry once
+        if Path(output_file).exists():
+            Path(output_file).unlink()
+        profile_zh.to_file(output_file)
+        print(f"✅ Chinese report generated (after cleanup): {output_file}")
+
+    print(f"🌍 Current global locale remains: {get_locale()}")
+
+    return output_file
+
+
+def cleanup_files(files_to_clean):
+    """Remove the generated files and directories; a failure on one entry is reported and skipped."""
+    print(f"\n🧹 Cleaning up generated files...")
+
+    for file_path in files_to_clean:
+        try:
+            if isinstance(file_path, str):
+                file_path = Path(file_path)
+
+            if file_path.exists():
+                if file_path.is_file():
+                    file_path.unlink()
+                elif file_path.is_dir():
+                    shutil.rmtree(file_path)
+                print(f"🗑️ Removed: {file_path}")
+        except Exception as e:
+            print(f"⚠️ Could not remove {file_path}: {e}")
+
+
+def safe_file_operation(operation_func, *args, **kwargs):
+    """Call ``operation_func(*args, **kwargs)`` with up to 3 attempts; re-raise after the last failure."""
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            return operation_func(*args, **kwargs)
+        except Exception as e:
+            if attempt < max_retries - 1:
+                print(f"⚠️ Attempt {attempt + 1} failed: {e}. Retrying...")
+                import time
+                time.sleep(0.5)  # short pause before the next attempt
+            else:
+                print(f"❌ All attempts failed: {e}")
+                raise
+
+
+def main():
+    """Run the complete custom-translation workflow demo (steps 1-5), then offer to clean up."""
+    print("🚀 YData Profiling Custom Translation Workflow Example")
+    print("=" * 60)
+
+    # Track every generated file/directory so it can be removed at the end
+    files_to_clean = []
+
+    try:
+        # Create the sample data
+        df = create_sample_data()
+
+        # Step 1: export the template
+        template_file = step1_export_template()
+        files_to_clean.append(template_file)
+
+        # Step 2: create the custom translations
+        french_file, spanish_file = step2_create_custom_translations(template_file)
+        files_to_clean.extend([french_file, spanish_file])
+
+        # Step 3: single-file loading
+        french_report = safe_file_operation(step3_single_file_loading, df, french_file)
+        files_to_clean.append(french_report)
+
+        # Step 4: directory loading
+        spanish_report, translations_dir = safe_file_operation(step4_directory_loading, df, french_file, spanish_file)
+        files_to_clean.extend([spanish_report, translations_dir])
+
+        # Step 5: use the locale parameter
+        chinese_report = safe_file_operation(step5_using_locale_parameter, df)
+        files_to_clean.append(chinese_report)
+
+        print(f"\n🎉 All steps completed successfully!")
+        print(f"📊 Generated reports:")
+        print(f" - {french_report} (French)")
+        print(f" - {spanish_report} (Spanish)")
+        print(f" - {chinese_report} (Chinese)")
+        print(f"\n💡 You can open these HTML files in your browser to see the translated reports.")
+
+    except Exception as e:
+        print(f"❌ Error occurred: {e}")
+        import traceback
+        traceback.print_exc()
+
+    finally:
+        # Ask whether to clean up; Ctrl-C or a closed/piped stdin (EOFError) keeps the files
+        try:
+            response = input(f"\n🤔 Do you want to clean up generated files? (y/N): ").strip().lower()
+            if response in ['y', 'yes']:
+                cleanup_files(files_to_clean)
+            else:
+                print("📁 Files kept for your review.")
+                print("💡 Tip: You can run this script multiple times to see the overwrites working.")
+        except (KeyboardInterrupt, EOFError):
+            print(f"\n📁 Files kept for your review.")
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/examples/i18n/i18n_example.py b/examples/i18n/i18n_example.py
new file mode 100644
index 000000000..faca0a566
--- /dev/null
+++ b/examples/i18n/i18n_example.py
@@ -0,0 +1,42 @@
+"""
+Example of using ydata-profiling with internationalization
+"""
+import pandas as pd
+from ydata_profiling import ProfileReport
+from ydata_profiling.i18n import set_locale
+from ydata_profiling.utils.locale_utils import auto_set_locale
+
+# Create sample data
+df = pd.DataFrame({
+    'numeric_column': [1, 2, 3, 4, 5],
+    'categorical_column': ['A', 'B', 'A', 'C', 'B'],
+    'text_column': ['Hello', 'World', 'Test', 'Data', 'Science']
+})
+
+# Baseline: generate a report without touching any locale setting
+print("Default report generation report...")
+profile_default = ProfileReport(df, title="Default Data Profiling Report")
+profile_default.to_file("default_report.html")
+
+# Let auto_set_locale() pick the language before building the next report
+print("Auto-detect generation report...")
+auto_set_locale()
+profile_zh = ProfileReport(df, title="Auto Detect Data Profiling Report")
+profile_zh.to_file("auto_report_chinese.html")
+
+# Generate a report in English by setting the global locale explicitly
+print("Generating English report...")
+set_locale('en')
+profile_en = ProfileReport(df, title="English Data Profiling Report")
+profile_en.to_file("report_english.html")
+
+# Generate a report in Chinese by setting the global locale explicitly
+print("Generating Chinese report...")
+set_locale('zh')
+profile_zh = ProfileReport(df, title="中文数据分析报告")
+profile_zh.to_file("report_chinese.html")
+
+# Specify the language at construction time via the locale parameter
+print("Generating report with locale parameter...")
+profile_locale = ProfileReport(df, title="报告标题", locale='zh')
+profile_locale.to_file("report_with_locale.html")
\ No newline at end of file
diff --git a/src/ydata_profiling/config.py b/src/ydata_profiling/config.py
index 09dbecdde..d02b677df 100644
--- a/src/ydata_profiling/config.py
+++ b/src/ydata_profiling/config.py
@@ -5,6 +5,7 @@
import yaml
from pydantic.v1 import BaseModel, BaseSettings, Field, PrivateAttr
+from ydata_profiling.i18n import set_locale, get_locale
def _merge_dictionaries(dict1: dict, dict2: dict) -> dict:
@@ -290,6 +291,14 @@ class Report(BaseModel):
precision: int = 8
+# Internationalization (i18n) settings model; Settings exposes it as its ``i18n`` field
+class I18n(BaseModel):
+    """Internationalization settings"""
+    locale: str = "en"  # locale code that Settings.__init__ passes to set_locale()
+    auto_detect: bool = True  # NOTE(review): not read anywhere in the visible code - confirm the consumer
+    fallback_locale: str = "en"  # NOTE(review): not read anywhere in the visible code - confirm the consumer
+
+
class Settings(BaseSettings):
# Default prefix to avoid collisions with environment variables
class Config:
@@ -355,6 +364,24 @@ class Config:
html: Html = Html()
notebook: Notebook = Notebook()
+ # Add internationalization configuration
+ i18n: I18n = I18n()
+
+ def __init__(self, **data) -> None:
+     # Capture the globally active locale before the model is initialized
+     current_locale = get_locale()
+
+     super().__init__(**data)
+
+     # If the caller passed no 'i18n' section and the global locale is already non-English,
+     # carry that locale into this Settings instance instead of the default
+     if 'i18n' not in data and current_locale != 'en':
+         self.i18n.locale = current_locale
+
+     # Side effect: push the resulting locale to the global translation manager
+     if self.i18n.locale:
+         set_locale(self.i18n.locale)
+
def update(self, updates: dict) -> "Settings":
update = _merge_dictionaries(self.dict(), updates)
return self.parse_obj(self.copy(update=update))
@@ -368,10 +395,21 @@ def from_file(config_file: Union[Path, str]) -> "Settings":
Returns:
Settings
"""
+ # Save current language settings
+ current_locale = get_locale()
+
with open(config_file) as f:
data = yaml.safe_load(f)
- return Settings.parse_obj(data)
+ settings = Settings.parse_obj(data)
+
+ # If no language is specified in the configuration file and there is currently a non-default language setting,
+ # maintain the current setting
+ if 'i18n' not in data and current_locale != 'en':
+ settings.i18n.locale = current_locale
+ set_locale(current_locale)
+
+ return settings
class SparkSettings(Settings):
diff --git a/src/ydata_profiling/config_default.yaml b/src/ydata_profiling/config_default.yaml
index a72b8bdc2..cddd5d1f2 100644
--- a/src/ydata_profiling/config_default.yaml
+++ b/src/ydata_profiling/config_default.yaml
@@ -221,3 +221,8 @@ html:
- "#198754"
full_width: false
+
+i18n:
+ locale: "en"
+ auto_detect: true
+ fallback_locale: "en"
\ No newline at end of file
diff --git a/src/ydata_profiling/config_minimal.yaml b/src/ydata_profiling/config_minimal.yaml
index a3cb46211..dd364e7be 100644
--- a/src/ydata_profiling/config_minimal.yaml
+++ b/src/ydata_profiling/config_minimal.yaml
@@ -220,3 +220,8 @@ html:
- "#198754"
full_width: false
+
+i18n:
+ locale: "en"
+ auto_detect: true
+ fallback_locale: "en"
\ No newline at end of file
diff --git a/src/ydata_profiling/i18n/__init__.py b/src/ydata_profiling/i18n/__init__.py
new file mode 100644
index 000000000..eb414bbbd
--- /dev/null
+++ b/src/ydata_profiling/i18n/__init__.py
@@ -0,0 +1,249 @@
+"""
+Internationalization module for ydata-profiling
+"""
+import os
+import json
+from pathlib import Path
+from typing import Dict, Optional, List, Union
+import threading
+
+class TranslationManager:
+    """Process-wide singleton holding per-locale translation dicts, with support for external translation files"""
+
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls):  # every call returns the one shared instance
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:  # re-check under the lock before creating
+                    cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self):
+        if not hasattr(self, 'initialized'):  # __init__ runs on every TranslationManager() call; set up only once
+            self.translations: Dict[str, Dict[str, str]] = {}
+            self.current_locale = 'en'
+            self.fallback_locale = 'en'
+            self.external_translation_dirs: List[Path] = []
+            self.initialized = True
+            self._load_translations()
+
+    def add_translation_directory(self, directory: Union[str, Path]):
+        """Add external translation directory
+
+        Args:
+            directory: Path to directory containing translation JSON files
+        """
+        dir_path = Path(directory)
+        if dir_path.exists() and dir_path.is_dir():
+            if dir_path not in self.external_translation_dirs:
+                self.external_translation_dirs.append(dir_path)
+                self._load_external_translations(dir_path)
+        else:
+            print(f"Warning: Translation directory {directory} does not exist")
+
+    def load_translation_file(self, file_path: Union[str, Path], locale: Optional[str] = None):
+        """Load a specific translation file
+
+        Args:
+            file_path: Path to the translation JSON file
+            locale: Locale code. If None, will be inferred from filename
+        """
+        file_path = Path(file_path)
+        if not file_path.exists():
+            print(f"Warning: Translation file {file_path} does not exist")
+            return
+
+        if locale is None:
+            locale = file_path.stem
+
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                external_translations = json.load(f)
+
+            # Merge with existing translations
+            if locale in self.translations:
+                self.translations[locale] = self._merge_translations(
+                    self.translations[locale],
+                    external_translations
+                )
+            else:
+                self.translations[locale] = external_translations
+
+            print(f"Successfully loaded translation file for locale '{locale}' from {file_path}")
+        except Exception as e:
+            print(f"Warning: Failed to load translation file {file_path}: {e}")
+
+    def _merge_translations(self, base: dict, override: dict) -> dict:
+        """Recursively merge translation dictionaries; values from ``override`` win, ``base`` is not mutated"""
+        result = base.copy()
+        for key, value in override.items():
+            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+                result[key] = self._merge_translations(result[key], value)
+            else:
+                result[key] = value
+        return result
+
+    def _load_translations(self):
+        """Load built-in translation files from the ``locales`` directory next to this module"""
+        translations_dir = Path(__file__).parent / 'locales'
+        if translations_dir.exists():
+            self._load_translations_from_directory(translations_dir)
+
+    def _load_external_translations(self, directory: Path):
+        """Load translations from external directory"""
+        self._load_translations_from_directory(directory)
+
+    def _load_translations_from_directory(self, directory: Path):
+        """Load all ``*.json`` files from a directory; each file stem is used as the locale code"""
+        for locale_file in directory.glob('*.json'):
+            locale = locale_file.stem
+            try:
+                with open(locale_file, 'r', encoding='utf-8') as f:
+                    translations = json.load(f)
+
+                if locale in self.translations:
+                    # Merge with existing translations
+                    self.translations[locale] = self._merge_translations(
+                        self.translations[locale],
+                        translations
+                    )
+                else:
+                    self.translations[locale] = translations
+
+            except Exception as e:
+                print(f"Warning: Failed to load translation file {locale_file}: {e}")
+
+    def get_available_locales(self) -> List[str]:
+        """Get list of available locales"""
+        return list(self.translations.keys())
+
+    def set_locale(self, locale: str):
+        """Set the current locale; an unknown locale is reported and replaced by ``fallback_locale``"""
+        if locale not in self.translations and locale != self.fallback_locale:
+            print(f"Warning: Locale '{locale}' not found, using fallback '{self.fallback_locale}'")
+            print(f"Available locales: {self.get_available_locales()}")
+            locale = self.fallback_locale  # actually apply the fallback the warning announces
+        self.current_locale = locale
+
+    def get_translation(self, key: str, locale: Optional[str] = None, **kwargs) -> str:
+        """Get translation for a key; falls back to ``fallback_locale``, then to the key itself"""
+        target_locale = locale or self.current_locale
+
+        # Try current locale
+        if target_locale in self.translations:
+            translation = self._get_nested_value(self.translations[target_locale], key)
+            if translation:
+                return self._format_translation(translation, **kwargs)
+
+        # Try fallback locale
+        if target_locale != self.fallback_locale and self.fallback_locale in self.translations:
+            translation = self._get_nested_value(self.translations[self.fallback_locale], key)
+            if translation:
+                return self._format_translation(translation, **kwargs)
+
+        # Return key if no translation found
+        return key
+
+    def _get_nested_value(self, data: dict, key: str) -> Optional[str]:
+        """Get nested value from dictionary using dot notation; None if missing or not a string"""
+        keys = key.split('.')
+        current = data
+        for k in keys:
+            if isinstance(current, dict) and k in current:
+                current = current[k]
+            else:
+                return None
+        return current if isinstance(current, str) else None
+
+    def _format_translation(self, translation: str, **kwargs) -> str:
+        """Format translation with parameters; return it unformatted when a placeholder cannot be filled"""
+        try:
+            return translation.format(**kwargs)
+        except (KeyError, IndexError, ValueError):  # IndexError: positional '{}' with no positional args
+            return translation
+
+    def export_template(self, locale: str, output_file: Union[str, Path]):
+        """Export translation template for a specific locale
+
+        Args:
+            locale: Source locale to export (usually 'en')
+            output_file: Output file path
+        """
+        if locale not in self.translations:
+            print(f"Warning: Locale '{locale}' not found")
+            return
+
+        output_path = Path(output_file)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(self.translations[locale], f, indent=2, ensure_ascii=False)
+
+        print(f"Translation template exported to {output_path}")
+
+# Global translation manager instance
+_translation_manager = TranslationManager()
+
+def set_locale(locale: str) -> None:
+    """Set the global locale on the shared translation manager"""
+    _translation_manager.set_locale(locale)
+
+def get_locale() -> str:
+    """Return the locale code currently active on the shared translation manager"""
+    return _translation_manager.current_locale
+
+def add_translation_directory(directory: Union[str, Path]) -> None:
+    """Register an external translation directory on the shared translation manager"""
+    _translation_manager.add_translation_directory(directory)
+
+def load_translation_file(file_path: Union[str, Path], locale: Optional[str] = None) -> None:
+    """Load one translation JSON file; when ``locale`` is None the file stem is used as the locale code"""
+    _translation_manager.load_translation_file(file_path, locale)
+
+def get_available_locales() -> List[str]:
+    """Return the locale codes currently loaded in the shared translation manager"""
+    return _translation_manager.get_available_locales()
+
+def export_translation_template(locale: str = 'en', output_file: Union[str, Path] = 'translation_template.json') -> None:
+    """Write the loaded translations of ``locale`` to ``output_file`` as a JSON template for customization"""
+    _translation_manager.export_template(locale, output_file)
+
+def _(key: str, default: Optional[str] = None, **kwargs) -> str:
+    """Translation function with optional default fallback
+
+    Args:
+        key: Translation key in dot notation (e.g., 'report.title')
+        default: Default value to return if translation is not found
+        **kwargs: Parameters for string formatting
+
+    Returns:
+        Translated string, default value, or the key itself if no translation found
+    """
+    translation = _translation_manager.get_translation(key, **kwargs)
+
+    # get_translation() returns the key itself when nothing is found; only then does a provided default apply
+    if translation == key and default is not None:
+        return default
+
+    return translation
+
+def t(key: str, **kwargs) -> str:
+    """Translation function - alias that forwards ``key`` and ``**kwargs`` to _()
+
+    Args:
+        key: Translation key in dot notation
+        **kwargs: Parameters for string formatting
+
+    Returns:
+        Translated string
+    """
+    return _(key, **kwargs)
+
+# Public API of the i18n package
+__all__ = [
+    'set_locale', 'get_locale', '_', 't', 'TranslationManager',
+    'add_translation_directory', 'load_translation_file',
+    'get_available_locales', 'export_translation_template'
+]
\ No newline at end of file
diff --git a/src/ydata_profiling/i18n/locales/en.json b/src/ydata_profiling/i18n/locales/en.json
new file mode 100644
index 000000000..f0698a5da
--- /dev/null
+++ b/src/ydata_profiling/i18n/locales/en.json
@@ -0,0 +1,364 @@
+{
+ "report": {
+ "title": "YData Profiling Report",
+ "overview": "Overview",
+ "variables": "Variables",
+ "interactions": "Interactions",
+ "correlations": "Correlations",
+ "missing_values": "Missing values",
+ "sample": "Sample",
+ "duplicates": "Duplicate rows",
+ "footer_text": "Report generated by YData.",
+ "most_frequently_occurring": "Most frequently occurring",
+ "columns": "Columns",
+ "more_details": "More details"
+ },
+ "overview": {
+ "dataset_info": "Dataset info",
+ "variable_types": "Variable types",
+ "dataset_statistics": "Dataset statistics",
+ "number_of_variables": "Number of variables",
+ "number_of_observations": "Number of observations",
+ "missing_cells": "Missing cells",
+ "missing_cells_percentage": "Missing cells (%)",
+ "duplicate_rows": "Duplicate rows",
+ "duplicate_rows_percentage": "Duplicate rows (%)",
+ "average_record_size": "Average record size in memory"
+ },
+ "variables": {
+ "numeric": "Numeric",
+ "categorical": "Categorical",
+ "boolean": "Boolean",
+ "date": "Date",
+ "text": "Text",
+ "url": "URL",
+ "path": "Path",
+ "image": "Image",
+ "distinct": "Distinct",
+ "distinct_percentage": "Distinct (%)",
+ "missing": "Missing",
+ "missing_percentage": "Missing (%)",
+ "statistics": "Statistics",
+ "quantile_statistics": "Quantile statistics",
+ "common_values": "Common values",
+ "histogram": "Histogram",
+ "mode": "Mode",
+ "standard_deviation": "Standard deviation",
+ "sum": "Sum",
+ "mad": "Median Absolute Deviation (MAD)",
+ "coefficient_of_variation": "Coefficient of variation (CV)",
+ "kurtosis": "Kurtosis",
+ "skewness": "Skewness",
+ "range": "Range",
+ "interquartile_range": "Interquartile range (IQR)",
+ "length": "Length",
+ "sample": "Sample"
+ },
+ "correlations": {
+ "pearson": "Pearson's r",
+ "spearman": "Spearman's ρ",
+ "kendall": "Kendall's τ",
+ "phi_k": "Phik (φk)",
+ "cramers": "Cramér's V (φc)",
+ "auto": "Auto"
+ },
+ "interactions": {
+ "scatter_plot": "Scatter plot",
+ "variable": "Variable"
+ },
+ "missing_values": {
+ "matrix": "Matrix",
+ "bar_chart": "Bar chart",
+ "heatmap": "Heatmap",
+ "dendrogram": "Dendrogram"
+ },
+ "alerts": {
+ "high_correlation": "High correlation",
+ "high_cardinality": "High cardinality",
+ "constant": "Constant",
+ "zeros": "Zeros",
+ "missing": "Missing",
+ "skewed": "Skewed",
+ "type_date": "Date",
+ "uniform": "Uniform",
+ "unique": "Unique",
+ "constant_length": "Constant length",
+ "duplicates": "Duplicates",
+ "empty": "Empty",
+ "imbalance": "Imbalance",
+ "near_duplicates": "Near duplicates",
+ "non_stationary": "Non stationary",
+ "seasonal": "Seasonal",
+ "truncated": "Truncated",
+ "unsupported": "Unsupported",
+ "dirty_category": "Dirty category"
+ },
+ "formatting": {
+ "bytes": "{value} bytes",
+ "kb": "{value} KB",
+ "mb": "{value} MB",
+ "gb": "{value} GB",
+ "percentage": "{value}%"
+ },
+ "rendering": {
+ "generate_structure": "Generate report structure",
+ "html_progress": "Render HTML",
+ "json_progress": "Render JSON",
+ "widgets_progress": "Render widgets",
+ "other_values_count": "Other values ({other_count})",
+ "missing": "(Missing)"
+ },
+ "core": {
+ "unknown": "unknown",
+ "alerts": "Alerts",
+ "collapse": "Collapse",
+ "container": "Container",
+ "correlationTable": "CorrelationTable",
+ "dropdown": "Dropdown",
+ "duplicate": "Duplicate",
+ "frequencyTable": "FrequencyTable",
+ "frequencyTableSmall": "FrequencyTableSmall",
+ "html": "HTML",
+ "image": "Image",
+ "sample": "Sample",
+ "scores": "Scores",
+ "table": "Table",
+ "toggle_button": "ToggleButton",
+ "variable": "Variable",
+ "variable_info": "VariableInfo",
+ "model": {
+ "bar_count": "Count",
+ "bar_caption": "A simple visualization of nullity by column.",
+ "matrix": "Matrix",
+ "matrix_caption":"Nullity matrix is a data-dense display which lets you quickly visually pick out patterns in data completion.",
+ "heatmap": "Heatmap",
+ "heatmap_caption": "The correlation heatmap measures nullity correlation: how strongly the presence or absence of one variable affects the presence of another.",
+ "first_rows": "First rows",
+ "last_rows": "Last rows",
+ "random_sample": "Random sample"
+ },
+ "structure": {
+ "correlations": "Correlations",
+ "heatmap": "Heatmap",
+ "table": "Table",
+ "overview": {
+ "values": "values",
+ "number_variables": "Number of variables",
+ "number_observations": "Number of observations",
+ "missing_cells": "Missing cells",
+ "missing_cells_percentage": "Missing cells (%)",
+ "duplicate_rows": "Duplicate rows",
+ "duplicate_rows_percentage": "Duplicate rows (%)",
+ "total_size_memory": "Total size in memory",
+ "average_record_memory": "Average record size in memory",
+ "dataset_statistics": "Dataset statistics",
+ "variable_types": "Variable types",
+ "overview": "Overview",
+ "url": "URL",
+ "copyright": "Copyright",
+ "dataset": "Dataset",
+ "analysis_started": "Analysis started",
+ "analysis_finished": "Analysis finished",
+ "duration": "Duration",
+ "software_version": "Software version",
+ "download_configuration": "Download configuration",
+ "reproduction": "Reproduction",
+ "variable_descriptions": "Variable descriptions",
+ "variables": "Variables",
+ "alerts_count": "Alerts ({count})",
+ "number_of_series": "Number of series",
+ "timeseries_length": "Time series length",
+ "starting_point": "Starting point",
+ "ending_point": "Ending point",
+ "period": "Period",
+ "timeseries_statistics": "Timeseries statistics",
+ "original": "Original",
+ "scaled": "Scaled",
+ "time_series": "Time Series",
+ "interactions": "Interactions",
+ "distinct": "Distinct",
+ "distinct_percentage": "Distinct (%)",
+ "missing": "Missing",
+ "missing_percentage": "Missing (%)",
+ "memory_size": "Memory size",
+ "file": "File",
+ "size": "Size",
+ "file_size": "File size",
+ "file_size_caption":"Histogram with fixed size bins of file sizes (in bytes)",
+ "unique": "Unique",
+ "unique_help": "The number of unique values (all values that occur exactly once in the dataset).",
+ "unique_percentage": "Unique (%)",
+ "max_length": "Max length",
+ "median_length": "Median length",
+ "mean_length": "Mean length",
+ "min_length": "Min length",
+ "length": "Length",
+ "length_histogram": "length histogram",
+ "histogram_lengths_category": "Histogram of lengths of the category",
+ "most_occurring_categories": "Most occurring categories",
+ "most_frequent_character_per_category": "Most frequent character per category",
+ "most_occurring_scripts": "Most occurring scripts",
+ "most_frequent_character_per_script": "Most frequent character per script",
+ "most_occurring_blocks": "Most occurring blocks",
+ "most_frequent_character_per_block": "Most frequent character per block",
+ "total_characters": "Total characters",
+ "distinct_characters": "Distinct characters",
+ "distinct_categories": "Distinct categories",
+ "unicode_categories": "Unicode categories (click for more information)",
+ "distinct_scripts": "Distinct scripts",
+ "unicode_scripts": "Unicode scripts (click for more information)",
+ "distinct_blocks": "Distinct blocks",
+ "unicode_blocks": "Unicode blocks (click for more information)",
+ "characters_unicode": "Characters and Unicode",
+ "characters_unicode_caption": "The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.",
+ "most_occurring_characters": "Most occurring characters",
+ "characters": "Characters",
+ "categories": "Categories",
+ "scripts": "Scripts",
+ "blocks": "Blocks",
+ "unicode": "Unicode",
+ "common_values": "Common Values",
+ "common_values_table": "Common Values (Table)",
+ "1st_row": "1st row",
+ "2nd_row": "2nd row",
+ "3rd_row":"3rd row",
+ "4th_row": "4th row",
+ "5th_row": "5th row",
+ "categories_passes_threshold ": "Number of variable categories passes threshold (config.plot.cat_freq.max_unique)",
+ "common_values_plot": "Common Values (Plot)",
+ "common_words": "Common words",
+ "wordcloud": "Wordcloud",
+ "words": "Words",
+ "mean": "Mean",
+ "min": "Minimum",
+ "max": "Maximum",
+ "zeros": "Zeros",
+ "zeros_percentage": "Zeros (%)",
+ "scatter": "Scatter",
+ "scatterplot": "Scatterplot",
+ "scatterplot_caption": "Scatterplot in the complex plane",
+ "mini_histogram": "Mini histogram",
+ "histogram": "Histogram",
+ "histogram_caption": "Histogram with fixed size bins",
+ "extreme_values": "Extreme values",
+ "histogram_s": "Histogram(s)",
+ "invalid_dates": "Invalid dates",
+ "invalid_dates_percentage": "Invalid dates (%)",
+ "created": "Created",
+ "accessed": "Accessed",
+ "modified": "Modified",
+ "min_width": "Min width",
+ "median_width": "Median width",
+ "max_width": "Max width",
+ "min_height": "Min height",
+ "median_height": "Median height",
+ "max_height": "Max height",
+ "min_area": "Min area",
+ "median_area": "Median area",
+ "max_area": "Max area",
+ "scatter_plot_image_sizes": "Scatter plot of image sizes",
+ "scatter_plot":"Scatter plot",
+ "dimensions": "Dimensions",
+ "exif_keys": "Exif keys",
+ "exif_data": "Exif data",
+ "image": "Image",
+ "common_prefix": "Common prefix",
+ "unique_stems": "Unique stems",
+ "unique_names": "Unique names",
+ "unique_extensions": "Unique extensions",
+ "unique_directories": "Unique directories",
+ "unique_anchors": "Unique anchors",
+ "full": "Full",
+ "stem": "Stem",
+ "name": "Name",
+ "extension": "Extension",
+ "parent": "Parent",
+ "anchor": "Anchor",
+ "path": "Path",
+ "infinite": "Infinite",
+ "infinite_percentage": "Infinite (%)",
+ "Negative": "Negative",
+ "Negative_percentage": "Negative (%)",
+ "5_th_percentile": "5-th percentile",
+ "q1": "Q1",
+ "median": "median",
+ "q3": "Q3",
+ "95_th_percentile": "95-th percentile",
+ "range": "Range",
+ "iqr": "Interquartile range (IQR)",
+ "quantile_statistics": "Quantile statistics",
+ "standard_deviation": "Standard deviation",
+ "cv": "Coefficient of variation (CV)",
+ "kurtosis": "Kurtosis",
+ "mad": "Median Absolute Deviation (MAD)",
+ "skewness": "Skewness",
+ "sum": "Sum",
+ "variance": "Variance",
+ "monotonicity": "Monotonicity",
+ "descriptive_statistics": "Descriptive statistics",
+ "statistics": "Statistics",
+ "augmented_dickey_fuller_test_value": "Augmented Dickey-Fuller test p-value",
+ "autocorrelation": "Autocorrelation",
+ "autocorrelation_caption": "ACF and PACF",
+ "timeseries": "Time-series",
+ "timeseries_plot": "Time-series plot",
+ "scheme": "Scheme",
+ "netloc": "Netloc",
+ "query": "Query",
+ "fragment": "Fragment",
+ "heatmap": "Heatmap"
+ }
+ }
+ },
+ "html": {
+ "alerts": {
+ "title": "Alerts",
+ "not_present": "Alert not present in this dataset",
+ "has_constant_value": "has constant value",
+ "has_constant_length": "has constant length",
+ "has_dirty_categories": "has dirty categories",
+ "has_high_cardinality": "has a high cardinality",
+ "distinct_values": "distinct values",
+ "dataset_has": "Dataset has",
+ "duplicate_rows": "duplicate rows",
+ "dataset_is_empty": "Dataset is empty",
+ "is_highly": "is highly",
+ "correlated_with": "correlated with",
+ "and": "and",
+ "other_fields": "other fields",
+ "highly_imbalanced": "is highly imbalanced",
+ "has": "has",
+ "infinite_values": "infinite values",
+ "missing_values": "missing values",
+ "near_duplicate_rows": "near duplicate rows",
+ "non_stationary": "is non stationary",
+ "seasonal": "is seasonal",
+ "highly_skewed": "is highly skewed",
+ "truncated_files": "truncated files",
+ "alert_type_date": "only contains datetime values, but is categorical. Consider applying",
+ "uniformly_distributed": "is uniformly distributed",
+ "unique_values": "has unique values",
+ "alert_unsupported": "is an unsupported type, check if it needs cleaning or further analysis",
+ "zeros": "zeros"
+ },
+ "sequence": {
+ "overview_tabs": {
+ "brought_to_you_by": "Brought to you by YData"
+ }
+ },
+ "dropdown": "Select Columns",
+ "frequency_table":{
+ "value": "Value",
+ "count": "Count",
+ "frequency_percentage": "Frequency (%)",
+ "redacted_value": "Redacted value",
+ "no_values_found": "No values found"
+ },
+ "scores": {
+ "overall_data_quality": "Overall Data Quality Score"
+ },
+ "variable_info": {
+ "no_alerts": "No alerts"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ydata_profiling/i18n/locales/zh.json b/src/ydata_profiling/i18n/locales/zh.json
new file mode 100644
index 000000000..14657255d
--- /dev/null
+++ b/src/ydata_profiling/i18n/locales/zh.json
@@ -0,0 +1,363 @@
+{
+ "report": {
+ "title": "YData 数据分析报告",
+ "overview": "概览",
+ "variables": "变量",
+ "interactions": "交互分析",
+ "correlations": "相关性",
+ "missing_values": "缺失值",
+ "sample": "样本数据",
+ "duplicates": "重复数据",
+ "footer_text": "报告由 YData 生成。",
+ "most_frequently_occurring": "出现频率最高的",
+ "columns": "列",
+ "more_details": "更多详情"
+ },
+ "overview": {
+ "dataset_info": "数据集信息",
+ "variable_types": "变量类型",
+ "dataset_statistics": "数据集统计",
+ "number_of_variables": "变量数量",
+ "number_of_observations": "观测数量",
+ "missing_cells": "缺失单元格",
+ "missing_cells_percentage": "缺失单元格百分比",
+ "duplicate_rows": "重复行",
+ "duplicate_rows_percentage": "重复行百分比",
+ "average_record_size": "平均记录内存大小"
+ },
+ "variables": {
+ "numeric": "数值型",
+ "categorical": "分类型",
+ "boolean": "布尔型",
+ "date": "日期型",
+ "text": "文本型",
+ "url": "网址型",
+ "path": "路径型",
+ "image": "图像型",
+ "distinct": "唯一值",
+ "distinct_percentage": "唯一值百分比",
+ "missing": "缺失值",
+ "missing_percentage": "缺失值百分比",
+ "statistics": "统计信息",
+ "quantile_statistics": "分位数统计",
+ "common_values": "常见值",
+ "histogram": "直方图",
+ "mode": "众数",
+ "standard_deviation": "标准差",
+ "sum": "总和",
+ "mad": "中位绝对偏差 (MAD)",
+ "coefficient_of_variation": "变异系数 (CV)",
+ "kurtosis": "峰度",
+ "skewness": "偏度",
+ "range": "范围",
+ "interquartile_range": "四分位距 (IQR)",
+ "length": "长度",
+ "sample": "样本"
+ },
+ "correlations": {
+ "pearson": "皮尔逊相关系数",
+ "spearman": "斯皮尔曼相关系数",
+ "kendall": "肯德尔相关系数",
+ "phi_k": "Phik相关系数 (φk)",
+ "cramers": "克拉默V系数 (φc)",
+ "auto": "自动"
+ },
+ "interactions": {
+ "scatter_plot": "散点图",
+ "variable": "变量"
+ },
+ "missing_values": {
+ "matrix": "矩阵图",
+ "bar_chart": "条形图",
+ "heatmap": "热力图",
+ "dendrogram": "树状图"
+ },
+ "alerts": {
+ "high_correlation": "高相关性",
+ "high_cardinality": "高基数",
+ "constant": "常量",
+ "zeros": "零值",
+ "missing": "缺失",
+ "skewed": "偏斜",
+ "infinite": "无穷值",
+ "type_date": "日期类型",
+ "uniform": "均匀分布",
+ "unique": "唯一值",
+ "duplicates": "重复值",
+ "empty": "空值",
+ "imbalance": "不平衡",
+ "near_duplicates": "近似重复",
+ "non_stationary": "非平稳",
+ "seasonal": "季节性",
+ "truncated": "截断",
+ "unsupported": "不支持",
+ "dirty_category": "脏数据分类"
+ },
+ "formatting": {
+ "bytes": "{value} 字节",
+ "kb": "{value} KB",
+ "mb": "{value} MB",
+ "gb": "{value} GB",
+ "percentage": "{value}%"
+ },
+ "rendering": {
+ "generate_structure": "生成报告结构",
+ "html_progress": "渲染HTML报告",
+ "json_progress": "渲染JSON报告",
+ "widgets_progress": "渲染组件",
+ "other_values_count": "其他值 ({other_count})",
+ "missing": "(缺失)"
+ },
+ "core": {
+ "unknown": "未知",
+ "alerts": "警告",
+ "collapse": "收起",
+ "container": "容器",
+ "correlationTable": "相关表",
+ "dropdown": "下拉选项",
+ "duplicate": "重复",
+ "frequencyTable": "频率表",
+ "frequencyTableSmall": "小频率表",
+ "html": "HTML",
+ "image": "Image",
+ "sample": "样本",
+ "scores": "评分",
+ "table": "表",
+ "toggle_button": "切换按钮",
+ "variable": "变量",
+ "variable_info": "变量信息",
+ "model": {
+ "bar_count": "总计",
+ "bar_caption": "按列对空值进行简单可视化。",
+ "matrix": "矩阵",
+ "matrix_caption":"空值矩阵是一种数据密集型的可视化图表,可让您快速直观地找出数据完整性方面的模式。",
+ "heatmap": "热力图",
+ "heatmap_caption": "相关性热力图用于衡量空值相关性:一个变量的存在或缺失对另一个变量是否存在的影响程度。",
+ "first_rows": "前几行",
+ "last_rows": "最后几行",
+ "random_sample": "随机抽样"
+ },
+ "structure": {
+ "correlations": "相关性",
+ "heatmap": "热力图",
+ "table": "表",
+ "overview": {
+ "values": "值",
+ "number_variables": "变量数量",
+ "number_observations": "观测数量",
+ "missing_cells": "缺失单元格",
+ "missing_cells_percentage": "缺失单元格占比(%)",
+ "duplicate_rows": "重复行",
+ "duplicate_rows_percentage": "重复行占比 (%)",
+ "total_size_memory": "内存中总大小",
+ "average_record_memory": "内存中的平均记录大小",
+ "dataset_statistics": "数据集统计",
+ "variable_types": "变量类型",
+ "overview": "概览",
+ "url": "网址",
+ "copyright": "版权",
+ "dataset": "数据集",
+ "analysis_started": "分析开始",
+ "analysis_finished": "分析结束",
+ "duration": "持续时间",
+ "software_version": "软件版本",
+ "download_configuration": "下载配置",
+ "reproduction": "复现",
+ "variable_descriptions": "变量描述",
+ "variables": "变量",
+ "alerts_count": "警告 ({count})",
+ "number_of_series": "序列数量",
+ "timeseries_length": "时间序列长度",
+ "starting_point": "起始点",
+ "ending_point": "结束点",
+ "period": "周期",
+ "timeseries_statistics": "时序统计",
+ "original": "原始",
+ "scaled": "缩放",
+ "time_series": "时间序列",
+ "interactions": "交互",
+ "distinct": "独特值",
+ "distinct_percentage": "独特值占比 (%)",
+ "missing": "缺失值",
+ "missing_percentage": "缺失值占比 (%)",
+ "memory_size": "内存大小",
+ "file": "文件",
+ "size": "大小",
+ "file_size": "文件大小",
+ "file_size_caption":"文件大小(以字节为单位)的等宽分箱直方图",
+ "unique": "唯一值",
+ "unique_help": "唯一值(即数据集中只出现一次的所有值)的数量。",
+ "unique_percentage": "唯一值占比 (%)",
+ "max_length": "最大长度",
+ "median_length": "中位长度",
+ "mean_length": "平均长度",
+ "min_length": "最小长度",
+ "length": "长度",
+ "length_histogram": "长度直方图",
+ "histogram_lengths_category": "该类别的长度直方图",
+ "most_occurring_categories": "最常见类别",
+ "most_frequent_character_per_category": "每个类别中出现频率最高的字符",
+ "most_occurring_scripts": "最常见的脚本",
+ "most_frequent_character_per_script": "每种脚本中出现频率最高的字符",
+ "most_occurring_blocks": "出现最多的块",
+ "most_frequent_character_per_block": "每个区块中出现频率最高的字符",
+ "total_characters": "总字符数",
+ "distinct_characters": "唯一字符",
+ "distinct_categories": "唯一类别",
+ "unicode_categories": "Unicode 类别(点击查看更多信息)",
+ "distinct_scripts": "唯一脚本",
+ "unicode_scripts": "Unicode 脚本(点击查看更多信息)",
+ "distinct_blocks": "唯一块",
+ "unicode_blocks": "Unicode 块(点击查看更多信息)",
+ "characters_unicode": "字符与Unicode",
+ "characters_unicode_caption": "Unicode标准为每个码位分配了字符属性,这些属性可用于分析文本变量。",
+ "most_occurring_characters": "出现频率最高的字符",
+ "characters": "字符",
+ "categories": "分类",
+ "scripts": "脚本",
+ "blocks": "块",
+ "unicode": "Unicode",
+ "common_values": "常见值",
+ "common_values_table": "常见值 (表)",
+ "1st_row": "第1行",
+ "2nd_row": "第2行",
+ "3rd_row": "第3行",
+ "4th_row": "第4行",
+ "5th_row": "第5行",
+ "categories_passes_threshold ": "通过阈值的变量类别数量 (config.plot.cat_freq.max_unique)",
+ "common_values_plot": "常见值 (图)",
+ "common_words": "常见词汇",
+ "wordcloud": "词云",
+ "words": "词汇",
+ "mean": "均值",
+ "min": "最小值",
+ "max": "最大值",
+ "zeros": "零值",
+ "zeros_percentage": "零值占比 (%)",
+ "scatter": "散点图",
+ "scatterplot": "散点图",
+ "scatterplot_caption": "复平面散点图",
+ "mini_histogram": "迷你直方图",
+ "histogram": "直方图",
+ "histogram_caption": "等宽直方图",
+ "extreme_values": "极值",
+ "histogram_s": "直方图",
+ "invalid_dates": "无效日期",
+ "invalid_dates_percentage": "无效日期占比 (%)",
+ "created": "创建",
+ "accessed": "访问",
+ "modified": "修改",
+ "min_width": "最小宽度",
+ "median_width": "中位宽度",
+ "max_width": "最大宽度",
+ "min_height": "最小高度",
+ "median_height": "中位高度",
+ "max_height": "最大高度",
+ "min_area": "最小面积",
+ "median_area": "中位面积",
+ "max_area": "最大面积",
+ "scatter_plot_image_sizes": "图像大小的散点图",
+ "scatter_plot":"散点图",
+ "dimensions": "维度",
+ "exif_keys": "Exif 键",
+ "exif_data": "Exif 数据",
+ "image": "图片",
+ "common_prefix": "公共前缀",
+ "unique_stems": "唯一主干名",
+ "unique_names": "唯一名称",
+ "unique_extensions": "唯一扩展名",
+ "unique_directories": "唯一目录",
+ "unique_anchors": "唯一锚点",
+ "full": "完整",
+ "stem": "主干名",
+ "name": "名称",
+ "extension": "扩展名",
+ "parent": "父级",
+ "anchor": "锚点",
+ "path": "路径",
+ "infinite": "无限值",
+ "infinite_percentage": "无限值占比 (%)",
+ "Negative": "负值",
+ "Negative_percentage": "负值占比 (%)",
+ "5_th_percentile": "5% 分位数",
+ "q1": "Q1",
+ "median": "中位数",
+ "q3": "Q3",
+ "95_th_percentile": "95%分位数",
+ "range": "范围",
+ "iqr": "四分位距 (IQR)",
+ "quantile_statistics": "分位数统计",
+ "standard_deviation": "标准差",
+ "cv": "变异系数 (CV)",
+ "kurtosis": "峰度",
+ "mad": "绝对中位差 (MAD)",
+ "skewness": "偏度",
+ "sum": "总和",
+ "variance": "方差",
+ "monotonicity": "单调性",
+ "descriptive_statistics": "描述性统计",
+ "statistics": "统计信息",
+ "augmented_dickey_fuller_test_value": "增强型Dickey-Fuller检验的p值",
+ "autocorrelation": "自相关",
+ "autocorrelation_caption": "ACF 和 PACF",
+ "timeseries": "时间序列",
+ "timeseries_plot": "时序图",
+ "scheme": "方案",
+ "netloc": "主机标识",
+ "query": "请求参数",
+ "fragment": "锚点"
+ }
+ }
+ },
+ "html": {
+ "alerts": {
+ "title": "警告",
+ "not_present": "此数据集中不存在此警告",
+ "has_constant_value": "具有恒定值",
+ "has_constant_length": "具有恒定长度",
+ "has_dirty_categories": "存在脏分类",
+ "has_high_cardinality": "具有高基数性",
+ "distinct_values": "不同值",
+ "dataset_has": "数据集包含",
+ "duplicate_rows": "重复行",
+ "dataset_is_empty": "数据集为空",
+ "is_highly": "高度",
+ "correlated_with": "与之相关",
+ "and": "与",
+ "other_fields": "其他字段",
+ "highly_imbalanced": "存在高度不平衡",
+ "has": "有",
+ "infinite_values": "无限值",
+ "missing_values": "缺失值",
+ "near_duplicate_rows": "近似重复的行",
+ "non_stationary": "是非平稳的",
+ "seasonal": "是季节性的",
+ "highly_skewed": "高度偏斜",
+ "truncated_files": "截断的文件",
+ "alert_type_date": "仅包含日期时间值,但属于分类数据。考虑应用",
+ "uniformly_distributed": "均匀分布",
+ "unique_values": "具有唯一值",
+ "alert_unsupported": "这是一个不受支持的类型,请检查是否需要清理或进一步分析",
+ "zeros": "零值"
+ },
+ "sequence": {
+ "overview_tabs": {
+ "brought_to_you_by": "由 YData 为您提供"
+ }
+ },
+ "dropdown": "选择列",
+ "frequency_table":{
+ "value": "值",
+ "count": "计数",
+ "frequency_percentage": "频率 (%)",
+ "redacted_value": "已隐藏的值",
+ "no_values_found": "未找到任何值。"
+ },
+ "scores": {
+ "overall_data_quality": "整体数据质量评分"
+ },
+ "variable_info": {
+ "no_alerts": "无警告"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ydata_profiling/i18n/tools.py b/src/ydata_profiling/i18n/tools.py
new file mode 100644
index 000000000..d1cc41029
--- /dev/null
+++ b/src/ydata_profiling/i18n/tools.py
@@ -0,0 +1,119 @@
+"""
+Translation tools for ydata-profiling
+"""
+import json
+from pathlib import Path
+from typing import Dict, Any, Union
+import click
+
+
+def create_translation_template(locale: str = 'en', output_dir: Union[str, Path] = '.'):
+    """Export the translations of a locale as an editable JSON template.
+
+    The template is written to ``<output_dir>/<locale>_template.json`` via
+    ``export_translation_template`` and the path of that file is returned.
+    """
+    from ydata_profiling.i18n import export_translation_template
+
+    template_name = f"{locale}_template.json"
+    template_path = Path(output_dir).joinpath(template_name)
+    export_translation_template(locale, template_path)
+    return template_path
+
+
+def validate_translation_file(file_path: Union[str, Path], reference_locale: str = 'en') -> Dict[str, Any]:
+    """Validate a translation file against the keys of a reference locale.
+
+    Args:
+        file_path: Path to the JSON translation file to validate.
+        reference_locale: Loaded locale whose (nested) keys are required.
+
+    Returns:
+        ``{"valid": False, "error": str}`` when the file or reference cannot
+        be used; otherwise ``valid`` (no missing keys), ``missing_keys`` and
+        ``extra_keys`` (dotted paths) and ``total_keys`` (number of issues).
+    """
+    from ydata_profiling.i18n import _translation_manager
+
+    file_path = Path(file_path)
+    if not file_path.exists():
+        return {"valid": False, "error": f"File {file_path} does not exist"}
+
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            translations = json.load(f)
+    except Exception as e:
+        return {"valid": False, "error": f"Invalid JSON: {e}"}
+    if not isinstance(translations, dict):
+        # Valid JSON, but a list/scalar cannot be compared key by key.
+        return {"valid": False, "error": "Translation file must contain a JSON object at the top level"}
+
+    if reference_locale not in _translation_manager.translations:
+        return {"valid": False, "error": f"Reference locale '{reference_locale}' not found"}
+    reference = _translation_manager.translations[reference_locale]
+
+    missing_keys = []
+    extra_keys = []
+
+    def check_keys(ref_dict: dict, trans_dict: dict, prefix: str = ""):
+        for key, value in ref_dict.items():
+            full_key = f"{prefix}.{key}" if prefix else key
+            if key not in trans_dict:
+                missing_keys.append(full_key)
+            elif isinstance(value, dict):
+                if isinstance(trans_dict[key], dict):
+                    check_keys(value, trans_dict[key], full_key)
+                else:  # a section replaced by a plain value loses its nested keys
+                    missing_keys.append(full_key)
+        for key in trans_dict:
+            if key not in ref_dict:
+                extra_keys.append(f"{prefix}.{key}" if prefix else key)
+
+    check_keys(reference, translations)
+    return {
+        "valid": not missing_keys,
+        "missing_keys": missing_keys,
+        "extra_keys": extra_keys,
+        "total_keys": len(missing_keys) + len(extra_keys),
+    }
+
+
+@click.group()
+def cli():
+    """YData Profiling Translation Tools"""
+    # Intentionally empty: this group only hosts the sub-commands below.
+
+
+@cli.command()
+@click.option('--locale', '-l', default='en', help='Source locale for template')
+@click.option('--output', '-o', default='.', help='Output directory')
+def create_template(locale: str, output: str):
+    """Create a translation template file"""
+    template_path = create_translation_template(locale, output)
+    click.echo("Translation template created: {}".format(template_path))
+
+
+@cli.command()
+@click.argument('file_path')
+@click.option('--reference', '-r', default='en', help='Reference locale')
+def validate(file_path: str, reference: str):
+    """Validate a translation file"""
+    result = validate_translation_file(file_path, reference)
+
+    if result["valid"]:
+        click.echo(click.style("✓ Translation file is valid!", fg='green'))
+    else:
+        click.echo(click.style("✗ Translation file has issues:", fg='red'))
+    if 'error' in result:
+        click.echo(f"Error: {result['error']}")
+    # Unknown keys do not invalidate a file, so list them in both outcomes.
+    for label, sign, field in (("Missing", "-", "missing_keys"), ("Extra", "+", "extra_keys")):
+        keys = result.get(field)
+        if keys:
+            click.echo(f"\n{label} keys ({len(keys)}):")
+            for key in keys:
+                click.echo(f" {sign} {key}")
+
+    if not result["valid"]:
+        # Non-zero exit status lets scripts and CI detect a failed validation.
+        raise SystemExit(1)
\ No newline at end of file
diff --git a/src/ydata_profiling/model/missing.py b/src/ydata_profiling/model/missing.py
index 46ec2dee3..8264bbbd2 100644
--- a/src/ydata_profiling/model/missing.py
+++ b/src/ydata_profiling/model/missing.py
@@ -5,6 +5,7 @@
import pandas as pd
from ydata_profiling.config import Settings
+from ydata_profiling.i18n import _
class MissingDataBackend:
@@ -70,20 +71,20 @@ def get_missing_active(config: Settings, table_stats: dict) -> Dict[str, Any]:
missing_map = {
"bar": {
"min_missing": 0,
- "name": "Count",
- "caption": "A simple visualization of nullity by column.",
+ "name": _("core.model.bar_count"),
+ "caption": _("core.model.bar_caption"),
"function": MissingBar(),
},
"matrix": {
"min_missing": 0,
- "name": "Matrix",
- "caption": "Nullity matrix is a data-dense display which lets you quickly visually pick out patterns in data completion.",
+ "name": _("core.model.matrix"),
+ "caption": _("core.model.matrix_caption"),
"function": MissingMatrix(),
},
"heatmap": {
"min_missing": 2,
- "name": "Heatmap",
- "caption": "The correlation heatmap measures nullity correlation: how strongly the presence or absence of one variable affects the presence of another.",
+ "name": _("core.model.heatmap"),
+ "caption": _("core.model.heatmap_caption"),
"function": MissingHeatmap(),
},
}
diff --git a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py b/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
index e711acdd9..ba98f0601 100644
--- a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
+++ b/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
@@ -17,6 +17,7 @@
series_hashable,
)
from ydata_profiling.utils.information import DisplayInfo
+from ydata_profiling.i18n import _
def get_character_counts_vc(vc: pd.Series) -> pd.Series:
@@ -67,7 +68,7 @@ def unicode_summary_vc(vc: pd.Series) -> dict:
from unicodedata import category as _category # pylint: disable=import-error
category = _category # type: ignore
- char_handler = lambda char: "(unknown)" # noqa: E731
+        char_handler = lambda char: f"{_('core.unknown')}"  # noqa: E731
block = char_handler
block_abbr = char_handler
category_long = char_handler
diff --git a/src/ydata_profiling/model/pandas/sample_pandas.py b/src/ydata_profiling/model/pandas/sample_pandas.py
index 10feb8969..9f0a53fe6 100644
--- a/src/ydata_profiling/model/pandas/sample_pandas.py
+++ b/src/ydata_profiling/model/pandas/sample_pandas.py
@@ -4,6 +4,7 @@
from ydata_profiling.config import Settings
from ydata_profiling.model.sample import Sample, get_sample
+from ydata_profiling.i18n import _
@get_sample.register(Settings, pd.DataFrame)
@@ -23,16 +24,16 @@ def pandas_get_sample(config: Settings, df: pd.DataFrame) -> List[Sample]:
n_head = config.samples.head
if n_head > 0:
- samples.append(Sample(id="head", data=df.head(n=n_head), name="First rows"))
+ samples.append(Sample(id="head", data=df.head(n=n_head), name=_("core.model.first_rows")))
n_tail = config.samples.tail
if n_tail > 0:
- samples.append(Sample(id="tail", data=df.tail(n=n_tail), name="Last rows"))
+ samples.append(Sample(id="tail", data=df.tail(n=n_tail), name=_("core.model.last_rows")))
n_random = config.samples.random
if n_random > 0:
samples.append(
- Sample(id="random", data=df.sample(n=n_random), name="Random sample")
+ Sample(id="random", data=df.sample(n=n_random), name=_("core.model.random_sample"))
)
return samples
diff --git a/src/ydata_profiling/profile_report.py b/src/ydata_profiling/profile_report.py
index a7d6d9134..9d003f4b9 100644
--- a/src/ydata_profiling/profile_report.py
+++ b/src/ydata_profiling/profile_report.py
@@ -47,6 +47,7 @@
from ydata_profiling.utils.dataframe import hash_dataframe
from ydata_profiling.utils.logger import ProfilingLogger
from ydata_profiling.utils.paths import get_config
+from ydata_profiling.i18n import _, set_locale, get_locale
logger = ProfilingLogger(name="ReportLogger")
@@ -80,6 +81,7 @@ def __init__(
summarizer: Optional[BaseSummarizer] = None,
config: Optional[Settings] = None,
type_schema: Optional[dict] = None,
+ locale: Optional[str] = None,
**kwargs,
):
"""Generate a ProfileReport based on a pandas or spark.sql DataFrame
@@ -103,8 +105,21 @@ def __init__(
typeset: optional user typeset to use for type inference
summarizer: optional user summarizer to generate custom summary output
type_schema: optional dict containing pairs of `column name`: `type`
+ locale: language locale for report generation (e.g., 'en', 'zh')
**kwargs: other arguments, for valid arguments, check the default configuration file.
"""
+ from ydata_profiling.i18n import set_locale, get_locale
+
+ # Save current language settings
+ current_locale = get_locale()
+
+ # If the locale parameter is explicitly specified, use it
+ if locale:
+ set_locale(locale)
+ target_locale = locale
+ else:
+ # Otherwise, use the current global language setting
+ target_locale = current_locale
self.__validate_inputs(df, minimal, tsmode, config_file, lazy)
@@ -123,6 +138,11 @@ def __init__(
else:
report_config = SparkSettings()
+ # Ensure that the language settings in the configuration are consistent with the target language
+ if target_locale != 'en':
+ report_config.i18n.locale = target_locale
+ set_locale(target_locale)
+
groups = [
(explorative, "explorative"),
(sensitive, "sensitive"),
@@ -144,6 +164,10 @@ def __init__(
if kwargs:
report_config = report_config.update(kwargs)
+ # Finally ensure the language setting is correct
+ report_config.i18n.locale = target_locale
+ set_locale(target_locale)
+
report_config.vars.timeseries.active = tsmode
if tsmode and sortby:
report_config.vars.timeseries.sortby = sortby
@@ -411,7 +435,7 @@ def _render_html(self) -> str:
report = self.report
with tqdm(
- total=1, desc="Render HTML", disable=not self.config.progress_bar
+ total=1, desc=_("rendering.html_progress"), disable=not self.config.progress_bar
) as pbar:
html = HTMLReport(copy.deepcopy(report)).render(
nav=self.config.html.navbar_show,
@@ -440,7 +464,7 @@ def _render_widgets(self) -> Any:
with tqdm(
total=1,
- desc="Render widgets",
+ desc=_("rendering.widgets_progress"),
disable=not self.config.progress_bar,
leave=False,
) as pbar:
@@ -477,7 +501,7 @@ def encode_it(o: Any) -> Any:
description = self.description_set
with tqdm(
- total=1, desc="Render JSON", disable=not self.config.progress_bar
+ total=1, desc=_("rendering.json_progress"), disable=not self.config.progress_bar
) as pbar:
description_dict = format_summary(description)
description_dict = encode_it(description_dict)
diff --git a/src/ydata_profiling/report/presentation/core/alerts.py b/src/ydata_profiling/report/presentation/core/alerts.py
index 79de56287..113006d5c 100644
--- a/src/ydata_profiling/report/presentation/core/alerts.py
+++ b/src/ydata_profiling/report/presentation/core/alerts.py
@@ -3,6 +3,7 @@
from ydata_profiling.config import Style
from ydata_profiling.model.alerts import Alert
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class Alerts(ItemRenderer):
@@ -12,7 +13,7 @@ def __init__(
super().__init__("alerts", {"alerts": alerts, "style": style}, **kwargs)
def __repr__(self):
- return "Alerts"
+ return _("core.alerts")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/collapse.py b/src/ydata_profiling/report/presentation/core/collapse.py
index a7dba34f1..2633588aa 100644
--- a/src/ydata_profiling/report/presentation/core/collapse.py
+++ b/src/ydata_profiling/report/presentation/core/collapse.py
@@ -3,6 +3,7 @@
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
from ydata_profiling.report.presentation.core.renderable import Renderable
from ydata_profiling.report.presentation.core.toggle_button import ToggleButton
+from ydata_profiling.i18n import _
class Collapse(ItemRenderer):
@@ -10,7 +11,7 @@ def __init__(self, button: ToggleButton, item: Renderable, **kwargs):
super().__init__("collapse", {"button": button, "item": item}, **kwargs)
def __repr__(self) -> str:
- return "Collapse"
+ return _("core.collapse")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/container.py b/src/ydata_profiling/report/presentation/core/container.py
index c82f06266..e48ff0a62 100644
--- a/src/ydata_profiling/report/presentation/core/container.py
+++ b/src/ydata_profiling/report/presentation/core/container.py
@@ -1,6 +1,7 @@
from typing import Any, Callable, Optional, Sequence
from ydata_profiling.report.presentation.core.renderable import Renderable
+from ydata_profiling.i18n import _
class Container(Renderable):
@@ -25,7 +26,7 @@ def __init__(
self.sequence_type = sequence_type
def __str__(self) -> str:
- text = "Container\n"
+        text = f"{_('core.container')}\n"
if "items" in self.content:
for id, item in enumerate(self.content["items"]):
name = str(item).replace("\n", "\n\t")
@@ -35,9 +36,9 @@ def __str__(self) -> str:
def __repr__(self) -> str:
if "name" in self.content:
name = self.content["name"]
- return f"Container(name={name})"
+            return f"{_('core.container')}(name={name})"
else:
- return "Container"
+ return _("core.container")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/correlation_table.py b/src/ydata_profiling/report/presentation/core/correlation_table.py
index 174d0e708..64a47a050 100644
--- a/src/ydata_profiling/report/presentation/core/correlation_table.py
+++ b/src/ydata_profiling/report/presentation/core/correlation_table.py
@@ -3,6 +3,7 @@
import pandas as pd
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class CorrelationTable(ItemRenderer):
@@ -15,7 +16,7 @@ def __init__(self, name: str, correlation_matrix: pd.DataFrame, **kwargs):
)
def __repr__(self) -> str:
- return "CorrelationTable"
+ return _("core.correlationTable")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/dropdown.py b/src/ydata_profiling/report/presentation/core/dropdown.py
index c1c2f274e..919383aa8 100644
--- a/src/ydata_profiling/report/presentation/core/dropdown.py
+++ b/src/ydata_profiling/report/presentation/core/dropdown.py
@@ -3,6 +3,7 @@
from ydata_profiling.report.presentation.core.container import Container
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
from ydata_profiling.report.presentation.core.renderable import Renderable
+from ydata_profiling.i18n import _
class Dropdown(ItemRenderer):
@@ -32,7 +33,7 @@ def __init__(
)
def __repr__(self) -> str:
- return "Dropdown"
+ return _("core.dropdown")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/duplicate.py b/src/ydata_profiling/report/presentation/core/duplicate.py
index 907d19376..8dfc28a93 100644
--- a/src/ydata_profiling/report/presentation/core/duplicate.py
+++ b/src/ydata_profiling/report/presentation/core/duplicate.py
@@ -3,6 +3,7 @@
import pandas as pd
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class Duplicate(ItemRenderer):
@@ -10,7 +11,7 @@ def __init__(self, name: str, duplicate: pd.DataFrame, **kwargs):
super().__init__("duplicate", {"duplicate": duplicate}, name=name, **kwargs)
def __repr__(self) -> str:
- return "Duplicate"
+ return _("core.duplicate")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/frequency_table.py b/src/ydata_profiling/report/presentation/core/frequency_table.py
index 060e82da3..cd12da85d 100644
--- a/src/ydata_profiling/report/presentation/core/frequency_table.py
+++ b/src/ydata_profiling/report/presentation/core/frequency_table.py
@@ -1,6 +1,7 @@
from typing import Any
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class FrequencyTable(ItemRenderer):
@@ -8,7 +9,7 @@ def __init__(self, rows: list, redact: bool, **kwargs):
super().__init__("frequency_table", {"rows": rows, "redact": redact}, **kwargs)
def __repr__(self) -> str:
- return "FrequencyTable"
+ return _("core.frequencyTable")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/frequency_table_small.py b/src/ydata_profiling/report/presentation/core/frequency_table_small.py
index c7036b3b7..e1d61597f 100644
--- a/src/ydata_profiling/report/presentation/core/frequency_table_small.py
+++ b/src/ydata_profiling/report/presentation/core/frequency_table_small.py
@@ -1,6 +1,7 @@
from typing import Any, List
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class FrequencyTableSmall(ItemRenderer):
@@ -10,7 +11,7 @@ def __init__(self, rows: List[Any], redact: bool, **kwargs):
)
def __repr__(self) -> str:
- return "FrequencyTableSmall"
+ return _("core.frequencyTableSmall")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/html.py b/src/ydata_profiling/report/presentation/core/html.py
index a93e53656..a7aa56621 100644
--- a/src/ydata_profiling/report/presentation/core/html.py
+++ b/src/ydata_profiling/report/presentation/core/html.py
@@ -1,6 +1,7 @@
from typing import Any
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class HTML(ItemRenderer):
@@ -8,7 +9,7 @@ def __init__(self, content: str, **kwargs):
super().__init__("html", {"html": content}, **kwargs)
def __repr__(self) -> str:
- return "HTML"
+ return _("core.html")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/image.py b/src/ydata_profiling/report/presentation/core/image.py
index 4d991922d..ee6b0cf04 100644
--- a/src/ydata_profiling/report/presentation/core/image.py
+++ b/src/ydata_profiling/report/presentation/core/image.py
@@ -2,6 +2,7 @@
from ydata_profiling.config import ImageType
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class Image(ItemRenderer):
@@ -28,7 +29,7 @@ def __init__(
)
def __repr__(self) -> str:
- return "Image"
+ return _("core.image")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/sample.py b/src/ydata_profiling/report/presentation/core/sample.py
index 6ce6194fd..ac0e26e0d 100644
--- a/src/ydata_profiling/report/presentation/core/sample.py
+++ b/src/ydata_profiling/report/presentation/core/sample.py
@@ -3,6 +3,7 @@
import pandas as pd
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class Sample(ItemRenderer):
@@ -14,7 +15,7 @@ def __init__(
)
def __repr__(self) -> str:
- return "Sample"
+ return _("core.sample")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/scores.py b/src/ydata_profiling/report/presentation/core/scores.py
index 7ff70570a..7414cef4d 100644
--- a/src/ydata_profiling/report/presentation/core/scores.py
+++ b/src/ydata_profiling/report/presentation/core/scores.py
@@ -5,6 +5,7 @@
from ydata_profiling.config import Style
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class Scores(ItemRenderer):
@@ -26,7 +27,7 @@ def __init__(
super().__init__("scores", content=content, **kwargs)
def __repr__(self) -> str:
- return "Scores"
+ return _("core.scores")
def render(self) -> Any:
raise NotImplementedError("Handled by flavour-specific class")
diff --git a/src/ydata_profiling/report/presentation/core/table.py b/src/ydata_profiling/report/presentation/core/table.py
index 46fa9e1ad..78c2c95d1 100644
--- a/src/ydata_profiling/report/presentation/core/table.py
+++ b/src/ydata_profiling/report/presentation/core/table.py
@@ -2,6 +2,7 @@
from ydata_profiling.config import Style
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class Table(ItemRenderer):
@@ -20,7 +21,7 @@ def __init__(
)
def __repr__(self) -> str:
- return "Table"
+ return _("core.table")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/toggle_button.py b/src/ydata_profiling/report/presentation/core/toggle_button.py
index c6ce1b861..054a4d35c 100644
--- a/src/ydata_profiling/report/presentation/core/toggle_button.py
+++ b/src/ydata_profiling/report/presentation/core/toggle_button.py
@@ -1,6 +1,7 @@
from typing import Any
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class ToggleButton(ItemRenderer):
@@ -8,7 +9,7 @@ def __init__(self, text: str, **kwargs):
super().__init__("toggle_button", {"text": text}, **kwargs)
def __repr__(self) -> str:
- return "ToggleButton"
+ return _("core.toggle_button")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/variable.py b/src/ydata_profiling/report/presentation/core/variable.py
index cdf063202..0a24c6517 100644
--- a/src/ydata_profiling/report/presentation/core/variable.py
+++ b/src/ydata_profiling/report/presentation/core/variable.py
@@ -2,6 +2,7 @@
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
from ydata_profiling.report.presentation.core.renderable import Renderable
+from ydata_profiling.i18n import _
class Variable(ItemRenderer):
@@ -20,13 +21,13 @@ def __str__(self):
top_text = str(self.content["top"]).replace("\n", "\n\t")
bottom_text = str(self.content["bottom"]).replace("\n", "\n\t")
- text = "Variable\n"
+        text = f"{_('core.variable')}\n"
text += f"- top: {top_text}"
text += f"- bottom: {bottom_text}"
return text
def __repr__(self):
- return "Variable"
+ return _("core.variable")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/core/variable_info.py b/src/ydata_profiling/report/presentation/core/variable_info.py
index 9eaa54a39..6c5ff966c 100644
--- a/src/ydata_profiling/report/presentation/core/variable_info.py
+++ b/src/ydata_profiling/report/presentation/core/variable_info.py
@@ -3,6 +3,7 @@
from ydata_profiling.config import Style
from ydata_profiling.model.alerts import Alert
from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer
+from ydata_profiling.i18n import _
class VariableInfo(ItemRenderer):
@@ -30,7 +31,7 @@ def __init__(
)
def __repr__(self) -> str:
- return "VariableInfo"
+ return _("core.variable_info")
def render(self) -> Any:
raise NotImplementedError()
diff --git a/src/ydata_profiling/report/presentation/flavours/html/__init__.py b/src/ydata_profiling/report/presentation/flavours/html/__init__.py
index 50e234f77..72be96f2d 100644
--- a/src/ydata_profiling/report/presentation/flavours/html/__init__.py
+++ b/src/ydata_profiling/report/presentation/flavours/html/__init__.py
@@ -25,6 +25,7 @@
from ydata_profiling.report.presentation.flavours.html.variable_info import (
HTMLVariableInfo,
)
+from ydata_profiling.report.presentation.flavours.html.i18n_extension import I18nExtension
__all__ = [
"HTMLCollapse",
@@ -44,4 +45,5 @@
"HTMLAlerts",
"HTMLCorrelationTable",
"HTMLScores",
+ "I18nExtension",
]
diff --git a/src/ydata_profiling/report/presentation/flavours/html/i18n_extension.py b/src/ydata_profiling/report/presentation/flavours/html/i18n_extension.py
new file mode 100644
index 000000000..47dbf0f5e
--- /dev/null
+++ b/src/ydata_profiling/report/presentation/flavours/html/i18n_extension.py
@@ -0,0 +1,33 @@
+"""
+Jinja2 internationalization extension for ydata-profiling
+"""
+from jinja2 import nodes
+from jinja2.ext import Extension
+from ydata_profiling.i18n import _
+
+class I18nExtension(Extension):
+    """Jinja2 extension exposing the ydata-profiling translation function.
+
+    Loading the extension on an environment registers:
+
+    * the globals ``_`` and ``gettext``, both bound to the translation function;
+    * the ``trans`` filter, e.g. ``{{ "core.variable" | trans }}``;
+    * the ``trans`` tag, e.g. ``{% trans "core.variable" %}``.
+    """
+
+    tags = {"trans"}
+
+    def __init__(self, environment) -> None:
+        """Register the translation globals and the ``trans`` filter."""
+        super().__init__(environment)
+        environment.globals["_"] = _
+        environment.globals["gettext"] = _
+        environment.filters["trans"] = self.translate_filter
+
+    def translate_filter(self, key, **kwargs):
+        """Translate ``key``, forwarding ``kwargs`` to the translation function."""
+        return _(key, **kwargs)
+
+    def parse(self, parser):
+        """Compile ``{% trans <expression> %}`` into an output node.
+
+        The tag takes a single expression (the translation key) and has no
+        closing tag; the rendered output is the translation of that key.
+        """
+        # The first token is the tag name itself; keep its line for error reports.
+        lineno = next(parser.stream).lineno
+        key = parser.parse_expression()
+        # Bind the call to this extension instead of looking up the template-level
+        # name `_`: a template or render-context variable called `_` (for example
+        # a throwaway loop target) would otherwise shadow the translator.
+        translated = self.call_method("translate_filter", [key])
+        return nodes.Output([translated]).set_lineno(lineno)
\ No newline at end of file
diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates.py b/src/ydata_profiling/report/presentation/flavours/html/templates.py
index 85e24a46a..69a8dc1b8 100644
--- a/src/ydata_profiling/report/presentation/flavours/html/templates.py
+++ b/src/ydata_profiling/report/presentation/flavours/html/templates.py
@@ -6,20 +6,33 @@
from ydata_profiling.config import Settings
from ydata_profiling.report.formatters import fmt, fmt_badge, fmt_numeric, fmt_percent
+from ydata_profiling.i18n import _
+from ydata_profiling.report.presentation.flavours.html.i18n_extension import I18nExtension
# Initializing Jinja
package_loader = jinja2.PackageLoader(
"ydata_profiling", "report/presentation/flavours/html/templates"
)
jinja2_env = jinja2.Environment(
- lstrip_blocks=True, trim_blocks=True, loader=package_loader
+ lstrip_blocks=True,
+ trim_blocks=True,
+ loader=package_loader,
+ extensions=[I18nExtension] # Provides the {% trans %} tag for templates
)
+
+# Expose the translation function to templates under the names `_` and `gettext`.
+# NOTE(review): I18nExtension.__init__ assigns the same two globals on the
+# environment it is loaded into, so these lines look redundant - confirm and dedupe.
+jinja2_env.globals['_'] = _
+jinja2_env.globals['gettext'] = _
+
jinja2_env.filters["is_list"] = lambda x: isinstance(x, list)
jinja2_env.filters["fmt_badge"] = fmt_badge
jinja2_env.filters["fmt_percent"] = fmt_percent
jinja2_env.filters["fmt_numeric"] = fmt_numeric
jinja2_env.filters["fmt"] = fmt
+# `trans` filter: {{ key | trans }} calls _(key), forwarding any keyword arguments.
+# NOTE(review): this overwrites the `trans` filter set by I18nExtension.__init__.
+jinja2_env.filters["trans"] = lambda key, **kwargs: _(key, **kwargs)
+
def template(template_name: str) -> jinja2.Template:
"""Get the template object given the name.
diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html b/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html
index 3a53428b8..f95ae6bbe 100644
--- a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html
+++ b/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html
@@ -1,6 +1,6 @@