优化数据预处理
This commit is contained in:
42
data_preprocessing/config.py
Normal file
42
data_preprocessing/config.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
数据预处理模块配置
|
||||
"""
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
# 获取项目根目录
|
||||
# Directory that contains this module file.
_PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))
# The project root is taken to be the parent of that directory.
PROJECT_ROOT = os.path.dirname(_PACKAGE_DIR)
|
||||
|
||||
|
||||
@dataclass
class Config:
    """Settings for the data preprocessing module.

    Every field has a default, so ``Config()`` can be created without
    arguments. The two directory defaults are built from ``PROJECT_ROOT``
    when the class is defined.

    Attributes:
        raw_data_dir: Directory where raw data is stored.
        cleaned_data_dir: Directory where cleaned data is written.
        default_time_column: Default name of the time column.
        supported_extensions: Supported file extensions.
        csv_encoding: Encoding for CSV files.
    """

    # Directory where raw data is stored
    raw_data_dir: str = os.path.join(PROJECT_ROOT, "raw_data")

    # Output directory for cleaned data
    cleaned_data_dir: str = os.path.join(PROJECT_ROOT, "cleaned_data")

    # Default time column name
    default_time_column: str = "SendTime"

    # Supported file extensions
    supported_extensions: tuple = (".csv", ".xlsx", ".xls")

    # CSV encoding ("utf-8-sig" is Python's UTF-8 codec with a BOM signature)
    csv_encoding: str = "utf-8-sig"

    def ensure_dirs(self) -> None:
        """Make sure both data directories exist, then print their paths.

        Missing directories (including parents) are created; directories
        that already exist are left alone because ``exist_ok=True`` is used.
        """
        # Same creation order as before: raw data first, cleaned output second.
        for directory in (self.raw_data_dir, self.cleaned_data_dir):
            os.makedirs(directory, exist_ok=True)

        # Plain literal: the message has no placeholders, so no f-prefix.
        print("[OK] 目录已就绪:")
        print(f" 原始数据: {self.raw_data_dir}")
        print(f" 清洗输出: {self.cleaned_data_dir}")
|
||||
|
||||
|
||||
# 默认配置实例
|
||||
default_config = Config()  # module-level instance created with every field left at its default
|
||||
Reference in New Issue
Block a user