Files
ProjectMD/DataTools/validate_data.py

153 lines
3.9 KiB
Python

# DataTools/validate_data.py
"""CSV 데이터 검증 (조건부 필드 지원)"""
import pandas as pd
import sys
import json
from pathlib import Path
# Directory scanned for the CSV files to validate and for their hidden
# ".<name>_schema.json" schema files: the "GameData" folder located next to
# the directory that contains this script.
GAMEDATA_DIR = Path(__file__).parent.parent / "GameData"
def load_schema(data_name):
    """Load the saved schema JSON for a data set, if one exists.

    The schema is looked up as a hidden file named
    ``.<data_name>_schema.json`` inside ``GAMEDATA_DIR``.

    Args:
        data_name: Base name of the data set (the CSV file stem).

    Returns:
        The parsed JSON content, or ``None`` when no schema file exists.
    """
    candidate = GAMEDATA_DIR / f".{data_name}_schema.json"
    if candidate.exists():
        with candidate.open('r', encoding='utf-8') as handle:
            return json.load(handle)
    return None
def check_condition(row, condition):
    """Evaluate a schema condition against one data row.

    Example condition::

        {
            'field': 'tower_type',
            'op': '=',
            'value': 'attack'
        }

    Supported operators are '=', '!=', '>', '<', '>=' and '<='.

    Args:
        row: Mapping-like row indexed by column name (e.g. a pandas Series
            produced by ``DataFrame.iterrows``).
        condition: Dict with 'field', 'op' and 'value' keys, or a falsy
            value meaning "no condition".

    Returns:
        True when there is no condition or the row satisfies it. False when
        the field is absent or NaN, when the operator is unknown, or when an
        ordering comparison is requested on values that are not numeric.

    Raises:
        KeyError: If a non-empty condition lacks 'field', 'op' or 'value'.
    """
    if not condition:
        return True  # no condition: always satisfied

    field = condition['field']
    op = condition['op']
    expected = condition['value']

    if field not in row or pd.isna(row[field]):
        return False

    actual = row[field]

    # Numeric view of both sides; None when either side is not number-like.
    # Only conversion failures are caught, so KeyboardInterrupt/SystemExit
    # are no longer swallowed the way a bare ``except:`` would.
    try:
        numbers = (float(actual), float(expected))
    except (TypeError, ValueError, OverflowError):
        numbers = None

    if op in ('=', '!='):
        # Compare as text first. `expected` is stringified as well because a
        # JSON schema may store the value as a number instead of a string.
        equal = str(actual) == str(expected)
        # pandas reads an integer column that contains blanks as float, so a
        # cell holding 3 comes back as 3.0. Fall back to numeric equality so
        # that it still matches a schema value of 3 / '3'.
        if not equal and numbers is not None:
            equal = numbers[0] == numbers[1]
        return equal if op == '=' else not equal

    if numbers is None:
        return False  # ordering needs numeric operands

    left, right = numbers
    if op == '>':
        return left > right
    if op == '<':
        return left < right
    if op == '>=':
        return left >= right
    if op == '<=':
        return left <= right
    return False  # unknown operator
def validate_file(file_path, schema):
    """Validate a CSV file against the conditional fields of its schema.

    Args:
        file_path: Path of the CSV file, read with ``pd.read_csv``.
        schema: Iterable of field dicts, each with a 'name' and an optional
            'condition'. A falsy schema skips the conditional checks.

    Returns:
        A list of error message strings; empty when the file passes. Any
        exception raised while reading or checking is reported as a single
        message rather than propagated.
    """
    try:
        frame = pd.read_csv(file_path)
        problems = []

        # A file without any data rows fails immediately.
        if not len(frame):
            problems.append("데이터가 없습니다")
            return problems

        # Without a schema there is nothing conditional to verify.
        if not schema:
            return problems

        for spec in schema:
            rule = spec.get('condition')
            if rule:
                # Only conditional fields are checked; fields without a
                # condition are treated as common and skipped.
                column = spec['name']
                for position, record in frame.iterrows():
                    required = check_condition(record, rule)
                    missing = pd.isna(record.get(column))
                    if required and missing:
                        rule_text = f"{rule['field']}{rule['op']}{rule['value']}"
                        # position + 2: presumably maps the 0-based row index
                        # to the CSV line number (1-based, after the header).
                        problems.append(
                            f"{position+2}: '{column}' 필드가 비어있습니다 "
                            f"(조건: {rule_text})"
                        )
        return problems
    except Exception as exc:
        return [f"파일 읽기 오류: {exc}"]
def main():
    """Validate every CSV file in ``GAMEDATA_DIR`` and report the results.

    Prints a per-file pass/fail report. When no CSV files are found it
    prints a warning and returns; otherwise it ends the process through
    ``sys.exit`` with status 0 if every file passed and 1 if any failed.
    """
    print("🔍 CSV 데이터 검증 중...\n")

    targets = list(GAMEDATA_DIR.glob("*.csv"))
    if not targets:
        print("⚠️ 검증할 CSV 파일이 없습니다")
        return

    any_failed = False
    for target in targets:
        name = target.stem

        # Skip hidden files.
        if name.startswith("."):
            continue

        print(f"📊 {name}.csv 검증...")
        rules = load_schema(name)
        problems = validate_file(target, rules)

        if not problems:
            # The file is read again here only to report its row count.
            total_rows = len(pd.read_csv(target))
            print(f"✅ 통과 ({total_rows}개 행)")
        else:
            print("❌ 실패:")
            for problem in problems:
                print(f" - {problem}")
            any_failed = True
        print()

    if any_failed:
        print("❌ 검증 실패한 파일이 있습니다")
    else:
        print("🎉 모든 데이터 검증 통과!")
    sys.exit(1 if any_failed else 0)
# Run the validation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()