Files
Train/app/tests/test_clean.py
T
2026-06-16 00:55:20 +08:00

110 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""清洗规则单元测试 — 对应 T-1.2 UT。"""
import os
import sys
import unittest
# Prepend the directory three levels above this file (the parent of the
# ``app`` package directory) to sys.path so the ``app.etl`` import below
# resolves when this test file is executed directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.etl import clean as cl
class TestCleanCell(unittest.TestCase):
    """Tests for ``cl.clean_cell`` and ``cl.is_empty``."""

    def test_strip_newlines_and_spaces(self):
        """Cleaned cells drop surrounding whitespace but keep inner spaces."""
        # NOTE(review): as written, the first input holds only newlines while
        # the expected value is "东风"; the literal may have lost characters
        # when the file was rendered -- confirm against the repository copy.
        self.assertEqual(cl.clean_cell("\n\n\n"), "东风")
        self.assertEqual(cl.clean_cell(" HXD1 型 "), "HXD1 型")

    def test_none(self):
        """``None`` is expected to clean to the empty string."""
        self.assertEqual(cl.clean_cell(None), "")

    def test_is_empty(self):
        """Every placeholder marker counts as empty; real text does not."""
        # NOTE(review): the repeated "" entries may stand for distinct
        # (possibly invisible) characters in the repository copy -- confirm.
        for marker in ["", "-", "——", "", "/", None, ""]:
            # subTest reports each failing marker separately instead of
            # aborting on the first one; repr() keeps "" and None readable
            # in the failure message.
            with self.subTest(marker=marker):
                self.assertTrue(cl.is_empty(marker), repr(marker))
        self.assertFalse(cl.is_empty("现役"))
class TestParseValueUnit(unittest.TestCase):
    """Tests for ``cl.parse_value_unit``.

    The function is unpacked as a 3-tuple throughout; the tests treat the
    items as (numeric value, unit, text).  The optional second argument
    looks like a fallback unit -- confirm against the implementation.
    """

    def test_plain_number(self):
        """A bare number is parsed and paired with the supplied unit."""
        value, unit, _ = cl.parse_value_unit("126.0", "t")
        self.assertEqual(value, 126.0)
        self.assertEqual(unit, "t")

    def test_with_unit_in_text(self):
        """A unit embedded in the text is extracted when none is supplied."""
        value, unit, _ = cl.parse_value_unit("400km/h（试验）")
        self.assertEqual(value, 400.0)
        self.assertEqual(unit, "km/h")

    def test_speed_with_default_unit(self):
        """Text unit and supplied unit agree on ``km/h``."""
        value, unit, _ = cl.parse_value_unit("160km/h", "km/h")
        self.assertEqual(value, 160.0)
        self.assertEqual(unit, "km/h")

    def test_axle_load_with_paren(self):
        """The number before a parenthesised alternative is the one used."""
        value, unit, _ = cl.parse_value_unit("23(25)", "t")
        self.assertEqual(value, 23.0)
        self.assertEqual(unit, "t")

    def test_composite_takes_first_number(self):
        """Only the first number of a composite expression is returned."""
        value, _, _ = cl.parse_value_unit("2×92（100）", "t")
        # Takes the first numeric value; the original text is preserved
        # verbatim in raw_json.
        self.assertEqual(value, 2.0)

    def test_empty_markers(self):
        """Placeholder markers yield no value and an empty unit."""
        for raw in ["——", "-", "", "/"]:
            # subTest names the failing marker and keeps checking the rest.
            with self.subTest(raw=raw):
                value, unit, _ = cl.parse_value_unit(raw, "t")
                self.assertIsNone(value)
                self.assertEqual(unit, "")

    def test_no_number(self):
        """Text without any number yields no value and is returned as text."""
        value, _, text = cl.parse_value_unit("交-直-交传动", "")
        self.assertIsNone(value)
        self.assertEqual(text, "交-直-交传动")
class TestParseYear(unittest.TestCase):
    """Tests for ``cl.parse_year``."""

    def test_year_with_char(self):
        """A year followed by the 年 character parses to the integer year."""
        parsed = cl.parse_year("1971 年")
        self.assertEqual(parsed, 1971)

    def test_year_datetime(self):
        """A full timestamp string parses to its year component."""
        parsed = cl.parse_year("2007-12-22 00:00:00")
        self.assertEqual(parsed, 2007)

    def test_year_plain(self):
        """A plain four-digit string parses to the same integer."""
        parsed = cl.parse_year("2006")
        self.assertEqual(parsed, 2006)

    def test_year_dashes(self):
        """Dash placeholders and the empty string parse to ``None``."""
        for blank in ("——", ""):
            self.assertIsNone(cl.parse_year(blank))
class TestNormalizeStatus(unittest.TestCase):
    """Tests for ``cl.normalize_status``."""

    def test_mapping(self):
        """Each raw status string maps to its expected canonical label."""
        # (raw input, expected canonical status), checked in this order.
        expectations = (
            ("半封存", "半封存"),
            ("封存", "封存"),
            ("已淘汰", "退役"),
            ("样车", "试验"),
            ("", "未知"),
        )
        for raw, canonical in expectations:
            self.assertEqual(cl.normalize_status(raw), canonical)
class TestInferCountryType(unittest.TestCase):
    """Tests for ``cl.infer_country_type``."""

    def test_default_domestic(self):
        """A manufacturer name with no origin hint is classed as 国产."""
        inferred = cl.infer_country_type("大连机车车辆厂")
        self.assertEqual(inferred, "国产")

    def test_import(self):
        """Origin hints in the text select the matching import category."""
        cases = (
            ("苏联引进", "引进仿制"),
            ("日本制造", "进口"),
        )
        for description, category in cases:
            self.assertEqual(cl.infer_country_type(description), category)

    def test_joint(self):
        """A joint-venture description is classed as 中外合资."""
        inferred = cl.infer_country_type("中外合资生产")
        self.assertEqual(inferred, "中外合资")
class TestForwardFill(unittest.TestCase):
    """Tests for ``cl.forward_fill``."""

    def test_fill(self):
        """Empty entries take the most recent non-empty value before them."""
        sparse = ["东风", "", "", "韶山", ""]
        filled = ["东风", "东风", "东风", "韶山", "韶山"]
        self.assertEqual(cl.forward_fill(sparse), filled)

    def test_leading_empty(self):
        """A leading empty entry has nothing to inherit and stays empty."""
        result = cl.forward_fill(["", "A"])
        self.assertEqual(result, ["", "A"])
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()