110 lines
3.6 KiB
Python
110 lines
3.6 KiB
Python
"""Unit tests for the cleaning rules — corresponds to T-1.2 UT."""
|
||
import os
|
||
import sys
|
||
import unittest
|
||
|
||
# Prepend the directory reached by three dirname() steps from this file's
# absolute path to sys.path. NOTE(review): presumably that directory is the
# project root containing the `app` package imported below — confirm.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||
|
||
from app.etl import clean as cl
|
||
|
||
|
||
class TestCleanCell(unittest.TestCase):
    """Tests for ``cl.clean_cell`` and ``cl.is_empty``."""

    def test_strip_newlines_and_spaces(self):
        # Embedded newlines are expected to be removed and the surrounding
        # whitespace trimmed.
        self.assertEqual(cl.clean_cell("东\n\n\n风"), "东风")
        self.assertEqual(cl.clean_cell(" HXD1 型 "), "HXD1 型")

    def test_none(self):
        # None is expected to clean to the empty string.
        self.assertEqual(cl.clean_cell(None), "")

    def test_is_empty(self):
        # Each placeholder runs in its own subTest so that one failure does
        # not hide the others, and the failing value is named in the report
        # (repr() keeps "" and None distinguishable).
        for v in ["", "-", "——", "—", "/", None, "无"]:
            with self.subTest(value=v):
                self.assertTrue(cl.is_empty(v), repr(v))
        # A real status string must not be treated as empty.
        self.assertFalse(cl.is_empty("现役"))
|
||
|
||
|
||
class TestParseValueUnit(unittest.TestCase):
    """Tests for ``cl.parse_value_unit``.

    The tests unpack the result as a 3-tuple ``(value, unit, text)``.
    """

    def test_plain_number(self):
        # A bare number with a default unit supplied by the caller.
        v, u, _ = cl.parse_value_unit("126.0", "t")
        self.assertEqual(v, 126.0)
        self.assertEqual(u, "t")

    def test_with_unit_in_text(self):
        # No default unit is passed; the unit is expected to come from the text.
        v, u, _ = cl.parse_value_unit("400km/h(试验)")
        self.assertEqual(v, 400.0)
        self.assertEqual(u, "km/h")

    def test_speed_with_default_unit(self):
        # The unit appears both in the text and as the default.
        v, u, _ = cl.parse_value_unit("160km/h", "km/h")
        self.assertEqual(v, 160.0)
        self.assertEqual(u, "km/h")

    def test_axle_load_with_paren(self):
        # The number before the parenthesis is the expected value.
        v, u, _ = cl.parse_value_unit("23(25)", "t")
        self.assertEqual(v, 23.0)
        self.assertEqual(u, "t")

    def test_composite_takes_first_number(self):
        v, _, _ = cl.parse_value_unit("2×92(100)", "t")
        # Takes the first number; the original text is kept verbatim in raw_json.
        self.assertEqual(v, 2.0)

    def test_empty_markers(self):
        # One subTest per marker: the failing raw input is reported and the
        # remaining markers are still checked.
        for raw in ["——", "-", "", "/"]:
            with self.subTest(raw=raw):
                v, u, _ = cl.parse_value_unit(raw, "t")
                self.assertIsNone(v)
                self.assertEqual(u, "")

    def test_no_number(self):
        # Text without any digits: no value, and the text comes back unchanged.
        v, _, txt = cl.parse_value_unit("交-直-交传动", "")
        self.assertIsNone(v)
        self.assertEqual(txt, "交-直-交传动")
|
||
|
||
|
||
class TestParseYear(unittest.TestCase):
    """Tests for ``cl.parse_year``."""

    def test_year_with_char(self):
        # Year followed by the 年 character.
        parsed = cl.parse_year("1971 年")
        self.assertEqual(parsed, 1971)

    def test_year_datetime(self):
        # Full datetime string; only the year is expected back.
        parsed = cl.parse_year("2007-12-22 00:00:00")
        self.assertEqual(parsed, 2007)

    def test_year_plain(self):
        # Bare four-digit year.
        parsed = cl.parse_year("2006")
        self.assertEqual(parsed, 2006)

    def test_year_dashes(self):
        # Dash placeholder and empty string both yield None.
        for blank in ("——", ""):
            self.assertIsNone(cl.parse_year(blank))
|
||
|
||
|
||
class TestNormalizeStatus(unittest.TestCase):
    """Tests for ``cl.normalize_status``."""

    def test_mapping(self):
        # (raw status, expected normalized status), checked in this order.
        expectations = (
            ("半封存", "半封存"),
            ("封存", "封存"),
            ("已淘汰", "退役"),
            ("样车", "试验"),
            ("", "未知"),
        )
        for raw_status, normalized in expectations:
            self.assertEqual(cl.normalize_status(raw_status), normalized)
|
||
|
||
|
||
class TestInferCountryType(unittest.TestCase):
    """Tests for ``cl.infer_country_type``."""

    def test_default_domestic(self):
        # A plain manufacturer name is expected to be classified as domestic.
        inferred = cl.infer_country_type("大连机车车辆厂")
        self.assertEqual(inferred, "国产")

    def test_import(self):
        # Two separate expected categories for the two import-related inputs.
        introduced = cl.infer_country_type("苏联引进")
        self.assertEqual(introduced, "引进仿制")
        imported = cl.infer_country_type("日本制造")
        self.assertEqual(imported, "进口")

    def test_joint(self):
        # Joint-venture wording maps to the joint-venture category.
        inferred = cl.infer_country_type("中外合资生产")
        self.assertEqual(inferred, "中外合资")
|
||
|
||
|
||
class TestForwardFill(unittest.TestCase):
    """Tests for ``cl.forward_fill``."""

    def test_fill(self):
        # Empty cells are expected to take the most recent non-empty value.
        sparse = ["东风", "", "", "韶山", ""]
        filled = ["东风", "东风", "东风", "韶山", "韶山"]
        self.assertEqual(cl.forward_fill(sparse), filled)

    def test_leading_empty(self):
        # A leading empty cell has nothing before it and is expected to stay empty.
        result = cl.forward_fill(["", "A"])
        self.assertEqual(result, ["", "A"])
|
||
|
||
|
||
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
|