"""Unit tests for the cleaning rules — corresponds to T-1.2 UT."""
import os
import sys
import unittest

# Put the directory two levels above this file's directory on sys.path so the
# ``app`` package resolves when this file is executed directly.
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
)

from app.etl import clean as cl  # noqa: E402  (must follow the sys.path tweak)


class TestCleanCell(unittest.TestCase):
    """Checks for ``clean_cell`` and ``is_empty``."""

    def test_strip_newlines_and_spaces(self):
        """Embedded newlines are removed and surrounding spaces are trimmed."""
        self.assertEqual(cl.clean_cell("东\n\n\n风"), "东风")
        self.assertEqual(cl.clean_cell(" HXD1 型 "), "HXD1 型")

    def test_none(self):
        """``None`` is cleaned to an empty string."""
        self.assertEqual(cl.clean_cell(None), "")

    def test_is_empty(self):
        """Placeholder markers count as empty; a real value does not."""
        for v in ["", "-", "——", "—", "/", None, "无"]:
            # subTest keeps checking the remaining markers after a failure.
            with self.subTest(value=v):
                self.assertTrue(cl.is_empty(v))
        self.assertFalse(cl.is_empty("现役"))


class TestParseValueUnit(unittest.TestCase):
    """Checks for ``parse_value_unit``, which returns a 3-element result."""

    def test_plain_number(self):
        """A bare number is parsed and the supplied unit is returned."""
        v, u, _ = cl.parse_value_unit("126.0", "t")
        self.assertEqual(v, 126.0)
        self.assertEqual(u, "t")

    def test_with_unit_in_text(self):
        """A unit embedded in the text is extracted when none is supplied."""
        v, u, _ = cl.parse_value_unit("400km/h(试验)")
        self.assertEqual(v, 400.0)
        self.assertEqual(u, "km/h")

    def test_speed_with_default_unit(self):
        """An embedded unit that matches the supplied unit is returned as-is."""
        v, u, _ = cl.parse_value_unit("160km/h", "km/h")
        self.assertEqual(v, 160.0)
        self.assertEqual(u, "km/h")

    def test_axle_load_with_paren(self):
        """The number before the parenthesised alternative is the one kept."""
        v, u, _ = cl.parse_value_unit("23(25)", "t")
        self.assertEqual(v, 23.0)
        self.assertEqual(u, "t")

    def test_composite_takes_first_number(self):
        """A composite expression yields its first number."""
        v, _, _ = cl.parse_value_unit("2×92(100)", "t")
        # Only the first number is taken; the original text is kept verbatim
        # in raw_json.
        self.assertEqual(v, 2.0)

    def test_empty_markers(self):
        """Empty markers give no value and an empty unit."""
        for raw in ["——", "-", "", "/"]:
            # subTest keeps checking the remaining markers after a failure.
            with self.subTest(raw=raw):
                v, u, _ = cl.parse_value_unit(raw, "t")
                self.assertIsNone(v)
                self.assertEqual(u, "")

    def test_no_number(self):
        """Text without any number gives no value and is returned as text."""
        v, _, txt = cl.parse_value_unit("交-直-交传动", "")
        self.assertIsNone(v)
        self.assertEqual(txt, "交-直-交传动")


class TestParseYear(unittest.TestCase):
    """Checks for ``parse_year``."""

    def test_year_with_char(self):
        """A year followed by the year character is parsed."""
        self.assertEqual(cl.parse_year("1971 年"), 1971)

    def test_year_datetime(self):
        """The year is taken from a full date-time string."""
        self.assertEqual(cl.parse_year("2007-12-22 00:00:00"), 2007)

    def test_year_plain(self):
        """A plain four-digit year is parsed."""
        self.assertEqual(cl.parse_year("2006"), 2006)

    def test_year_dashes(self):
        """Dash placeholders and empty strings give ``None``."""
        self.assertIsNone(cl.parse_year("——"))
        self.assertIsNone(cl.parse_year(""))


class TestNormalizeStatus(unittest.TestCase):
    """Checks for ``normalize_status``."""

    def test_mapping(self):
        """Each raw status maps to its expected normalized status."""
        expected_by_raw = [
            ("半封存", "半封存"),
            ("封存", "封存"),
            ("已淘汰", "退役"),
            ("样车", "试验"),
            ("", "未知"),
        ]
        for raw, expected in expected_by_raw:
            # subTest reports every wrong mapping, not just the first one.
            with self.subTest(raw=raw):
                self.assertEqual(cl.normalize_status(raw), expected)


class TestInferCountryType(unittest.TestCase):
    """Checks for ``infer_country_type``."""

    def test_default_domestic(self):
        """A plain manufacturer name is classified as domestic."""
        self.assertEqual(cl.infer_country_type("大连机车车辆厂"), "国产")

    def test_import(self):
        """Imported-and-copied and imported texts get distinct categories."""
        self.assertEqual(cl.infer_country_type("苏联引进"), "引进仿制")
        self.assertEqual(cl.infer_country_type("日本制造"), "进口")

    def test_joint(self):
        """Joint-venture text is classified as a joint venture."""
        self.assertEqual(cl.infer_country_type("中外合资生产"), "中外合资")


class TestForwardFill(unittest.TestCase):
    """Checks for ``forward_fill``."""

    def test_fill(self):
        """Empty entries take the most recent non-empty value before them."""
        self.assertEqual(
            cl.forward_fill(["东风", "", "", "韶山", ""]),
            ["东风", "东风", "东风", "韶山", "韶山"],
        )

    def test_leading_empty(self):
        """A leading empty entry has nothing to copy and stays empty."""
        self.assertEqual(cl.forward_fill(["", "A"]), ["", "A"])


if __name__ == "__main__":
    unittest.main()