From 8350377be172de9da693383bd9de67d6ffeb81c5 Mon Sep 17 00:00:00 2001 From: Caglar Pir Date: Thu, 5 Feb 2026 03:44:51 -0800 Subject: [PATCH] Sanitize camera and lens serial numbers Summary: Remove all non-alphanumeric characters from the serial numbers --- mapillary_tools/exif_read.py | 18 ++++----- mapillary_tools/exiftool_read.py | 9 ++--- mapillary_tools/exiftool_read_video.py | 9 ++--- mapillary_tools/utils.py | 11 ++++++ tests/unit/test_exifread.py | 51 ++++++++++++++++---------- tests/unit/test_utils.py | 43 ++++++++++++++++++++++ 6 files changed, 101 insertions(+), 40 deletions(-) diff --git a/mapillary_tools/exif_read.py b/mapillary_tools/exif_read.py index d7b8b142..94266570 100644 --- a/mapillary_tools/exif_read.py +++ b/mapillary_tools/exif_read.py @@ -19,6 +19,8 @@ import exifread from exifread.utils import Ratio +from .utils import sanitize_serial + LOG = logging.getLogger(__name__) XMP_NAMESPACES = { @@ -512,11 +514,9 @@ def extract_camera_uuid(self) -> str | None: str, ) - parts = [] - if body_serial: - parts.append(body_serial.strip()) - if lens_serial: - parts.append(lens_serial.strip()) + parts = [ + s for s in [sanitize_serial(body_serial), sanitize_serial(lens_serial)] if s + ] if parts: return "_".join(parts) @@ -880,11 +880,9 @@ def extract_camera_uuid(self) -> str | None: str, ) - parts = [] - if body_serial: - parts.append(body_serial.strip()) - if lens_serial: - parts.append(lens_serial.strip()) + parts = [ + s for s in [sanitize_serial(body_serial), sanitize_serial(lens_serial)] if s + ] if parts: return "_".join(parts) diff --git a/mapillary_tools/exiftool_read.py b/mapillary_tools/exiftool_read.py index bbf7c6cc..0f645fd5 100644 --- a/mapillary_tools/exiftool_read.py +++ b/mapillary_tools/exiftool_read.py @@ -12,6 +12,7 @@ from pathlib import Path from . 
import exif_read +from .utils import sanitize_serial EXIFTOOL_NAMESPACES: dict[str, str] = { @@ -501,11 +502,9 @@ def extract_camera_uuid(self) -> str | None: str, ) - parts = [] - if body_serial: - parts.append(body_serial.strip()) - if lens_serial: - parts.append(lens_serial.strip()) + parts = [ + s for s in [sanitize_serial(body_serial), sanitize_serial(lens_serial)] if s + ] if parts: return "_".join(parts) diff --git a/mapillary_tools/exiftool_read_video.py b/mapillary_tools/exiftool_read_video.py index b1bff9d5..9eef37b3 100644 --- a/mapillary_tools/exiftool_read_video.py +++ b/mapillary_tools/exiftool_read_video.py @@ -13,6 +13,7 @@ from . import exif_read, exiftool_read, geo from .telemetry import GPSFix, GPSPoint +from .utils import sanitize_serial MAX_TRACK_ID = 10 @@ -450,11 +451,9 @@ def extract_camera_uuid(self) -> str | None: str, ) - parts = [] - if body_serial: - parts.append(body_serial.strip()) - if lens_serial: - parts.append(lens_serial.strip()) + parts = [ + s for s in [sanitize_serial(body_serial), sanitize_serial(lens_serial)] if s + ] if parts: return "_".join(parts) diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index c2570570..3bb0c6f2 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -59,6 +59,17 @@ def is_video_file(path: Path) -> bool: ) +def sanitize_serial(s: str | None) -> str | None: + """ + Sanitize a serial number by removing all non-alphanumeric characters. + Returns None if the input is None or if the result is empty after cleaning. 
+ """ + if s is None: + return None + cleaned = "".join(c for c in s if c.isalnum()) + return cleaned if cleaned else None + + def iterate_files( root: Path, recursive: bool = False, follow_hidden_dirs: bool = False ) -> T.Generator[Path, None, None]: diff --git a/tests/unit/test_exifread.py b/tests/unit/test_exifread.py index 4276ab95..8e7d51c4 100644 --- a/tests/unit/test_exifread.py +++ b/tests/unit/test_exifread.py @@ -359,6 +359,17 @@ def test_whitespace_stripped(self): } assert reader.extract_camera_uuid() == "BODY123_LENS456" + def test_special_characters_removed(self): + """Test that special characters are removed from serial numbers""" + from mapillary_tools.exif_read import ExifReadFromEXIF + + reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) + reader.tags = { + "EXIF BodySerialNumber": MockExifTag("BODY-123:456"), + "EXIF LensSerialNumber": MockExifTag("LENS/789.ABC"), + } + assert reader.extract_camera_uuid() == "BODY123456_LENS789ABC" + class TestExtractCameraUuidFromXMP: """Test extract_camera_uuid from XMP tags""" @@ -395,23 +406,23 @@ def _create_xmp_reader(self, tags_dict: dict): def test_xmp_body_serial_only(self): """Test XMP with only body serial number""" - reader = self._create_xmp_reader({"exifEX:BodySerialNumber": "XMP_BODY123"}) - assert reader.extract_camera_uuid() == "XMP_BODY123" + reader = self._create_xmp_reader({"exifEX:BodySerialNumber": "XMPBODY123"}) + assert reader.extract_camera_uuid() == "XMPBODY123" def test_xmp_lens_serial_only(self): """Test XMP with only lens serial number""" - reader = self._create_xmp_reader({"exifEX:LensSerialNumber": "XMP_LENS456"}) - assert reader.extract_camera_uuid() == "XMP_LENS456" + reader = self._create_xmp_reader({"exifEX:LensSerialNumber": "XMPLENS456"}) + assert reader.extract_camera_uuid() == "XMPLENS456" def test_xmp_both_serials(self): """Test XMP with both body and lens serial numbers""" reader = self._create_xmp_reader( { - "exifEX:BodySerialNumber": "XMP_BODY", - 
"exifEX:LensSerialNumber": "XMP_LENS", + "exifEX:BodySerialNumber": "XMPBODY", + "exifEX:LensSerialNumber": "XMPLENS", } ) - assert reader.extract_camera_uuid() == "XMP_BODY_XMP_LENS" + assert reader.extract_camera_uuid() == "XMPBODY_XMPLENS" def test_xmp_no_serials(self): """Test XMP with no serial numbers""" @@ -420,13 +431,13 @@ def test_xmp_no_serials(self): def test_xmp_aux_serial_number(self): """Test XMP with aux:SerialNumber (Adobe auxiliary namespace)""" - reader = self._create_xmp_reader({"aux:SerialNumber": "AUX_SERIAL123"}) - assert reader.extract_camera_uuid() == "AUX_SERIAL123" + reader = self._create_xmp_reader({"aux:SerialNumber": "AUXSERIAL123"}) + assert reader.extract_camera_uuid() == "AUXSERIAL123" def test_xmp_aux_lens_serial_number(self): """Test XMP with aux:LensSerialNumber""" - reader = self._create_xmp_reader({"aux:LensSerialNumber": "AUX_LENS456"}) - assert reader.extract_camera_uuid() == "AUX_LENS456" + reader = self._create_xmp_reader({"aux:LensSerialNumber": "AUXLENS456"}) + assert reader.extract_camera_uuid() == "AUXLENS456" class TestExtractCameraUuidIntegration: @@ -565,8 +576,8 @@ def test_generic_serial_fallback(self): def test_ifd0_serial_fallback(self): """Test that IFD0:SerialNumber is used as fallback""" - reader = self._create_exiftool_reader({"IFD0:SerialNumber": "IFD0_SN_123"}) - assert reader.extract_camera_uuid() == "IFD0_SN_123" + reader = self._create_exiftool_reader({"IFD0:SerialNumber": "IFD0SN123"}) + assert reader.extract_camera_uuid() == "IFD0SN123" def test_body_serial_priority_over_generic(self): """Test that BodySerialNumber takes priority over generic SerialNumber""" @@ -587,25 +598,25 @@ def test_xmp_exifex_body_serial(self): def test_xmp_aux_serial(self): """Test XMP-aux:SerialNumber extraction""" - reader = self._create_exiftool_reader({"XMP-aux:SerialNumber": "AUX_SN_456"}) - assert reader.extract_camera_uuid() == "AUX_SN_456" + reader = self._create_exiftool_reader({"XMP-aux:SerialNumber": "AUXSN456"}) + 
assert reader.extract_camera_uuid() == "AUXSN456" def test_xmp_aux_lens_serial(self): """Test XMP-aux:LensSerialNumber extraction""" reader = self._create_exiftool_reader( - {"XMP-aux:LensSerialNumber": "AUX_LENS_789"} + {"XMP-aux:LensSerialNumber": "AUXLENS789"} ) - assert reader.extract_camera_uuid() == "AUX_LENS_789" + assert reader.extract_camera_uuid() == "AUXLENS789" def test_xmp_combined(self): """Test XMP body and lens serial combined""" reader = self._create_exiftool_reader( { - "XMP-exifEX:BodySerialNumber": "XMP_BODY", - "XMP-exifEX:LensSerialNumber": "XMP_LENS", + "XMP-exifEX:BodySerialNumber": "XMPBODY", + "XMP-exifEX:LensSerialNumber": "XMPLENS", } ) - assert reader.extract_camera_uuid() == "XMP_BODY_XMP_LENS" + assert reader.extract_camera_uuid() == "XMPBODY_XMPLENS" def test_whitespace_stripped(self): """Test that whitespace is stripped from serial numbers""" diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index cefb12a5..15b54e81 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -124,3 +124,46 @@ def test_filter_all(tmpdir: py.path.local): or {"foo/world.mp4", "foo/world.MP4", "foo/world.ts"} == actual or {"foo/world.mp4", "foo/world.ts"} == actual ) + + +class TestSanitizeSerial: + """Tests for sanitize_serial function""" + + def test_none_input(self): + """Test that None input returns None""" + assert utils.sanitize_serial(None) is None + + def test_alphanumeric_unchanged(self): + """Test that alphanumeric strings are unchanged""" + assert utils.sanitize_serial("ABC123") == "ABC123" + assert utils.sanitize_serial("abc123xyz") == "abc123xyz" + + def test_removes_whitespace(self): + """Test that whitespace is removed""" + assert utils.sanitize_serial(" ABC123 ") == "ABC123" + assert utils.sanitize_serial("ABC 123") == "ABC123" + assert utils.sanitize_serial(" A B C ") == "ABC" + + def test_removes_special_characters(self): + """Test that special characters are removed""" + assert 
utils.sanitize_serial("ABC-123") == "ABC123" + assert utils.sanitize_serial("ABC_123") == "ABC123" + assert utils.sanitize_serial("ABC/123") == "ABC123" + assert utils.sanitize_serial("ABC:123") == "ABC123" + assert utils.sanitize_serial("ABC.123") == "ABC123" + + def test_removes_mixed_special_chars(self): + """Test removal of various special characters""" + assert utils.sanitize_serial("SN:ABC-123/XYZ") == "SNABC123XYZ" + assert utils.sanitize_serial("(ABC)[123]{XYZ}") == "ABC123XYZ" + + def test_empty_after_sanitize_returns_none(self): + """Test that empty result after sanitization returns None""" + assert utils.sanitize_serial("") is None + assert utils.sanitize_serial(" ") is None + assert utils.sanitize_serial("---") is None + assert utils.sanitize_serial("!@#$%^&*()") is None + + def test_preserves_case(self): + """Test that case is preserved""" + assert utils.sanitize_serial("AbC123xYz") == "AbC123xYz"