diff --git a/pycaption/dfxp/base.py b/pycaption/dfxp/base.py index bef05864..215b2200 100644 --- a/pycaption/dfxp/base.py +++ b/pycaption/dfxp/base.py @@ -65,6 +65,8 @@ } DFXP_DEFAULT_LANGUAGE_CODE = "en" +DFXP_DEFAULT_FRAMERATE = "30" +DFXP_DEFAULT_FRAMERATEMULTIPLIER = "1000 1000" class DFXPReader(BaseReader): @@ -81,6 +83,7 @@ def __init__(self, *args, **kw): self.read_invalid_positioning = ( kw.get('read_invalid_positioning', False)) self.nodes = [] + self.framerate = self._get_framerate(DFXP_DEFAULT_FRAMERATE, DFXP_DEFAULT_FRAMERATEMULTIPLIER) def detect(self, content): if '' in content.lower(): @@ -101,6 +104,11 @@ def read(self, content): default_language = dfxp_document.tt.attrs.get('xml:lang', DEFAULT_LANGUAGE_CODE) + framerate = dfxp_document.tt.attrs.get('ttp:framerate', + DFXP_DEFAULT_FRAMERATE) + framerate_multiplier = dfxp_document.tt.attrs.get('ttp:frameratemultiplier', + DFXP_DEFAULT_FRAMERATEMULTIPLIER) + self.framerate = self._get_framerate(framerate, framerate_multiplier) # Each div represents all the captions for a single language. for div in dfxp_document.find_all('div'): @@ -169,6 +177,13 @@ def _find_and_convert_times(self, p_tag): return start, end + @staticmethod + def _get_framerate(framerate, framerate_multiplier): + numerator, denominator = framerate_multiplier.split() + framerate_multiplier = int(numerator) / int(denominator) + framerate = float(framerate) * framerate_multiplier + return framerate + def _convert_timestamp_to_microseconds(self, stamp): match = TIME_EXPRESSION_PATTERN.search(stamp) if not match: @@ -180,8 +195,7 @@ def _convert_timestamp_to_microseconds(self, stamp): else: return self._convert_time_count_to_microseconds(match) - @staticmethod - def _convert_clock_time_to_microseconds(clock_time_match): + def _convert_clock_time_to_microseconds(self, clock_time_match): microseconds = int(clock_time_match.group('hours')) * \ MICROSECONDS_PER_UNIT["hours"] microseconds += int(clock_time_match.group('minutes')) * \ @@ -192,12 +206,11 @@ def _convert_clock_time_to_microseconds(clock_time_match): microseconds += int(clock_time_match.group('sub_frames').ljust( 3, '0')) * MICROSECONDS_PER_UNIT["milliseconds"] elif clock_time_match.group('frames'): - microseconds += int(clock_time_match.group('frames')) / 30 * \ + microseconds += int(clock_time_match.group('frames')) / self.framerate * \ MICROSECONDS_PER_UNIT["seconds"] return int(microseconds) - @staticmethod - def _convert_time_count_to_microseconds(time_count_match): + def _convert_time_count_to_microseconds(self, time_count_match): value = float(time_count_match.group('time_count')) metric = time_count_match.group("metric") if metric == "h": @@ -209,7 +222,7 @@ def _convert_time_count_to_microseconds(time_count_match): elif metric == "ms": microseconds = value * MICROSECONDS_PER_UNIT["milliseconds"] elif metric == "f": - microseconds = value / 30 * MICROSECONDS_PER_UNIT["seconds"] + microseconds = value / self.framerate * MICROSECONDS_PER_UNIT["seconds"] elif metric == "t": raise NotImplementedError("The tick metric for time count is " "not currently implemented.") diff --git a/tests/conftest.py b/tests/conftest.py index 55b785fb..5ec12f1c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,7 @@ sample_dfxp_invalid_positioning_value_template, sample_dfxp_multiple_captions_with_the_same_timing, sample_dfxp_with_ampersand_character, sample_dfxp_with_nested_spans, - dfxp_style_region_align_conflict, dfxp_with_concurrent_captions, + dfxp_style_region_align_conflict, dfxp_with_concurrent_captions, sample_dfxp_framerate ) from tests.fixtures.microdvd import ( # noqa: F401 sample_microdvd, sample_microdvd_2, diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py index c619ebff..6bced707 100644 --- a/tests/fixtures/dfxp.py +++ b/tests/fixtures/dfxp.py @@ -1508,4 +1508,49 @@ def sample_dfxp_default_styling_p_tags():

-""" \ No newline at end of file +""" + + +@pytest.fixture(scope="session") +def sample_dfxp_framerate(): + return """\ + + + + + SMPTE 2052 Timed Text Captions document created by MacCaption™ version 7.0.12 + + + +