From ed8b666608707c821716cef9c70b5ddaf57ce6af Mon Sep 17 00:00:00 2001 From: Jonas Depoix Date: Mon, 21 Jul 2025 12:07:16 +0200 Subject: [PATCH 1/2] removed deprecated methods: get_transcript, get_transcripts, list_transcripts --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- youtube_transcript_api/_api.py | 169 +---------- youtube_transcript_api/_errors.py | 4 +- youtube_transcript_api/test/test_api.py | 368 +----------------------- 4 files changed, 6 insertions(+), 537 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 56cf9fc..8f9bf84 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -15,7 +15,7 @@ Steps to reproduce the behavior: ### What code / cli command are you executing? For example: I am running ``` -YouTubeTranscriptApi.get_transcript ... +YouTubeTranscriptApi().fetch() ... ``` ### Which Python version are you using? diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py index 55cfb95..b294493 100644 --- a/youtube_transcript_api/_api.py +++ b/youtube_transcript_api/_api.py @@ -1,11 +1,10 @@ -import warnings from typing import Optional, Iterable from requests import Session from requests.adapters import HTTPAdapter from urllib3 import Retry -from .proxies import ProxyConfig, GenericProxyConfig +from .proxies import ProxyConfig from ._transcripts import TranscriptListFetcher, FetchedTranscript, TranscriptList @@ -126,169 +125,3 @@ def list( Make sure that this is the actual ID, NOT the full URL to the video! """ return self._fetcher.fetch(video_id) - - @classmethod - def list_transcripts(cls, video_id, proxies=None): - """ - DEPRECATED: use the `list` method instead! - - Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object - which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating - over the `TranscriptList` the individual transcripts are represented by `Transcript` objects, which provide - metadata and can either be fetched by calling `transcript.fetch()` or translated by calling - `transcript.translate('en')`. Example: - - # retrieve the available transcripts - transcript_list = YouTubeTranscriptApi.list_transcripts('video_id') - - # iterate over all available transcripts - for transcript in transcript_list: - # the Transcript object provides metadata properties - print( - transcript.video_id, - transcript.language, - transcript.language_code, - # whether it has been manually created or generated by YouTube - transcript.is_generated, - # a list of languages the transcript can be translated to - transcript.translation_languages, - ) - - # fetch the actual transcript data - print(transcript.fetch()) - - # translating the transcript will return another transcript object - print(transcript.translate('en').fetch()) - - # you can also directly filter for the language you are looking for, using the transcript list - transcript = transcript_list.find_transcript(['de', 'en']) - - # or just filter for manually created transcripts - transcript = transcript_list.find_manually_created_transcript(['de', 'en']) - - # or automatically generated ones - transcript = transcript_list.find_generated_transcript(['de', 'en']) - - :param video_id: the youtube video id - :type video_id: str - :param proxies: a dictionary mapping of http and https proxies to be used for the network requests - :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies - :return: the list of available transcripts - :rtype TranscriptList: - """ - warnings.warn( - "`list_transcripts` is deprecated and will be removed in a future version. " - "Use the `list` method instead!", - DeprecationWarning, - ) - - proxy_config = None - if proxies: - if isinstance(proxies, ProxyConfig): - proxy_config = proxies - else: - proxy_config = GenericProxyConfig( - http_url=proxies.get("http"), https_url=proxies.get("https") - ) - - ytt_api = YouTubeTranscriptApi( - proxy_config=proxy_config, - ) - return ytt_api.list(video_id) - - @classmethod - def get_transcripts( - cls, - video_ids, - languages=("en",), - continue_after_error=False, - proxies=None, - preserve_formatting=False, - ): - """ - DEPRECATED: use the `fetch` method instead! - - Retrieves the transcripts for a list of videos. - - :param video_ids: a list of youtube video ids - :type video_ids: list[str] - :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] - it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to - do so. - :type languages: list[str] - :param continue_after_error: if this is set the execution won't be stopped, if an error occurs while retrieving - one of the video transcripts - :type continue_after_error: bool - :param proxies: a dictionary mapping of http and https proxies to be used for the network requests - :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies - :param preserve_formatting: whether to keep select HTML text formatting - :type preserve_formatting: bool - :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of - video ids, which could not be retrieved - :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): - """ - warnings.warn( - "`get_transcripts` is deprecated and will be removed in a future version. " - "Use the `fetch` method instead!", - DeprecationWarning, - ) - - assert isinstance(video_ids, list), "`video_ids` must be a list of strings" - - data = {} - unretrievable_videos = [] - - for video_id in video_ids: - try: - data[video_id] = cls.get_transcript( - video_id, languages, proxies, preserve_formatting - ) - except Exception as exception: - if not continue_after_error: - raise exception - - unretrievable_videos.append(video_id) - - return data, unretrievable_videos - - @classmethod - def get_transcript( - cls, - video_id, - languages=("en",), - proxies=None, - preserve_formatting=False, - ): - """ - DEPRECATED: use the `fetch` method instead! - - Retrieves the transcript for a single video. This is just a shortcut for calling:: - - YouTubeTranscriptApi.list_transcripts(video_id, proxies).find_transcript(languages).fetch() - - :param video_id: the youtube video id - :type video_id: str - :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] - it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to - do so. - :type languages: list[str] - :param proxies: a dictionary mapping of http and https proxies to be used for the network requests - :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies - :param preserve_formatting: whether to keep select HTML text formatting - :type preserve_formatting: bool - :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys - :rtype [{'text': str, 'start': float, 'end': float}]: - """ - warnings.warn( - "`get_transcript` is deprecated and will be removed in a future version. " - "Use the `fetch` method instead!", - DeprecationWarning, - ) - - assert isinstance(video_id, str), "`video_id` must be a string" - return ( - cls.list_transcripts(video_id, proxies) - .find_transcript(languages) - .fetch(preserve_formatting=preserve_formatting) - .to_raw_data() - ) diff --git a/youtube_transcript_api/_errors.py b/youtube_transcript_api/_errors.py index b4431d0..835df4b 100644 --- a/youtube_transcript_api/_errors.py +++ b/youtube_transcript_api/_errors.py @@ -123,8 +123,8 @@ class VideoUnavailable(CouldNotRetrieveTranscript): class InvalidVideoId(CouldNotRetrieveTranscript): CAUSE_MESSAGE = ( "You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n" - 'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n' - 'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`' + 'Do NOT run: `YouTubeTranscriptApi().fetch("https://www.youtube.com/watch?v=1234")`\n' + 'Instead run: `YouTubeTranscriptApi().fetch("1234")`' ) diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py index 2ad548a..4694505 100644 --- a/youtube_transcript_api/test/test_api.py +++ b/youtube_transcript_api/test/test_api.py @@ -260,7 +260,7 @@ def test_fetch__exception_if_youtube_request_fails(self): self.assertIn("Request to YouTube failed: ", str(cm.exception)) - def test_get_transcript__exception_if_youtube_request_limit_reached( + def test_fetch__exception_if_youtube_request_limit_reached( self, ): httpretty.register_uri( @@ -272,7 +272,7 @@ def test_get_transcript__exception_if_youtube_request_limit_reached( with self.assertRaises(IpBlocked): YouTubeTranscriptApi().fetch("abc") - def test_get_transcript__exception_if_timedtext_request_limit_reached( + def test_fetch__exception_if_timedtext_request_limit_reached( self, ): httpretty.register_uri( @@ -493,367 +493,3 @@ def test_load_cookies__no_valid_cookies(self): cookie_path = get_asset_path("expired_example_cookies.txt") with self.assertRaises(CookieInvalid): YouTubeTranscriptApi(cookie_path=cookie_path) - - ### DEPRECATED METHODS ### - - def test_get_transcript__deprecated(self): - transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8") - - self.assertEqual( - transcript, - [ - {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54}, - { - "text": "this is not the original transcript", - "start": 1.54, - "duration": 4.16, - }, - { - "text": "just something shorter, I made up for testing", - "start": 5.7, - "duration": 3.239, - }, - ], - ) - - def test_get_transcript_formatted__deprecated(self): - transcript = YouTubeTranscriptApi.get_transcript( - "GJLlxj_dtq8", preserve_formatting=True - ) - - self.assertEqual( - transcript, - [ - {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54}, - { - "text": "this is not the original transcript", - "start": 1.54, - "duration": 4.16, - }, - { - "text": "just something shorter, I made up for testing", - "start": 5.7, - "duration": 3.239, - }, - ], - ) - - def test_list_transcripts__deprecated(self): - transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8") - - language_codes = {transcript.language_code for transcript in transcript_list} - - self.assertEqual( - language_codes, {"zh", "de", "en", "hi", "ja", "ko", "es", "cs", "en"} - ) - - def test_list_transcripts__find_manually_created__deprecated(self): - transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8") - transcript = transcript_list.find_manually_created_transcript(["cs"]) - - self.assertFalse(transcript.is_generated) - - def test_list_transcripts__find_generated__deprecated(self): - transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8") - - with self.assertRaises(NoTranscriptFound): - transcript_list.find_generated_transcript(["cs"]) - - transcript = transcript_list.find_generated_transcript(["en"]) - - self.assertTrue(transcript.is_generated) - - def test_list_transcripts__url_as_video_id__deprecated(self): - httpretty.register_uri( - httpretty.POST, - "https://www.youtube.com/youtubei/v1/player", - body=load_asset("youtube_video_unavailable.innertube.json.static"), - ) - - with self.assertRaises(InvalidVideoId): - YouTubeTranscriptApi.list_transcripts( - "https://www.youtube.com/youtubei/v1/player?v=GJLlxj_dtq8" - ) - - def test_translate_transcript__deprecated(self): - transcript = YouTubeTranscriptApi.list_transcripts( - "GJLlxj_dtq8" - ).find_transcript(["en"]) - - translated_transcript = transcript.translate("ar") - - self.assertEqual(translated_transcript.language_code, "ar") - self.assertIn("&tlang=ar", translated_transcript._url) - - def test_translate_transcript__translation_language_not_available__deprecated(self): - transcript = YouTubeTranscriptApi.list_transcripts( - "GJLlxj_dtq8" - ).find_transcript(["en"]) - - with self.assertRaises(TranslationLanguageNotAvailable): - transcript.translate("xyz") - - def test_translate_transcript__not_translatable__deprecated(self): - transcript = YouTubeTranscriptApi.list_transcripts( - "GJLlxj_dtq8" - ).find_transcript(["en"]) - transcript.translation_languages = [] - - with self.assertRaises(NotTranslatable): - transcript.translate("af") - - def test_get_transcript__correct_language_is_used__deprecated(self): - YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", ["de", "en"]) - query_string = httpretty.last_request().querystring - - self.assertIn("lang", query_string) - self.assertEqual(len(query_string["lang"]), 1) - self.assertEqual(query_string["lang"][0], "de") - - def test_get_transcript__fallback_language_is_used__deprecated(self): - httpretty.register_uri( - httpretty.POST, - "https://www.youtube.com/youtubei/v1/player", - body=load_asset("youtube_ww1_nl_en.innertube.json.static"), - ) - - YouTubeTranscriptApi.get_transcript("F1xioXWb8CY", ["de", "en"]) - query_string = httpretty.last_request().querystring - - self.assertIn("lang", query_string) - self.assertEqual(len(query_string["lang"]), 1) - self.assertEqual(query_string["lang"][0], "en") - - def test_get_transcript__create_consent_cookie_if_needed__deprecated(self): - httpretty.register_uri( - httpretty.GET, - "https://www.youtube.com/watch", - body=load_asset("youtube_consent_page.html.static"), - ) - - YouTubeTranscriptApi.get_transcript("F1xioXWb8CY") - self.assertEqual(len(httpretty.latest_requests()), 4) - for request in httpretty.latest_requests()[1:]: - self.assertEqual( - request.headers["cookie"], "CONSENT=YES+cb.20210328-17-p0.de+FX+119" - ) - - def test_get_transcript__exception_if_create_consent_cookie_failed__deprecated( - self, - ): - for _ in range(2): - httpretty.register_uri( - httpretty.GET, - "https://www.youtube.com/watch", - body=load_asset("youtube_consent_page.html.static"), - ) - - with self.assertRaises(FailedToCreateConsentCookie): - YouTubeTranscriptApi.get_transcript("F1xioXWb8CY") - - def test_get_transcript__exception_if_consent_cookie_age_invalid__deprecated(self): - httpretty.register_uri( - httpretty.GET, - "https://www.youtube.com/watch", - body=load_asset("youtube_consent_page_invalid.html.static"), - ) - - with self.assertRaises(FailedToCreateConsentCookie): - YouTubeTranscriptApi.get_transcript("F1xioXWb8CY") - - def test_get_transcript__exception_if_video_unavailable__deprecated(self): - httpretty.register_uri( - httpretty.POST, - "https://www.youtube.com/youtubei/v1/player", - body=load_asset("youtube_video_unavailable.innertube.json.static"), - ) - - with self.assertRaises(VideoUnavailable): - YouTubeTranscriptApi.get_transcript("abc") - - def test_get_transcript__exception_if_youtube_request_fails__deprecated(self): - httpretty.register_uri( - httpretty.POST, "https://www.youtube.com/youtubei/v1/player", status=500 - ) - - with self.assertRaises(YouTubeRequestFailed): - YouTubeTranscriptApi.get_transcript("abc") - - def test_get_transcript__exception_if_youtube_request_limit_reached__deprecated( - self, - ): - httpretty.register_uri( - httpretty.GET, - "https://www.youtube.com/watch", - body=load_asset("youtube_too_many_requests.html.static"), - ) - - with self.assertRaises(IpBlocked): - YouTubeTranscriptApi.get_transcript("abc") - - def test_get_transcript__exception_if_transcripts_disabled__deprecated(self): - httpretty.register_uri( - httpretty.POST, - "https://www.youtube.com/youtubei/v1/player", - body=load_asset("youtube_transcripts_disabled.innertube.json.static"), - ) - - with self.assertRaises(TranscriptsDisabled): - YouTubeTranscriptApi.get_transcript("dsMFmonKDD4") - - httpretty.register_uri( - httpretty.POST, - "https://www.youtube.com/youtubei/v1/player", - body=load_asset("youtube_transcripts_disabled2.innertube.json.static"), - ) - with self.assertRaises(TranscriptsDisabled): - YouTubeTranscriptApi.get_transcript("Fjg5lYqvzUs") - - def test_get_transcript__exception_if_language_unavailable__deprecated(self): - with self.assertRaises(NoTranscriptFound): - YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", languages=["cz"]) - - @patch("youtube_transcript_api.proxies.GenericProxyConfig.to_requests_dict") - def test_get_transcript__with_proxy__deprecated(self, to_requests_dict): - proxies = { - "http": "http://localhost:8080", - "https": "http://localhost:8080", - } - transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", proxies=proxies) - self.assertEqual( - transcript, - [ - {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54}, - { - "text": "this is not the original transcript", - "start": 1.54, - "duration": 4.16, - }, - { - "text": "just something shorter, I made up for testing", - "start": 5.7, - "duration": 3.239, - }, - ], - ) - to_requests_dict.assert_any_call() - - @patch("youtube_transcript_api.proxies.GenericProxyConfig.to_requests_dict") - def test_get_transcript__with_proxy_config__deprecated(self, to_requests_dict): - proxy_config = GenericProxyConfig( - http_url="http://localhost:8080", - https_url="http://localhost:8080", - ) - transcript = YouTubeTranscriptApi.get_transcript( - "GJLlxj_dtq8", proxies=proxy_config - ) - self.assertEqual( - transcript, - [ - {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54}, - { - "text": "this is not the original transcript", - "start": 1.54, - "duration": 4.16, - }, - { - "text": "just something shorter, I made up for testing", - "start": 5.7, - "duration": 3.239, - }, - ], - ) - to_requests_dict.assert_any_call() - - @pytest.mark.skip( - reason="This test is temporarily disabled because cookie auth is currently not " - "working due to YouTube changes." - ) - def test_get_transcript__with_cookies__deprecated(self): - cookies_path = get_asset_path("example_cookies.txt") - transcript = YouTubeTranscriptApi.get_transcript( - "GJLlxj_dtq8", cookies=str(cookies_path) - ) - - self.assertEqual( - transcript, - [ - {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54}, - { - "text": "this is not the original transcript", - "start": 1.54, - "duration": 4.16, - }, - { - "text": "just something shorter, I made up for testing", - "start": 5.7, - "duration": 3.239, - }, - ], - ) - - def test_get_transcript__assertionerror_if_input_not_string__deprecated(self): - with self.assertRaises(AssertionError): - YouTubeTranscriptApi.get_transcript(["video_id_1", "video_id_2"]) - - def test_get_transcripts__assertionerror_if_input_not_list__deprecated(self): - with self.assertRaises(AssertionError): - YouTubeTranscriptApi.get_transcripts("video_id_1") - - @patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript") - def test_get_transcripts__deprecated(self, mock_get_transcript): - video_id_1 = "video_id_1" - video_id_2 = "video_id_2" - languages = ["de", "en"] - - YouTubeTranscriptApi.get_transcripts( - [video_id_1, video_id_2], languages=languages - ) - - mock_get_transcript.assert_any_call(video_id_1, languages, None, False) - mock_get_transcript.assert_any_call(video_id_2, languages, None, False) - self.assertEqual(mock_get_transcript.call_count, 2) - - @patch( - "youtube_transcript_api.YouTubeTranscriptApi.get_transcript", - side_effect=Exception("Error"), - ) - def test_get_transcripts__stop_on_error__deprecated(self, mock_get_transcript): - with self.assertRaises(Exception): - YouTubeTranscriptApi.get_transcripts(["video_id_1", "video_id_2"]) - - @patch( - "youtube_transcript_api.YouTubeTranscriptApi.get_transcript", - side_effect=Exception("Error"), - ) - def test_get_transcripts__continue_on_error__deprecated(self, mock_get_transcript): - video_id_1 = "video_id_1" - video_id_2 = "video_id_2" - - YouTubeTranscriptApi.get_transcripts( - ["video_id_1", "video_id_2"], continue_after_error=True - ) - - mock_get_transcript.assert_any_call(video_id_1, ("en",), None, False) - mock_get_transcript.assert_any_call(video_id_2, ("en",), None, False) - - @pytest.mark.skip( - reason="This test is temporarily disabled because cookie auth is currently not " - "working due to YouTube changes." - ) - @patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript") - def test_get_transcripts__with_cookies__deprecated(self, mock_get_transcript): - cookie_path = get_asset_path("example_cookies.txt") - YouTubeTranscriptApi.get_transcripts(["GJLlxj_dtq8"], cookies=str(cookie_path)) - mock_get_transcript.assert_any_call( - "GJLlxj_dtq8", ("en",), None, str(cookie_path), False - ) - - @patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript") - def test_get_transcripts__with_proxies__deprecated(self, mock_get_transcript): - proxies = { - "http": "http://localhost:8080", - "https": "http://localhost:8080", - } - YouTubeTranscriptApi.get_transcripts(["GJLlxj_dtq8"], proxies=proxies) - mock_get_transcript.assert_any_call("GJLlxj_dtq8", ("en",), proxies, False) From a951854edc0ac7b72368ec2be991ca8fec27f623 Mon Sep 17 00:00:00 2001 From: Jonas Depoix Date: Mon, 21 Jul 2025 12:09:12 +0200 Subject: [PATCH 2/2] v1.2.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 627c632..4548c26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "youtube-transcript-api" -version = "1.1.1" +version = "1.2.0" description = "This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!" readme = "README.md" license = "MIT"