Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions dateparser/data/date_translation_data/en.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,32 +51,39 @@
],
"monday": [
"mon",
"monday"
"monday",
"mo"
],
"tuesday": [
"tue",
"tuesday",
"tu",
"Tues"
],
"wednesday": [
"wed",
"wednesday"
"wednesday",
"we"
],
"thursday": [
"thu",
"thursday"
"thursday",
"th"
],
"friday": [
"fri",
"friday"
"friday",
"fr"
],
"saturday": [
"sat",
"saturday"
"saturday",
"sa"
],
"sunday": [
"sun",
"sunday"
"sunday",
"su"
],
"am": [
"am"
Expand Down
26 changes: 26 additions & 0 deletions dateparser/languages/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,31 @@ def clean_dictionary(dictionary, threshold=2):
del dictionary[del_key]
return dictionary

@property
def weekdays(self):
weekdays = [
"monday",
"tuesday",
"wednesday",
"thursday",
"friday",
"saturday",
"sunday",
]
return weekdays

def remove_multiple_occurrences(self, date_str_tokens: list):
# first occurrence of day of the week will be considered
# followings occurrence(s) will be skipped and removed from the token list.
weekdays_counter = 0
for i, token in enumerate(date_str_tokens):
if token in self.weekdays:
weekdays_counter += 1

if weekdays_counter > 1:
date_str_tokens.pop(i)
continue

def translate(self, date_string, keep_formatting=False, settings=None):
"""
Translate the date string to its English equivalent.
Expand Down Expand Up @@ -145,6 +170,7 @@ def translate(self, date_string, keep_formatting=False, settings=None):
if "in" in date_string_tokens:
date_string_tokens = self._clear_future_words(date_string_tokens)

self.remove_multiple_occurrences(date_string_tokens)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this do? None of the new test scenarios seem to feature multiple occurrences of a week day. What scenarios does this address? (I wonder if it could be introducing other issues)

return self._join(
list(filter(bool, date_string_tokens)),
separator="" if keep_formatting else " ",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,30 @@ pertain: ["of"]

sentence_splitter_group : 1

# two letters days of week

monday:
- mo

tuesday:
- tu
- Tues

wednesday:
- we

thursday:
- th

friday:
- fr

saturday:
- sa

sunday:
- su

september:
- sept

Expand Down
72 changes: 66 additions & 6 deletions tests/test_date.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python

import datetime as real_datetime
import os
import unittest
from collections import OrderedDict
Expand All @@ -9,6 +10,7 @@
from time import tzset
from unittest.mock import Mock, patch

import pytest
from parameterized import param, parameterized

import dateparser
Expand Down Expand Up @@ -832,13 +834,68 @@ def test_get_date_tuple(self, date_string, expected_result):
self.when_get_date_tuple_is_called(date_string)
self.then_returned_tuple_is(expected_result)

@parameterized.expand(
[
param(
"Mo",
datetime(2025, 7, 28, 0, 0),
),
param(
"Tu",
datetime(2025, 7, 29, 0, 0),
),
param(
"We",
datetime(2025, 7, 30, 0, 0),
),
param(
"Th",
datetime(2025, 7, 31, 0, 0),
),
param(
"Fr",
datetime(2025, 8, 1, 0, 0),
),
param(
"Sa",
datetime(2025, 7, 26, 0, 0),
),
param(
"Su",
datetime(2025, 7, 27, 0, 0),
),
]
)
def test_short_weekday_names(self, date_string, expected):
if "Mo" in date_string:
pytest.xfail(
"Known bug: 'Mo' is being interpreted as a month instead of a weekday and needs to be fixed."
)

self.given_parser(["en"])
self.given_now(2025, 8, 1)
self.when_date_string_is_parsed(date_string)
self.then_parsed_datetime_is(expected)

def given_now(self, year, month, day, **time):
now = datetime(year, month, day, **time)
datetime_mock = Mock(wraps=datetime)
datetime_mock.utcnow = Mock(return_value=now)
datetime_mock.now = Mock(return_value=now)
datetime_mock.today = Mock(return_value=now)
self.add_patch(patch("dateparser.date.datetime", new=datetime_mock))
now = real_datetime.datetime(year, month, day, **time)

# Patch the datetime *class* in each target module
class DateParserDateTime(real_datetime.datetime):
@classmethod
def now(cls, tz=None):
return now.replace(tzinfo=tz) if tz else now

@classmethod
def utcnow(cls):
return now

@classmethod
def today(cls):
return now

self.add_patch(patch("dateparser.date.datetime", DateParserDateTime))
self.add_patch(patch("dateparser.parser.datetime", DateParserDateTime))
Comment on lines -836 to +898
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain what this change does?


def given_parser(self, restrict_to_languages=None, **params):
self.parser = date.DateDataParser(languages=restrict_to_languages, **params)
Expand Down Expand Up @@ -874,6 +931,9 @@ def when_get_date_tuple_is_called(self, date_string):
def then_date_was_parsed(self):
self.assertIsNotNone(self.result["date_obj"])

def then_date_was_not_parsed(self):
self.assertIsNone(self.result["date_obj"])
Comment on lines +934 to +935
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems unused now.


def then_date_locale(self):
self.assertIsNotNone(self.result["locale"])

Expand Down
Loading