Skip to content

Commit 2340c61

Browse files
committed
feat: improve automatic format detection
Add more patterns to the automatic format detector, and pick the format that produces the maximum (latest) date. Fixes #103
1 parent 6f3f929 commit 2340c61

File tree

3 files changed

+182
-28
lines changed

3 files changed

+182
-28
lines changed

src/main/java/net/atomique/ksar/AllParser.java

Lines changed: 69 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package net.atomique.ksar;
77

88
import net.atomique.ksar.xml.OSConfig;
9+
910
import org.slf4j.Logger;
1011
import org.slf4j.LoggerFactory;
1112

@@ -14,26 +15,49 @@
1415
import java.time.LocalTime;
1516
import java.time.format.DateTimeFormatter;
1617
import java.time.format.DateTimeParseException;
17-
import java.util.HashMap;
18-
import java.util.Map;
18+
import java.util.List;
19+
import java.util.Locale;
1920
import java.util.TreeSet;
21+
import java.util.stream.Collectors;
22+
import java.util.stream.Stream;
2023

2124
public abstract class AllParser {
2225

2326
private static final Logger log = LoggerFactory.getLogger(AllParser.class);
24-
private static final Map<String, String> DATE_FORMAT_REGEXPS = new HashMap<String, String>() {
25-
{
26-
put("^\\d{8}$", "yyyyMMdd");
27-
put("^\\d{1,2}-\\d{1,2}-\\d{4}$", "dd-MM-yyyy");
28-
put("^\\d{4}-\\d{1,2}-\\d{1,2}$", "yyyy-MM-dd");
29-
put("^\\d{1,2}/\\d{1,2}/\\d{4}$", "MM/dd/yyyy");
30-
put("^\\d{4}/\\d{1,2}/\\d{1,2}$", "yyyy/MM/dd");
31-
put("^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$", "dd MMM yyyy");
32-
put("^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$", "dd MMMM yyyy");
33-
put("^\\d{1,2}-\\d{1,2}-\\d{2}$", "dd-MM-yy");
34-
put("^\\d{1,2}/\\d{1,2}/\\d{2}$", "MM/dd/yy");
35-
}
36-
};
27+
28+
private static final List<DateTimeFormatter> DATE_FORMATS = Stream.of(
29+
"MM dd, yy",
30+
"MM-dd-yy",
31+
"MM/dd/yy",
32+
"MM-dd-yyyy",
33+
"MM/dd/yyyy",
34+
"dd-MM-yy",
35+
"dd.MM.yy",
36+
"dd/MM/yy",
37+
"dd.MM.yy.",
38+
"dd-MM-yyyy",
39+
"dd.MM.yyyy",
40+
"dd/MM/yyyy",
41+
"dd.MM.yyyy.",
42+
"yy. MM. dd",
43+
"yy-MM-dd",
44+
"yy.MM.dd",
45+
"yy/MM/dd",
46+
"yy年MM月dd日",
47+
"yy.dd.MM",
48+
"yyyy. MM. dd",
49+
"yyyy-MM-dd",
50+
"yyyy.MM.dd",
51+
"yyyy/MM/dd",
52+
"yyyy.MM.dd.",
53+
"yyyy年MM月dd日",
54+
"yyyy.dd.MM",
55+
"yyyyMMdd",
56+
"dd MMM yyyy",
57+
"dd MMMM yyyy",
58+
"MMM dd yyyy",
59+
"MMMM dd yyyy"
60+
).map(p -> DateTimeFormatter.ofPattern(p, Locale.US)).collect(Collectors.toList());
3761

3862
public AllParser() {
3963

@@ -80,14 +104,7 @@ public boolean setDate(String s) {
80104
}
81105

82106
try {
83-
DateTimeFormatter formatter;
84-
if ("Automatic Detection".equals(dateFormat)) {
85-
formatter = DateTimeFormatter.ofPattern(determineDateFormat(s));
86-
87-
} else {
88-
formatter = DateTimeFormatter.ofPattern(dateFormat);
89-
}
90-
107+
DateTimeFormatter formatter = getDateFormatter(s);
91108
currentDate = LocalDate.parse(s, formatter);
92109

93110
parsedate = currentDate;
@@ -109,6 +126,20 @@ public boolean setDate(String s) {
109126
return true;
110127
}
111128

129+
private DateTimeFormatter getDateFormatter(String s) {
130+
if (dateFormatter != null) {
131+
return dateFormatter;
132+
}
133+
DateTimeFormatter format = null;
134+
if ("Automatic Detection".equals(dateFormat)) {
135+
format = determineDateFormat(s);
136+
} else {
137+
format = DateTimeFormatter.ofPattern(dateFormat);
138+
}
139+
dateFormatter = format;
140+
return dateFormatter;
141+
}
142+
112143
public String getDate() {
113144
if (sarStartDate.equals(sarEndDate)) {
114145
return sarStartDate;
@@ -125,13 +156,21 @@ public String getCurrentStat() {
125156
return currentStat;
126157
}
127158

128-
public static String determineDateFormat(String dateString) {
129-
for (String regexp : DATE_FORMAT_REGEXPS.keySet()) {
130-
if (dateString.toLowerCase().matches(regexp)) {
131-
return DATE_FORMAT_REGEXPS.get(regexp);
159+
public static DateTimeFormatter determineDateFormat(String dateString) {
160+
DateTimeFormatter best = null;
161+
LocalDate bestDate = null;
162+
for (DateTimeFormatter format : DATE_FORMATS) {
163+
try {
164+
LocalDate nextDate = LocalDate.parse(dateString, format);
165+
if (bestDate == null || nextDate.compareTo(bestDate) >= 0) {
166+
bestDate = nextDate;
167+
best = format;
168+
}
169+
} catch (DateTimeParseException e) {
170+
/* ignore */
132171
}
133172
}
134-
return null; // Unknown format.
173+
return best;
135174
}
136175

137176
protected String sarStartDate = null;
@@ -159,4 +198,6 @@ public static String determineDateFormat(String dateString) {
159198
protected String dateFormat = "MM/dd/yy";
160199
protected String timeFormat = "HH:mm:ss";
161200
protected int timeColumn = 1;
201+
202+
private DateTimeFormatter dateFormatter;
162203
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package net.atomique.ksar.parser;
2+
3+
import org.junit.Ignore;
4+
import org.junit.Test;
5+
6+
import java.time.LocalDate;
7+
import java.time.format.DateTimeFormatter;
8+
import java.time.format.FormatStyle;
9+
import java.util.*;
10+
import java.util.function.Function;
11+
import java.util.function.Predicate;
12+
import java.util.regex.Pattern;
13+
14+
public class DateFormatHelperTest {
15+
@Test
16+
@Ignore
17+
public void generateTests() throws Exception {
18+
Set<String> allFormats = new HashSet<>();
19+
LocalDate date = LocalDate.of(2017, 10, 18);
20+
Predicate<String> nonPunctuation = Pattern.compile("[^ ./-\\:0-9]{3,}").asPredicate();
21+
for (Locale locale : Locale.getAvailableLocales()) {
22+
for (FormatStyle style : EnumSet.of(FormatStyle.SHORT, FormatStyle.MEDIUM)) {
23+
DateTimeFormatter f = DateTimeFormatter.ofLocalizedDate(style).withLocale(locale);
24+
String str = f.format(date);
25+
if (nonPunctuation.test(str)) {
26+
continue;
27+
}
28+
String v = str.replaceAll("2017", "yyyy").replaceAll("17", "yy")
29+
.replaceAll("18", "dd").replaceAll("10", "MM");
30+
allFormats.add(v);
31+
}
32+
}
33+
List<String> formats = new ArrayList<>(allFormats);
34+
formats.sort(Comparator.<String, String>comparing(v -> v.replaceAll("[^\\w]", "-"))
35+
.thenComparing(Function.identity()));
36+
37+
for (String format : formats) {
38+
System.out.println('"' + format + "\",");
39+
}
40+
}
41+
42+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package net.atomique.ksar.parser;
2+
3+
import net.atomique.ksar.AllParser;
4+
5+
import org.junit.Assert;
6+
import org.junit.Test;
7+
import org.junit.runner.RunWith;
8+
import org.junit.runners.Parameterized;
9+
10+
import java.time.LocalDate;
11+
import java.time.format.DateTimeFormatter;
12+
import java.util.ArrayList;
13+
import java.util.Arrays;
14+
import java.util.Collection;
15+
16+
@RunWith(Parameterized.class)
17+
public class DateFormatTest {
18+
private final String text;
19+
private final LocalDate date;
20+
private final String expected;
21+
22+
public DateFormatTest(LocalDate date, String text, String expected) {
23+
this.text = text;
24+
this.date = date;
25+
this.expected = expected;
26+
}
27+
28+
@Parameterized.Parameters(name = "{1} -> {2}")
29+
public static Iterable<Object[]> params() {
30+
Collection<Object[]> res = new ArrayList<>();
31+
32+
// See DateTest.generateFormats
33+
LocalDate date = LocalDate.of(2017, 5, 16);
34+
for (String format : Arrays.asList(
35+
"MM-dd-yy",
36+
"MM/dd/yy",
37+
"dd-MM-yy",
38+
"dd.MM.yy",
39+
"dd/MM/yy",
40+
"dd.MM.yy.",
41+
"dd-MM-yyyy",
42+
"dd.MM.yyyy",
43+
"dd/MM/yyyy",
44+
"yy. MM. dd",
45+
"yy-MM-dd",
46+
"yy.MM.dd",
47+
"yy/MM/dd",
48+
"yy年MM月dd日",
49+
"yy.dd.MM",
50+
"yyyy-MM-dd",
51+
"yyyy.MM.dd",
52+
"yyyy/MM/dd",
53+
"yyyy.MM.dd."
54+
)) {
55+
DateTimeFormatter df = DateTimeFormatter.ofPattern(format);
56+
res.add(new Object[]{date, df.format(date), format});
57+
}
58+
// See https://github.com/vlsi/ksar/issues/103
59+
LocalDate aug_04_2017 = LocalDate.of(2017, 8, 4);
60+
res.add(new Object[]{aug_04_2017, "04/08/17", "dd/MM/yy"});
61+
return res;
62+
}
63+
64+
@Test
65+
public void run() {
66+
DateTimeFormatter format = AllParser.determineDateFormat(text);
67+
LocalDate date = LocalDate.parse(text, format);
68+
Assert.assertEquals(text, this.date, date);
69+
}
70+
71+
}

0 commit comments

Comments
 (0)