Skip to content

Commit faa34f8

Browse files
committed
refactor: ♻️ Removes use of regexp to extract data
This refactoring protects against denial-of-service (ReDoS) attacks that exploit the parsing of an attacker-supplied character string with a regexp
1 parent bbbb3c5 commit faa34f8

File tree

1 file changed

+55
-54
lines changed

1 file changed

+55
-54
lines changed
Lines changed: 55 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,103 +1,104 @@
11
package io.github.jycr.javadataurlhandler;
22

33
import java.io.ByteArrayInputStream;
4-
import java.io.IOException;
54
import java.io.InputStream;
6-
import java.io.UnsupportedEncodingException;
75
import java.net.MalformedURLException;
86
import java.net.URL;
97
import java.net.URLConnection;
108
import java.net.URLDecoder;
119
import java.nio.charset.Charset;
1210
import java.util.Base64;
13-
import java.util.regex.Matcher;
14-
import java.util.regex.Pattern;
1511

1612
import static java.nio.charset.StandardCharsets.US_ASCII;
1713

1814
/**
 * <p>The data scheme URLConnection.</p>
 * <p>Syntax of data URL scheme:</p>
 * <pre>
 * dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
 * mediatype := [ type "/" subtype ] *( ";" parameter )
 * data := *urlchar
 * parameter := attribute "=" value
 * </pre>
 * <p>The URL is parsed with plain string operations (no regular expression), so parsing time
 * stays linear in the length of the URL.</p>
 *
 * @see <a href="https://www.rfc-editor.org/rfc/rfc2397#section-2">RFC-2397</a>
 * @see <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs">mdn web docs - Data URLs</a>
 */
public class DataUriConnection extends URLConnection {

    /** Scheme prefix every data URL must start with. */
    private static final String SCHEME_PREFIX = "data:";
    /** Media type parameter carrying the charset of the payload. */
    private static final String CHARSET_PARAMETER = "charset=";
    /** Charset used when the URL does not declare one. */
    private static final Charset DEFAULT_CONTENT_CHARSET = US_ASCII;
    /** Media type used when the URL does not declare one. */
    private static final String DEFAULT_MEDIATYPE = "text/plain";

    private final Charset charset;
    private final boolean isBase64;
    private final String data;
    private final String contentType;

    /**
     * Parses the given data URL into its media type, charset, encoding flag and payload.
     *
     * @param url the data URL to parse
     * @throws MalformedURLException if the URL does not start with {@code data:}, has no
     *                               {@code ,} separator, or declares an invalid or unsupported charset
     */
    public DataUriConnection(final URL url) throws MalformedURLException {
        super(url);
        final String urlString = url.toString();
        if (!urlString.startsWith(SCHEME_PREFIX)) {
            throw new MalformedURLException("Invalid data URL: " + url);
        }

        // The first comma separates the metadata (media type, parameters, base64 flag) from the payload.
        final int commaIndex = urlString.indexOf(',');
        if (commaIndex == -1) {
            throw new MalformedURLException("Invalid data URL: " + url);
        }

        final String metadata = urlString.substring(SCHEME_PREFIX.length(), commaIndex);
        this.data = urlString.substring(commaIndex + 1);

        // split() drops trailing empty strings, so the array may be empty (e.g. metadata ";").
        final String[] parts = metadata.split(";");
        final String mediatype = parts.length > 0 && !parts[0].isEmpty() ? parts[0] : DEFAULT_MEDIATYPE;

        boolean base64Flag = false;
        Charset extractedCharset = DEFAULT_CONTENT_CHARSET;
        for (String part : parts) {
            if ("base64".equals(part)) {
                base64Flag = true;
            } else if (part.regionMatches(true, 0, CHARSET_PARAMETER, 0, CHARSET_PARAMETER.length())) {
                // MIME parameter names are case-insensitive, hence the case-insensitive match.
                extractedCharset = parseCharset(part.substring(CHARSET_PARAMETER.length()), url);
            }
        }
        this.isBase64 = base64Flag;
        this.charset = extractedCharset;
        this.contentType = mediatype + (isText(mediatype) ? ";charset=" + this.charset.name() : "");
        this.connected = true;
    }

    /**
     * Resolves a charset name, reporting a bad name as a malformed URL instead of letting an
     * unchecked exception escape from the constructor.
     */
    private static Charset parseCharset(final String charsetName, final URL url) throws MalformedURLException {
        try {
            return Charset.forName(charsetName);
        } catch (IllegalArgumentException e) {
            // Covers both IllegalCharsetNameException and UnsupportedCharsetException.
            final MalformedURLException malformed =
                    new MalformedURLException("Invalid data URL (unsupported charset '" + charsetName + "'): " + url);
            malformed.initCause(e);
            throw malformed;
        }
    }

    /** Tells whether the media type is textual, i.e. whether a charset parameter is meaningful for it. */
    private static boolean isText(String mediatype) {
        return mediatype != null && (mediatype.startsWith("text/") || mediatype.endsWith("+xml"));
    }

    /**
     * Decodes {@code %xx} escapes only. {@link URLDecoder} implements form decoding and would turn
     * {@code +} into a space, so literal plus signs are escaped first to keep them intact.
     */
    private static String percentDecode(final String encoded, final Charset charset) {
        return URLDecoder.decode(encoded.replace("+", "%2B"), charset);
    }

    /** Marks the connection as connected; the data is embedded in the URL, so no I/O takes place. */
    @Override
    public void connect() {
        this.connected = true;
    }

    /**
     * Returns a stream over the decoded payload.
     *
     * @throws IllegalArgumentException if the payload is not valid base64 or contains a malformed {@code %xx} escape
     */
    @Override
    public InputStream getInputStream() {
        return new ByteArrayInputStream(getData());
    }

    /**
     * Returns the media type declared in the URL ({@code text/plain} when absent), followed by
     * {@code ;charset=...} for textual media types.
     */
    @Override
    public String getContentType() {
        return contentType;
    }

    /** Decodes the payload: base64 when flagged, percent-decoding in the declared charset otherwise. */
    private byte[] getData() {
        if (isBase64) {
            // The base64 text itself may be percent-encoded in the URL (e.g. "%3D" for the "=" padding).
            return Base64.getDecoder().decode(percentDecode(data, US_ASCII));
        }
        return percentDecode(data, charset).getBytes(charset);
    }

    /** Returns the charset declared in the URL, or US-ASCII when none is declared. */
    Charset getCharset() {
        return charset;
    }

    /**
     * Returns the synthetic {@code Content-Length} and {@code Content-Type} headers (name matched
     * case-insensitively); every other header is reported as absent.
     *
     * @param name the header name
     * @return the header value, or {@code null} if this connection does not provide the header
     */
    @Override
    public String getHeaderField(String name) {
        if ("Content-Length".equalsIgnoreCase(name)) {
            return String.valueOf(getData().length);
        }
        if ("Content-Type".equalsIgnoreCase(name)) {
            return contentType;
        }
        return null;
    }
}

0 commit comments

Comments
 (0)