1
1
package io .github .jycr .javadataurlhandler ;
2
2
3
3
import java .io .ByteArrayInputStream ;
4
- import java .io .IOException ;
5
4
import java .io .InputStream ;
6
- import java .io .UnsupportedEncodingException ;
7
5
import java .net .MalformedURLException ;
8
6
import java .net .URL ;
9
7
import java .net .URLConnection ;
10
8
import java .net .URLDecoder ;
11
9
import java .nio .charset .Charset ;
12
10
import java .util .Base64 ;
13
- import java .util .regex .Matcher ;
14
- import java .util .regex .Pattern ;
15
11
16
12
import static java .nio .charset .StandardCharsets .US_ASCII ;
17
13
18
14
/**
 * <p>The data scheme URLConnection.</p>
 * <p>Syntax of data URL scheme:</p>
 * <pre>
 * dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
 * mediatype := [ type "/" subtype ] *( ";" parameter )
 * data := *urlchar
 * parameter := attribute "=" value
 * </pre>
 * <p>The whole URL is parsed in the constructor, so an instance is considered
 * connected as soon as it has been created. The payload is decoded lazily, each
 * time it is requested.</p>
 *
 * @see <a href="https://www.rfc-editor.org/rfc/rfc2397#section-2">RFC-2397</a>
 * @see <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs">mdn web docs - Data URLs</a>
 */
public class DataUriConnection extends URLConnection {

    /** Scheme prefix every data URL must start with. */
    private static final String SCHEME_PREFIX = "data:";
    /** Marker announcing that the payload is base64 encoded. */
    private static final String BASE64_FLAG = "base64";
    /** Prefix of the media type parameter carrying the charset. */
    private static final String CHARSET_PARAMETER_PREFIX = "charset=";

    /** Charset used when the URL does not declare one. */
    private static final Charset DEFAULT_CONTENT_CHARSET = US_ASCII;
    /** Media type used when the URL does not declare one. */
    private static final String DEFAULT_MEDIATYPE = "text/plain";

    private final Charset charset;
    private final boolean isBase64;
    /** Raw (still encoded) payload, i.e. everything after the first comma. */
    private final String data;
    private final String contentType;

    /**
     * Parses the given data URL.
     *
     * @param url the data URL to expose as a connection
     * @throws MalformedURLException if the URL does not start with {@code data:}, contains no
     *                               comma separator, or declares a charset that is illegal or
     *                               not supported by this JVM
     */
    public DataUriConnection(final URL url) throws MalformedURLException {
        super(url);
        final String urlString = url.toString();
        final int commaIndex = urlString.indexOf(',');
        if (!urlString.startsWith(SCHEME_PREFIX) || commaIndex == -1) {
            throw new MalformedURLException("Invalid data URL: " + url);
        }

        final String metadata = urlString.substring(SCHEME_PREFIX.length(), commaIndex);
        this.data = urlString.substring(commaIndex + 1);

        final String[] parts = metadata.split(";");
        final String mediatype = parts.length > 0 && !parts[0].isEmpty() ? parts[0] : DEFAULT_MEDIATYPE;

        boolean base64Flag = false;
        Charset extractedCharset = DEFAULT_CONTENT_CHARSET;
        for (final String part : parts) {
            // Parameter names are matched case-insensitively, as MIME attribute names are.
            if (BASE64_FLAG.equalsIgnoreCase(part)) {
                base64Flag = true;
            } else if (part.regionMatches(true, 0, CHARSET_PARAMETER_PREFIX, 0, CHARSET_PARAMETER_PREFIX.length())) {
                extractedCharset = parseCharset(part.substring(CHARSET_PARAMETER_PREFIX.length()), url);
            }
        }
        this.isBase64 = base64Flag;
        this.charset = extractedCharset;
        // The charset is only advertised for textual content; other parameters are not echoed.
        this.contentType = mediatype + (isText(mediatype) ? ";charset=" + this.charset.name() : "");
        this.connected = true;
    }

    /**
     * Resolves a charset name, turning the unchecked lookup failures into the checked
     * exception declared by the constructor.
     *
     * @param charsetName the value of the {@code charset} parameter
     * @param url         the URL being parsed, used for the error message only
     * @return the matching charset
     * @throws MalformedURLException if the name is illegal or the charset is unsupported
     */
    private static Charset parseCharset(final String charsetName, final URL url) throws MalformedURLException {
        try {
            return Charset.forName(charsetName);
        } catch (IllegalArgumentException e) {
            // Covers both IllegalCharsetNameException and UnsupportedCharsetException.
            final MalformedURLException failure = new MalformedURLException("Invalid data URL: " + url);
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Tells whether a media type is treated as text, i.e. whether a charset is appended
     * to the reported content type.
     *
     * @param mediatype the media type without parameters
     * @return {@code true} for {@code text/*} types and types ending with {@code +xml}
     */
    private static boolean isText(final String mediatype) {
        return mediatype != null && (mediatype.startsWith("text/") || mediatype.endsWith("+xml"));
    }

    /**
     * Percent-decodes a URL fragment without applying form-encoding rules.
     *
     * @param value the encoded text
     * @param cs    the charset used to interpret the escaped bytes
     * @return the decoded text
     * @throws IllegalArgumentException if {@code value} contains a malformed percent escape
     */
    private static String percentDecode(final String value, final Charset cs) {
        // URLDecoder implements application/x-www-form-urlencoded, where '+' means space.
        // In a data URL '+' is a literal character, so it is escaped before decoding.
        return URLDecoder.decode(value.replace("+", "%2B"), cs);
    }

    /** Marks the connection as connected; no I/O is involved. */
    @Override
    public void connect() {
        this.connected = true;
    }

    /**
     * Returns a stream over the decoded payload.
     *
     * @return a new in-memory stream on each call
     * @throws IllegalArgumentException if the payload is not valid base64 or contains a
     *                                  malformed percent escape
     */
    @Override
    public InputStream getInputStream() {
        return new ByteArrayInputStream(getData());
    }

    /**
     * Returns the media type declared in the URL, or {@code text/plain} when none is declared,
     * followed by {@code ;charset=...} for textual media types.
     */
    @Override
    public String getContentType() {
        return contentType;
    }

    /**
     * Decodes the payload.
     *
     * @return the decoded bytes
     * @throws IllegalArgumentException if the payload is not valid base64 or contains a
     *                                  malformed percent escape
     */
    private byte[] getData() {
        if (isBase64) {
            // The base64 text may itself be percent-escaped (e.g. "%3D" for the '=' padding).
            return Base64.getDecoder().decode(percentDecode(data, US_ASCII));
        }
        return percentDecode(data, charset).getBytes(charset);
    }

    /** Returns the charset declared in the URL, or US-ASCII when none is declared. */
    Charset getCharset() {
        return charset;
    }

    /**
     * Returns the value of a header field. Only {@code Content-Length} (matched
     * case-insensitively) is provided; it is the size of the decoded payload.
     *
     * @param name the header name
     * @return the decoded payload length as a string, or {@code null} for any other header
     */
    @Override
    public String getHeaderField(final String name) {
        if ("Content-Length".equalsIgnoreCase(name)) {
            return String.valueOf(getData().length);
        }
        return null;
    }
}
0 commit comments