
Commit 77b3cad

Merge branch 'apache:master' into SPARK-28098
2 parents: 8aee5de + 0e10341

57 files changed: +2975 additions, −407 deletions


common/utils/src/main/resources/error/error-conditions.json

Lines changed: 12 additions & 0 deletions
@@ -583,6 +583,12 @@
     ],
     "sqlState" : "22KD3"
   },
+  "CANNOT_USE_MULTI_ALIASES_IN_WATERMARK_CLAUSE" : {
+    "message" : [
+      "Multiple aliases are not supported in watermark clause."
+    ],
+    "sqlState" : "42000"
+  },
   "CANNOT_WRITE_STATE_STORE" : {
     "message" : [
       "Error writing state store files for provider <providerClass>."
@@ -4985,6 +4991,12 @@
     ],
     "sqlState" : "4274K"
   },
+  "REQUIRES_EXPLICIT_NAME_IN_WATERMARK_CLAUSE" : {
+    "message" : [
+      "The watermark clause requires an explicit name if expression is specified, but got <sqlExpr>."
+    ],
+    "sqlState" : "42000"
+  },
   "REQUIRES_SINGLE_PART_NAMESPACE" : {
     "message" : [
       "<sessionCatalog> requires a single-part namespace, but got <namespace>."

docs/sql-ref-ansi-compliance.md

Lines changed: 2 additions & 0 deletions
@@ -497,6 +497,7 @@ Below is a list of all the keywords in Spark SQL.
 |DEFAULT|non-reserved|non-reserved|non-reserved|
 |DEFINED|non-reserved|non-reserved|non-reserved|
 |DEFINER|non-reserved|non-reserved|non-reserved|
+|DELAY|non-reserved|non-reserved|non-reserved|
 |DELETE|non-reserved|non-reserved|reserved|
 |DELIMITED|non-reserved|non-reserved|non-reserved|
 |DESC|non-reserved|non-reserved|non-reserved|
@@ -793,6 +794,7 @@ Below is a list of all the keywords in Spark SQL.
 |VIEW|non-reserved|non-reserved|non-reserved|
 |VIEWS|non-reserved|non-reserved|non-reserved|
 |VOID|non-reserved|non-reserved|non-reserved|
+|WATERMARK|non-reserved|non-reserved|non-reserved|
 |WEEK|non-reserved|non-reserved|non-reserved|
 |WEEKS|non-reserved|non-reserved|non-reserved|
 |WHEN|reserved|non-reserved|reserved|
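
Both new keywords stay non-reserved in every parsing mode, so they should remain usable as ordinary identifiers outside the watermark syntax. A hedged illustration, assuming an active SparkSession named spark (table and column names are hypothetical):

    # Non-reserved keywords may still name tables and columns.
    spark.sql("CREATE TABLE t (watermark TIMESTAMP, delay INT) USING parquet")
    spark.sql("SELECT watermark, delay FROM t")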

python/pyspark/sql/connect/client/artifact.py

Lines changed: 55 additions & 0 deletions
@@ -427,6 +427,30 @@ def is_cached_artifact(self, hash: str) -> bool:
         status = resp.statuses.get(artifactName)
         return status.exists if status is not None else False
 
+    def get_cached_artifacts(self, hashes: list[str]) -> set[str]:
+        """
+        Batch check which artifacts are already cached on the server.
+        Returns a set of hashes that are already cached.
+        """
+        if not hashes:
+            return set()
+
+        artifact_names = [f"{CACHE_PREFIX}/{hash}" for hash in hashes]
+        request = proto.ArtifactStatusesRequest(
+            user_context=self._user_context, session_id=self._session_id, names=artifact_names
+        )
+        resp: proto.ArtifactStatusesResponse = self._stub.ArtifactStatus(
+            request, metadata=self._metadata
+        )
+
+        cached = set()
+        for hash in hashes:
+            artifact_name = f"{CACHE_PREFIX}/{hash}"
+            status = resp.statuses.get(artifact_name)
+            if status is not None and status.exists:
+                cached.add(hash)
+        return cached
+
     def cache_artifact(self, blob: bytes) -> str:
         """
         Cache the give blob at the session.
@@ -442,3 +466,34 @@ def cache_artifact(self, blob: bytes) -> str:
         # TODO(SPARK-42658): Handle responses containing CRC failures.
 
         return hash
+
+    def cache_artifacts(self, blobs: list[bytes]) -> list[str]:
+        """
+        Cache the given blobs at the session.
+
+        This method batches artifact status checks and uploads to minimize RPC overhead.
+        """
+        # Compute hashes for all blobs upfront
+        hashes = [hashlib.sha256(blob).hexdigest() for blob in blobs]
+        unique_hashes = list(set(hashes))
+
+        # Batch check which artifacts are already cached
+        cached_hashes = self.get_cached_artifacts(unique_hashes)
+
+        # Collect unique artifacts that need to be uploaded
+        seen_hashes = set()
+        artifacts_to_add = []
+        for blob, hash in zip(blobs, hashes):
+            if hash not in cached_hashes and hash not in seen_hashes:
+                artifacts_to_add.append(new_cache_artifact(hash, InMemory(blob)))
+                seen_hashes.add(hash)
+
+        # Batch upload all missing artifacts in a single RPC call
+        if artifacts_to_add:
+            requests = self._add_artifacts(artifacts_to_add)
+            response: proto.AddArtifactsResponse = self._retrieve_responses(requests)
+            summaries: List[proto.AddArtifactsResponse.ArtifactSummary] = []
+            for summary in response.artifacts:
+                summaries.append(summary)
+            # TODO(SPARK-42658): Handle responses containing CRC failures.
+        return hashes
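
A hedged usage sketch of the new batched method; client here stands in for a constructed ArtifactManager, so the wiring is assumed. Because blobs are deduplicated by SHA-256 digest before upload, a repeated payload costs at most one upload, while the returned list stays positionally aligned with the input:

    blobs = [b"udf-payload", b"broadcast-payload", b"udf-payload"]  # one duplicate
    hashes = client.cache_artifacts(blobs)

    assert len(hashes) == len(blobs)  # one hash per input blob
    assert hashes[0] == hashes[2]     # duplicates share a digest and a single upload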

python/pyspark/sql/connect/client/core.py

Lines changed: 6 additions & 0 deletions
@@ -2003,6 +2003,12 @@ def cache_artifact(self, blob: bytes) -> str:
                 return self._artifact_manager.cache_artifact(blob)
         raise SparkConnectException("Invalid state during retry exception handling.")
 
+    def cache_artifacts(self, blobs: list[bytes]) -> list[str]:
+        for attempt in self._retrying():
+            with attempt:
+                return self._artifact_manager.cache_artifacts(blobs)
+        raise SparkConnectException("Invalid state during retry exception handling.")
+
     def _verify_response_integrity(
         self,
         response: Union[
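
The wrapper reuses the retry idiom of cache_artifact above: each loop iteration yields an attempt context manager, a clean return from the body ends the loop, and a retryable error is swallowed so the next iteration can try again. Below is a self-contained sketch of that idiom; it is an illustrative stand-in, not Spark Connect's actual retry machinery, and here only ConnectionError counts as retryable:

    class _Attempt:
        # One try: record success on a clean exit, suppress retryable errors.
        def __init__(self, state):
            self._state = state

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            self._state["succeeded"] = exc_type is None
            # Returning True suppresses the exception, allowing a retry.
            return exc_type is not None and issubclass(exc_type, ConnectionError)

    def retrying(max_attempts: int = 3):
        state = {"succeeded": False}
        for _ in range(max_attempts):
            yield _Attempt(state)
            if state["succeeded"]:
                return  # body completed without raising

    def call_with_retry(fn):
        for attempt in retrying():
            with attempt:
                return fn()  # a successful return short-circuits the loop
        raise RuntimeError("retries exhausted")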

python/pyspark/sql/connect/functions/builtin.py

Lines changed: 164 additions & 14 deletions
@@ -3945,45 +3945,195 @@ def make_time(hour: "ColumnOrName", minute: "ColumnOrName", second: "ColumnOrNam
 make_time.__doc__ = pysparkfuncs.make_time.__doc__
 
 
+@overload
 def make_timestamp(
     years: "ColumnOrName",
     months: "ColumnOrName",
     days: "ColumnOrName",
     hours: "ColumnOrName",
     mins: "ColumnOrName",
     secs: "ColumnOrName",
+) -> Column:
+    ...
+
+
+@overload
+def make_timestamp(
+    years: "ColumnOrName",
+    months: "ColumnOrName",
+    days: "ColumnOrName",
+    hours: "ColumnOrName",
+    mins: "ColumnOrName",
+    secs: "ColumnOrName",
+    timezone: "ColumnOrName",
+) -> Column:
+    ...
+
+
+@overload
+def make_timestamp(*, date: "ColumnOrName", time: "ColumnOrName") -> Column:
+    ...
+
+
+@overload
+def make_timestamp(
+    *, date: "ColumnOrName", time: "ColumnOrName", timezone: "ColumnOrName"
+) -> Column:
+    ...
+
+
+def make_timestamp(
+    years: Optional["ColumnOrName"] = None,
+    months: Optional["ColumnOrName"] = None,
+    days: Optional["ColumnOrName"] = None,
+    hours: Optional["ColumnOrName"] = None,
+    mins: Optional["ColumnOrName"] = None,
+    secs: Optional["ColumnOrName"] = None,
     timezone: Optional["ColumnOrName"] = None,
+    date: Optional["ColumnOrName"] = None,
+    time: Optional["ColumnOrName"] = None,
 ) -> Column:
-    if timezone is not None:
-        return _invoke_function_over_columns(
-            "make_timestamp", years, months, days, hours, mins, secs, timezone
-        )
+    if years is not None:
+        if any(arg is not None for arg in [date, time]):
+            raise PySparkValueError(
+                errorClass="CANNOT_SET_TOGETHER",
+                messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
+            )
+        if timezone is not None:
+            return _invoke_function_over_columns(
+                "make_timestamp",
+                cast("ColumnOrName", years),
+                cast("ColumnOrName", months),
+                cast("ColumnOrName", days),
+                cast("ColumnOrName", hours),
+                cast("ColumnOrName", mins),
+                cast("ColumnOrName", secs),
+                cast("ColumnOrName", timezone),
+            )
+        else:
+            return _invoke_function_over_columns(
+                "make_timestamp",
+                cast("ColumnOrName", years),
+                cast("ColumnOrName", months),
+                cast("ColumnOrName", days),
+                cast("ColumnOrName", hours),
+                cast("ColumnOrName", mins),
+                cast("ColumnOrName", secs),
+            )
     else:
-        return _invoke_function_over_columns(
-            "make_timestamp", years, months, days, hours, mins, secs
-        )
+        if any(arg is not None for arg in [years, months, days, hours, mins, secs]):
+            raise PySparkValueError(
+                errorClass="CANNOT_SET_TOGETHER",
+                messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
+            )
+        if timezone is not None:
+            return _invoke_function_over_columns(
+                "make_timestamp",
+                cast("ColumnOrName", date),
+                cast("ColumnOrName", time),
+                cast("ColumnOrName", timezone),
+            )
+        else:
+            return _invoke_function_over_columns(
+                "make_timestamp", cast("ColumnOrName", date), cast("ColumnOrName", time)
+            )
 
 
 make_timestamp.__doc__ = pysparkfuncs.make_timestamp.__doc__
 
 
+@overload
 def try_make_timestamp(
     years: "ColumnOrName",
     months: "ColumnOrName",
     days: "ColumnOrName",
     hours: "ColumnOrName",
     mins: "ColumnOrName",
     secs: "ColumnOrName",
+) -> Column:
+    ...
+
+
+@overload
+def try_make_timestamp(
+    years: "ColumnOrName",
+    months: "ColumnOrName",
+    days: "ColumnOrName",
+    hours: "ColumnOrName",
+    mins: "ColumnOrName",
+    secs: "ColumnOrName",
+    timezone: "ColumnOrName",
+) -> Column:
+    ...
+
+
+@overload
+def try_make_timestamp(*, date: "ColumnOrName", time: "ColumnOrName") -> Column:
+    ...
+
+
+@overload
+def try_make_timestamp(
+    *, date: "ColumnOrName", time: "ColumnOrName", timezone: "ColumnOrName"
+) -> Column:
+    ...
+
+
+def try_make_timestamp(
+    years: Optional["ColumnOrName"] = None,
+    months: Optional["ColumnOrName"] = None,
+    days: Optional["ColumnOrName"] = None,
+    hours: Optional["ColumnOrName"] = None,
+    mins: Optional["ColumnOrName"] = None,
+    secs: Optional["ColumnOrName"] = None,
     timezone: Optional["ColumnOrName"] = None,
+    date: Optional["ColumnOrName"] = None,
+    time: Optional["ColumnOrName"] = None,
 ) -> Column:
-    if timezone is not None:
-        return _invoke_function_over_columns(
-            "try_make_timestamp", years, months, days, hours, mins, secs, timezone
-        )
+    if years is not None:
+        if any(arg is not None for arg in [date, time]):
+            raise PySparkValueError(
+                errorClass="CANNOT_SET_TOGETHER",
+                messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
+            )
+        if timezone is not None:
+            return _invoke_function_over_columns(
+                "try_make_timestamp",
+                cast("ColumnOrName", years),
+                cast("ColumnOrName", months),
+                cast("ColumnOrName", days),
+                cast("ColumnOrName", hours),
+                cast("ColumnOrName", mins),
+                cast("ColumnOrName", secs),
+                cast("ColumnOrName", timezone),
+            )
+        else:
+            return _invoke_function_over_columns(
+                "try_make_timestamp",
+                cast("ColumnOrName", years),
+                cast("ColumnOrName", months),
+                cast("ColumnOrName", days),
+                cast("ColumnOrName", hours),
+                cast("ColumnOrName", mins),
+                cast("ColumnOrName", secs),
+            )
     else:
-        return _invoke_function_over_columns(
-            "try_make_timestamp", years, months, days, hours, mins, secs
-        )
+        if any(arg is not None for arg in [years, months, days, hours, mins, secs]):
+            raise PySparkValueError(
+                errorClass="CANNOT_SET_TOGETHER",
+                messageParameters={"arg_list": "years|months|days|hours|mins|secs and date|time"},
+            )
+        if timezone is not None:
+            return _invoke_function_over_columns(
+                "try_make_timestamp",
+                cast("ColumnOrName", date),
+                cast("ColumnOrName", time),
+                cast("ColumnOrName", timezone),
+            )
+        else:
+            return _invoke_function_over_columns(
+                "try_make_timestamp", cast("ColumnOrName", date), cast("ColumnOrName", time)
+            )
 
 
 try_make_timestamp.__doc__ = pysparkfuncs.try_make_timestamp.__doc__
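
A hedged usage sketch of the two calling conventions (column names are hypothetical, and this assumes the classic pyspark.sql.functions surface mirrors these Connect signatures, as the __doc__ reassignment suggests):

    from pyspark.sql import functions as sf

    # Field-based form: positional years..secs, with an optional timezone.
    df.select(sf.make_timestamp("y", "mo", "d", "h", "mi", "s"))
    df.select(sf.make_timestamp("y", "mo", "d", "h", "mi", "s", "tz"))

    # Date/time form: keyword-only, so it cannot collide with the positional
    # fields. Mixing the two groups raises PySparkValueError (CANNOT_SET_TOGETHER).
    df.select(sf.make_timestamp(date="d_col", time="t_col"))
    df.select(sf.try_make_timestamp(date="d_col", time="t_col", timezone="tz"))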
