-
Notifications
You must be signed in to change notification settings - Fork 1k
PHOENIX-7705 Support for a row size function #2292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7e09907
72cfa09
004427a
9199d9e
ad1a5de
a37c287
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.phoenix.expression.function; | ||
|
|
||
| import java.util.List; | ||
| import org.apache.phoenix.expression.Expression; | ||
| import org.apache.phoenix.parse.FunctionParseNode.BuiltInFunction; | ||
| import org.apache.phoenix.parse.RawRowSizeParseNode; | ||
|
|
||
| /** | ||
| * Function to return the total size of all HBase cell versions and delete markers for a given | ||
| * row | ||
| */ | ||
| @BuiltInFunction(name = RawRowSizeFunction.NAME, nodeClass = RawRowSizeParseNode.class, args = {}) | ||
| public class RawRowSizeFunction extends RowSizeFunction { | ||
|
|
||
| public static final String NAME = "RAW_ROW_SIZE"; | ||
|
|
||
| public RawRowSizeFunction() { | ||
| } | ||
|
|
||
| public RawRowSizeFunction(List<Expression> children) { | ||
| super(children); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.phoenix.expression.function; | ||
|
|
||
| import java.util.List; | ||
| import org.apache.hadoop.hbase.io.ImmutableBytesWritable; | ||
| import org.apache.phoenix.expression.Determinism; | ||
| import org.apache.phoenix.expression.Expression; | ||
| import org.apache.phoenix.parse.FunctionParseNode.BuiltInFunction; | ||
| import org.apache.phoenix.parse.RowSizeParseNode; | ||
| import org.apache.phoenix.schema.tuple.Tuple; | ||
| import org.apache.phoenix.schema.types.PDataType; | ||
| import org.apache.phoenix.schema.types.PUnsignedLong; | ||
|
|
||
| /** | ||
| * Function to return the total size of the HBase cells that constitute a given row | ||
| */ | ||
| @BuiltInFunction(name = RowSizeFunction.NAME, nodeClass = RowSizeParseNode.class, args = {}) | ||
| public class RowSizeFunction extends ScalarFunction { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are scalar functions evaluated on the server side as well (or only on the client side)? If it's client side, do we then need to fetch the whole row back to the client for size computation? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It can be evaluated on the client side to check whether the where clause evaluates to true on an empty tuple, and/or when it is specified as a top-level expression node in a select clause. This PR does not allow the row_size function to be a top-level node in a select clause. In this PR, a row is never returned to the client; only its size is returned as part of an aggregation function result. |
||
|
|
||
| public static final String NAME = "ROW_SIZE"; | ||
|
|
||
| public RowSizeFunction() { | ||
| } | ||
|
|
||
| public RowSizeFunction(List<Expression> children) { | ||
| super(children); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) { | ||
| if (tuple == null) { | ||
| return false; | ||
| } | ||
| long size = 0; | ||
| for (int i = 0; i < tuple.size(); i++) { | ||
| size += tuple.getValue(i).getSerializedSize(); | ||
| } | ||
| ptr.set(PUnsignedLong.INSTANCE.toBytes(size)); | ||
| return true; | ||
| } | ||
|
|
||
| @Override | ||
| public PDataType getDataType() { | ||
| return PUnsignedLong.INSTANCE; | ||
| } | ||
|
|
||
| @Override | ||
| public String getName() { | ||
| return NAME; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean isStateless() { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Determinism getDeterminism() { | ||
| return Determinism.PER_ROW; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean isRowLevel() { | ||
| return true; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.phoenix.filter; | ||
|
|
||
| import java.io.DataInput; | ||
| import java.io.DataOutput; | ||
| import java.io.IOException; | ||
| import java.util.List; | ||
| import org.apache.hadoop.hbase.Cell; | ||
| import org.apache.hadoop.hbase.exceptions.DeserializationException; | ||
| import org.apache.hadoop.hbase.util.Writables; | ||
| import org.apache.phoenix.expression.Expression; | ||
| import org.apache.phoenix.schema.tuple.MultiKeyValueTuple; | ||
| import org.apache.phoenix.schema.tuple.Tuple; | ||
|
|
||
| /** | ||
| * Filter used when an expression references the entire row | ||
| */ | ||
| public class RowLevelFilter extends BooleanExpressionFilter { | ||
| private boolean allVersions = false; | ||
| private boolean keepRow = false; | ||
|
|
||
| public RowLevelFilter() { | ||
| } | ||
|
|
||
| public RowLevelFilter(Expression expression, boolean allVersions) { | ||
| super(expression); | ||
| this.allVersions = allVersions; | ||
| } | ||
|
|
||
| @Override | ||
| public void reset() { | ||
| super.reset(); | ||
| keepRow = false; | ||
| } | ||
|
|
||
| // No @Override for HBase 3 compatibility | ||
| public ReturnCode filterKeyValue(Cell v) { | ||
| return filterCell(v); | ||
| } | ||
|
|
||
| @Override | ||
| public ReturnCode filterCell(Cell v) { | ||
| return allVersions ? ReturnCode.INCLUDE : ReturnCode.INCLUDE_AND_NEXT_COL; | ||
| } | ||
|
|
||
| @Override | ||
| public void filterRowCells(List<Cell> kvs) throws IOException { | ||
| Tuple tuple = new MultiKeyValueTuple(); | ||
| tuple.setKeyValues(kvs); | ||
| keepRow = Boolean.TRUE.equals(evaluate(tuple)); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean filterRow() { | ||
| return !this.keepRow; | ||
| } | ||
|
|
||
| @Override | ||
| public void readFields(DataInput input) throws IOException { | ||
| super.readFields(input); | ||
| allVersions = input.readBoolean(); | ||
| } | ||
|
|
||
| @Override | ||
| public void write(DataOutput output) throws IOException { | ||
| super.write(output); | ||
| output.writeBoolean(allVersions); | ||
| } | ||
|
|
||
| public static RowLevelFilter parseFrom(final byte[] pbBytes) throws DeserializationException { | ||
| try { | ||
| return (RowLevelFilter) Writables.getWritable(pbBytes, new RowLevelFilter()); | ||
| } catch (IOException e) { | ||
| throw new DeserializationException(e); | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not clear on the usage. Does it mean we can't use a query where row_size needs to be fetched for each row (e.g.
`select row_size() from table` or `select row_size() from table group by tenant_id`)? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please see the exception message and also the row size test to see how to get individual row sizes.