From 99aa242cf56de7cdc63452a9b4516f68aa207113 Mon Sep 17 00:00:00 2001 From: Daniel Otykier Date: Wed, 1 Nov 2023 14:35:27 +0100 Subject: [PATCH 1/6] Use Tokenize() instead of RegEx.IsMatch, whenever we're looking for the presence of certain DAX tokens (functions, division symbol, etc.). --- BestPracticeRules/BPARules.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/BestPracticeRules/BPARules.json b/BestPracticeRules/BPARules.json index c8aa0024..ae3df21f 100644 --- a/BestPracticeRules/BPARules.json +++ b/BestPracticeRules/BPARules.json @@ -68,7 +68,7 @@ "Description": "Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. One scenario where they may be easier to avoid is if they use the RELATED function.\r\nReference: https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables/", "Severity": 2, "Scope": "CalculatedColumn", - "Expression": "RegEx.IsMatch(Expression,\"(?i)RELATED\\s*\\(\")", + "Expression": "Tokenize().Any(Type = RELATED)", "CompatibilityLevel": 1200 }, { @@ -207,7 +207,7 @@ "Description": "At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.", "Severity": 2, "Scope": "Measure, CalculationItem", - "Expression": "Model.Tables.Any(ObjectTypeName == \"Table (DirectQuery)\")\r\nand\r\n(\r\nRegEx.IsMatch(Expression,\"CLOSINGBALANCEMONTH\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"CLOSINGBALANCEQUARTER\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"CLOSINGBALANCEYEAR\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"DATEADD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"DATESBETWEEN\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"DATESINPERIOD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"DATESMTD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"DATESQTD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"DATESYTD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"ENDOFMONTH\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"ENDOFQUARTER\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"ENDOFYEAR\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"FIRSTDATE\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"FIRSTNONBLANK\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"FIRSTNONBLANKVALUE\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"LASTDATE\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"LASTNONBLANK\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"LASTNONBLANKVALUE\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"NEXTDAY\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"NEXTMONTH\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"NEXTQUARTER\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"NEXTYEAR\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"OPENINGBALANCEMONTH\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"OPENINGBALANCEQUARTER\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"OPENINGBALANCEYEAR\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"PARALLELPERIOD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"PREVIOUSDAY\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"PREVIOUSMONTH\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"PREVIOUSQUARTER\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"PREVIOUSYEAR\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"SAMEPERIODLASTYEAR\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"STARTOFMONTH\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"STARTOFQUARTER\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"STARTOFYEAR\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"TOTALMTD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"TOTALQTD\\s*\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"TOTALYTD\\s*\\(\")\r\n)", + "Expression": "Model.AllPartitions.Any(Mode = \"DirectQuery\")\r\nand\r\n(\r\n Tokenize().Any(\r\n Type == CLOSINGBALANCEMONTH ||\r\n Type == CLOSINGBALANCEQUARTER ||\r\n Type == CLOSINGBALANCEYEAR ||\r\n Type == DATEADD ||\r\n Type == DATESBETWEEN ||\r\n Type == DATESINPERIOD ||\r\n Type == DATESMTD ||\r\n Type == DATESQTD ||\r\n Type == DATESYTD ||\r\n Type == ENDOFMONTH ||\r\n Type == ENDOFQUARTER ||\r\n Type == ENDOFYEAR ||\r\n Type == FIRSTDATE ||\r\n Type == FIRSTNONBLANK ||\r\n Type == FIRSTNONBLANKVALUE ||\r\n Type == LASTDATE ||\r\n Type == LASTNONBLANK ||\r\n Type == LASTNONBLANKVALUE ||\r\n Type == NEXTDAY ||\r\n Type == NEXTMONTH ||\r\n Type == NEXTQUARTER ||\r\n Type == NEXTYEAR ||\r\n Type == OPENINGBALANCEMONTH ||\r\n Type == OPENINGBALANCEQUARTER ||\r\n Type == OPENINGBALANCEYEAR ||\r\n Type == PARALLELPERIOD ||\r\n Type == PREVIOUSDAY ||\r\n Type == PREVIOUSMONTH ||\r\n Type == PREVIOUSQUARTER ||\r\n Type == PREVIOUSYEAR ||\r\n Type == SAMEPERIODLASTYEAR ||\r\n Type == STARTOFMONTH ||\r\n Type == STARTOFQUARTER ||\r\n Type == STARTOFYEAR ||\r\n Type == TOTALMTD ||\r\n Type == TOTALQTD ||\r\n Type == TOTALYTD\r\n )\r\n)", "CompatibilityLevel": 1200 }, { @@ -237,7 +237,7 @@ "Description": "Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.\r\nReference: https://docs.microsoft.com/power-bi/admin/service-admin-rls", "Severity": 1, "Scope": "TablePermission", - "Expression": "RegEx.IsMatch(Expression,\"(?i)USERNAME\\(\")\r\nor\r\nRegEx.IsMatch(Expression,\"(?i)USERPRINCIPALNAME\\(\")", + "Expression": "Tokenize().Any(Type = USERNAME or Type = USERPRINCIPALNAME)", "CompatibilityLevel": 1200 }, { @@ -277,7 +277,7 @@ "Description": "The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virutal relationships.\r\nReference: https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax/", "Severity": 2, "Scope": "Measure, CalculationItem", - "Expression": "RegEx.IsMatch(Expression,\"(?i)INTERSECT\\s*\\(\")", + "Expression": "Tokenize().Any(Type = INTERSECT)", "CompatibilityLevel": 1400 }, { @@ -287,7 +287,7 @@ "Description": "Use the DIVIDE function instead of using \"/\". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.\r\n\r\nReference: https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator", "Severity": 2, "Scope": "Measure, CalculatedColumn, CalculationItem", - "Expression": "RegEx.IsMatch(Expression,\"\\]\\s*\\/(?!\\/)(?!\\*)\")\r\nor\r\nRegEx.IsMatch(Expression,\"\\)\\s*\\/(?!\\/)(?!\\*)\")", + "Expression": "Tokenize().Any(Type = DIV)", "CompatibilityLevel": 1200 }, { @@ -701,7 +701,7 @@ "Description": "It is a best practice to hide fact table columns that are used for aggregation in measures.", "Severity": 2, "Scope": "DataColumn, CalculatedColumn, CalculatedTableColumn", - "Expression": "(\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)COUNT\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)COUNTBLANK\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)SUM\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)AVERAGE\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)VALUES\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)DISTINCT\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)DISTINCTCOUNT\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\n\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)MIN\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\n\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)MAX\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)COUNTA\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\n\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)AVERAGEA\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)MAXA\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n\nor\r\nReferencedBy.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)MINA\\s*\\(\\s*\\'*\" + outerit.Table.Name + \"\\'*\\[\" + outerit.Name + \"\\]\\s*\\)\"))\r\n)\r\n\nand IsHidden == false\r\n\nand (DataType == \"Int64\" || DataType == \"Decimal\" || DataType == \"Double\")", + "Expression": "ReferencedBy.AllMeasures.Any(\r\n Tokenize().Count(not CommentOrWhitespace) <= 5 and\r\n Tokenize().Any(\r\n Type = COUNT or\r\n Type = COUNTBLANK or\r\n Type = SUM or\r\n Type = AVERAGE or\r\n Type = VALUES or\r\n Type = DISTINCT or\r\n Type = DISTINCTCOUNT or\r\n Type = MIN or\r\n Type = MAX or\r\n Type = COUNTA or\r\n Type = AVERAGEA or\r\n Type = MAXA or\r\n Type = MINA\r\n )\r\n)\r\nand IsHidden == false\r\nand (DataType == \"Int64\" || DataType == \"Decimal\" || DataType == \"Double\")", "FixExpression": "IsHidden = true", "CompatibilityLevel": 1200 }, From da93feda4d55808df94371eae8afb6942f395b88 Mon Sep 17 00:00:00 2001 From: Daniel Otykier Date: Wed, 1 Nov 2023 14:36:17 +0100 Subject: [PATCH 2/6] RLS-specific rules should use the "TablePermission" scope. Also, avoid RegEx.IsMatch when possible. --- BestPracticeRules/BPARules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BestPracticeRules/BPARules.json b/BestPracticeRules/BPARules.json index ae3df21f..65db55be 100644 --- a/BestPracticeRules/BPARules.json +++ b/BestPracticeRules/BPARules.json @@ -127,8 +127,8 @@ "Category": "Performance", "Description": "Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).", "Severity": 2, - "Scope": "Table, CalculatedTable", - "Expression": "RowLevelSecurity.Any(RegEx.IsMatch(it.Replace(\" \",\"\"),\"(?i)RIGHT\\s*\\(\"))\r\nor\r\nRowLevelSecurity.Any(RegEx.IsMatch(it.Replace(\" \",\"\"),\"(?i)LEFT\\s*\\(\"))\r\nor\r\nRowLevelSecurity.Any(RegEx.IsMatch(it.Replace(\" \",\"\"),\"(?i)UPPER\\s*\\(\"))\r\nor\r\nRowLevelSecurity.Any(RegEx.IsMatch(it.Replace(\" \",\"\"),\"(?i)LOWER\\s*\\(\"))\r\nor\r\nRowLevelSecurity.Any(RegEx.IsMatch(it.Replace(\" \",\"\"),\"(?i)FIND\\s*\\(\"))\r\n", + "Scope": "TablePermission", + "Expression": "Tokenize().Any(\r\n Type = RIGHT or\r\n Type = LEFT or\r\n Type = UPPER or\r\n Type = LOWER or\r\n Type = FIND or\r\n Type = MID\r\n)", "CompatibilityLevel": 1200 }, { From 3b3ef89ad3895dd2806731591158e3dba1b4d500 Mon Sep 17 00:00:00 2001 From: Daniel Otykier Date: Wed, 1 Nov 2023 14:37:19 +0100 Subject: [PATCH 3/6] Use RegEx.Escape to account for table names containing special characters. Also, fixed RegEx expression to be less strict when detecting if the current table is being referenced within USERELATIONSHIP. --- BestPracticeRules/BPARules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BestPracticeRules/BPARules.json b/BestPracticeRules/BPARules.json index 65db55be..a1459a2c 100644 --- a/BestPracticeRules/BPARules.json +++ b/BestPracticeRules/BPARules.json @@ -397,7 +397,7 @@ "Description": "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.\r\n\r\nReference: https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security/", "Severity": 3, "Scope": "Table, CalculatedTable", - "Expression": "Model.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)USERELATIONSHIP\\s*\\(\\s*.+?(?=])\\]\\s*,\\s*'*\" + current.Name + \"'*\\[\"))\r\nand\r\nRowLevelSecurity.Any(it <> null)", + "Expression": "Model.AllMeasures.Any(RegEx.IsMatch(Expression,\"(?i)USERELATIONSHIP\\s*\\([^)]*\" + RegEx.Escape(current.Name) + \"('?\\[)[^)]*\\)\"))\r\nand\r\nRowLevelSecurity.Any(it <> null)", "CompatibilityLevel": 1200 }, { @@ -440,7 +440,7 @@ "Severity": 3, "Scope": "DataColumn, CalculatedColumn, CalculatedTableColumn", "Expression": "IsAvailableInMDX = false\r\n\r\nand\r\n(\r\nUsedInSortBy.Any()\r\nor\r\nUsedInHierarchies.Any()\r\nor\r\nUsedInVariations.Any()\r\nor\r\nSortByColumn != null\r\n)", - "FixExpression": "IsAvailableInMDX = true", + "FixExpression": "IsAvailableInMDX = true", "CompatibilityLevel": 1200 }, { From 53804099468a38cd18e0d750b78080b58ee117e8 Mon Sep 17 00:00:00 2001 From: Daniel Otykier Date: Wed, 1 Nov 2023 14:42:13 +0100 Subject: [PATCH 4/6] Use Tokenize() instead of RegEx.IsMatch --- BestPracticeRules/BPARules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BestPracticeRules/BPARules.json b/BestPracticeRules/BPARules.json index a1459a2c..1f4707dd 100644 --- a/BestPracticeRules/BPARules.json +++ b/BestPracticeRules/BPARules.json @@ -297,7 +297,7 @@ "Description": "Avoid using the IFERROR function as it may cause performance degradation. If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).\r\nReference: https://www.elegantbi.com/post/top10bestpractices", "Severity": 2, "Scope": "Measure, CalculatedColumn", - "Expression": "RegEx.IsMatch(Expression,\"(?i)IFERROR\\s*\\(\")", + "Expression": "Tokenize().Any(Type = IFERROR)", "CompatibilityLevel": 1200 }, { From 74b1c864a4ac9fb2884f9ed1f17ba0b9822026cb Mon Sep 17 00:00:00 2001 From: Daniel Otykier Date: Wed, 1 Nov 2023 14:42:56 +0100 Subject: [PATCH 5/6] Escape table/column names before constructing a regex pattern, to avoid errors when an object name contains special characters --- BestPracticeRules/BPARules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BestPracticeRules/BPARules.json b/BestPracticeRules/BPARules.json index 1f4707dd..ffb121e2 100644 --- a/BestPracticeRules/BPARules.json +++ b/BestPracticeRules/BPARules.json @@ -337,7 +337,7 @@ "Description": "Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.\r\n\r\nReference: https://docs.microsoft.com/power-bi/guidance/relationships-active-inactive\r\nReference: https://dax.guide/userelationship/", "Severity": 2, "Scope": "Relationship", - "Expression": "IsActive == false\r\nand not\r\n(\r\nModel.AllMeasures.Any(RegEx.IsMatch(Expression,\r\n\"(?i)USERELATIONSHIP\\s*\\(\\s*\\'*\" +\r\ncurrent.FromTable.Name + \"\\'*\\[\" + \r\ncurrent.FromColumn.Name + \"\\]\\s*,\\s*\\'*\" +\r\ncurrent.ToTable.Name + \"\\'*\\[\" +\r\ncurrent.ToColumn.Name + \"\\]\"))\r\nor\r\nModel.AllCalculationItems.Any(RegEx.IsMatch(Expression,\r\n\"(?i)USERELATIONSHIP\\s*\\(\\s*\\'*\" +\r\ncurrent.FromTable.Name + \"\\'*\\[\" + \r\ncurrent.FromColumn.Name + \"\\]\\s*,\\s*\\'*\" +\r\ncurrent.ToTable.Name + \"\\'*\\[\" +\r\ncurrent.ToColumn.Name + \"\\]\"))\r\n)", + "Expression": "IsActive == false\r\nand not\r\n(\r\nModel.AllMeasures.Any(RegEx.IsMatch(Expression,\r\n\"(?i)USERELATIONSHIP\\s*\\(\\s*\\'*\" +\r\nRegEx.Escape(current.FromTable.Name) + \"\\'*\\[\" + \r\nRegEx.Escape(current.FromColumn.Name) + \"\\]\\s*,\\s*\\'*\" +\r\nRegEx.Escape(current.ToTable.Name) + \"\\'*\\[\" +\r\nRegEx.Escape(current.ToColumn.Name) + \"\\]\"))\r\nor\r\nModel.AllCalculationItems.Any(RegEx.IsMatch(Expression,\r\n\"(?i)USERELATIONSHIP\\s*\\(\\s*\\'*\" +\r\nRegEx.Escape(current.FromTable.Name) + \"\\'*\\[\" + \r\nRegEx.Escape(current.FromColumn.Name) + \"\\]\\s*,\\s*\\'*\" +\r\nRegEx.Escape(current.ToTable.Name) + \"\\'*\\[\" +\r\nRegEx.Escape(current.ToColumn.Name) + \"\\]\"))\r\n)", "CompatibilityLevel": 1200 }, { From d32963974fbf89d9208d01853af9f377a6a1ba14 Mon Sep 17 00:00:00 2001 From: Daniel Otykier Date: Wed, 1 Nov 2023 14:43:08 +0100 Subject: [PATCH 6/6] Use Tokenize() instead of RegEx.IsMatch --- BestPracticeRules/BPARules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BestPracticeRules/BPARules.json b/BestPracticeRules/BPARules.json index ffb121e2..ef172d76 100644 --- a/BestPracticeRules/BPARules.json +++ b/BestPracticeRules/BPARules.json @@ -357,7 +357,7 @@ "Description": "The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.\r\n\r\nReference: https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function/", "Severity": 1, "Scope": "Measure", - "Expression": "RegEx.IsMatch(Expression,\"(?i)EVALUATEANDLOG\\s*\\(\")", + "Expression": "Tokenize().Any(Type = EVALUATEANDLOG)", "CompatibilityLevel": 1200 }, {