|
import re

# Regular-expression fragments used to recognise TNM staging mentions
# (tumour / node / metastasis, plus optional pleura, resection and version
# parts). Each fragment is a plain string so that it can be concatenated into
# the composite ``tnm_pattern_new`` at the bottom of this section.

tumour_pattern = (
    r"(?P<tumour_prefix>[cpyramP]{1,2}\s?)?"  # Optional tumour prefix
    r"T\s?"  # 'T' followed by optional space
    r"(?P<tumour>([0-4]|is|[Xx]))"  # Tumour size (required if 'T' is present)
    r"(?P<tumour_specification>[abcdx]|mi)?"  # Optional tumour specification
    r"(?:\s?\((?P<tumour_suffix>[^()]{1,10})\))?"  # Optional tumour suffix
)

node_pattern = (
    r"(?P<node_prefix>[cpyraP]{1,2}\s?)?"  # Optional node prefix
    r"N\s?"  # 'N' followed by optional space
    r"(?P<node>[Xx01234\+])"  # Node size/status (required if 'N' is present)
    r"(?P<node_specification>[abcdx]|mi|sn|i[-,+]|mol[-,+]|\(mi\)|\(sn\)|"
    r"\(i[-,+]\)|\(mol[-,+]\)|\(\d+\s*/\s*\d+\))?"  # Optional node specification
    r"(?:\s?\((?P<node_suffix>[^()]{1,10})\))?"  # Optional node suffix
)

metastasis_pattern = (
    r"(?P<metastasis_prefix>[cpyraP]{1,2}\s?)?"  # Optional metastasis prefix
    r"M\s?"  # 'M' followed by optional space
    r"(?P<metastasis>[Xx0123\+])"  # Metastasis status (required if 'M' is present)
    r"(?P<metastasis_specification>[abcd]|i\+|mol\+|cy\+|\(i\+\)|\(mol\+\)|"
    r"\(cy\+\)|PUL|OSS|HEP|BRA|LYM|OTH|MAR|PLE|PER|ADR|SKI)?"  # Optional specification
)

pleura_pattern = (
    r"PL\s?(?P<pleura>([0123]|x))?"  # Optional pleura status (for lung cancer)
)

resection_pattern = (
    r"R\s?"
    r"(?P<resection>[Xx012])?"  # Optional resection completeness
    r"(?P<resection_specification>(is|cy\+|\(is\)|\(cy\+\))?)?"  # Optional spec
    r"(?P<resection_loc>(\((?P<r_loc>[a-z]+)\)[,;\s]*)*)?"  # Optional localization
)

version_pattern = (
    r"\(?(?P<version>uicc|accj|tnm|UICC|ACCJ|TNM)"  # TNM version
    # Optional "edition" word: singular or plural, accented or not, or the
    # abbreviation "ed" / "ed.".
    r"\s+([éeE]ditions?|[éeE]d\.?)?\s*"
    r"(?P<version_year>\d{4}|\d{2})\)?"  # Year of the version
)

# Allow space, comma, or slash as delimiters between components.
# NOTE: the ``\n`` alternative is already covered by ``\s*``; it is kept so the
# separator string stays unchanged.
TNM_space = r"(\s*[,\/]?\s*|\n)"


def _strip_group_names(pattern: str) -> str:
    """Return *pattern* with every ``(?P<name>`` opener turned into ``(?:``."""
    return re.sub(r"\(\?P<\w+>", "(?:", pattern)


# We need to exclude patterns like 'T1,', 'T2/' or 'T3.' if they are not
# followed by a node or a metastasis section.
#
# The look-ahead uses an explicit alternation: appending "?" to an
# un-parenthesised fragment only affects that fragment's last group, so it
# cannot express "optional node / optional metastasis". The fragments are
# embedded without their group names because the same names are defined again
# in the main pattern, and the standard ``re`` module rejects a pattern that
# defines a group name twice. Nothing captured inside a negative look-ahead is
# kept, so dropping the names does not change any captured value.
exclude_pattern = (
    r"(?!T\s*[0-4]\s*[.,\/](?!\s*(?:"
    + _strip_group_names(node_pattern)
    + "|"
    + _strip_group_names(metastasis_pattern)
    + ")))"
)

# Full TNM expression: a mandatory T component followed by optional N, M, PL,
# R and version components, each of which may be preceded by a separator.
tnm_pattern_new = (
    r"(?:\b|^)"
    + exclude_pattern
    + r"(?:"
    + r"(?P<T_component>"
    + tumour_pattern
    + ")"
    + TNM_space
    + "?"
    + r"(?P<N_component>"
    + node_pattern
    + ")?"
    + TNM_space
    + "?"
    + r"(?P<M_component>"
    + metastasis_pattern
    + ")?"
    + TNM_space
    + "?"
    + r"(?P<PL_component>"
    + pleura_pattern
    + ")?"
    + TNM_space
    + "?"
    + r"(?P<R_component>"
    + resection_pattern
    + ")?"
    + TNM_space
    + "?"
    + r"(?P<V_component>"
    + version_pattern
    + ")?"
    + r")"
    + r"(?:\b|$|\n)"
)
0 commit comments