diff --git a/codecov.yml b/codecov.yml
index 4daa1dd8..3ac41467 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -2,7 +2,7 @@ coverage:
   status:
     project:
       default:
-        target: 70% # TODO: switch back to auto
+        target: 68% # TODO: switch back to auto
         threshold: 1% # the leniency in hitting the target
     patch:
       default:
diff --git a/poetry.lock b/poetry.lock
index cd2b795c..009d8624 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -346,100 +346,64 @@ markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win

 [[package]]
 name = "coverage"
-version = "7.10.1"
+version = "7.5.2"
 description = "Code coverage measurement for Python"
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.8"
 groups = ["dev"]
 files = [
-    {file = "coverage-7.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1c86eb388bbd609d15560e7cc0eb936c102b6f43f31cf3e58b4fd9afe28e1372"},
-    {file = "coverage-7.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b4ba0f488c1bdb6bd9ba81da50715a372119785458831c73428a8566253b86b"},
-    {file = "coverage-7.10.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:083442ecf97d434f0cb3b3e3676584443182653da08b42e965326ba12d6b5f2a"},
-    {file = "coverage-7.10.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c1a40c486041006b135759f59189385da7c66d239bad897c994e18fd1d0c128f"},
-    {file = "coverage-7.10.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3beb76e20b28046989300c4ea81bf690df84ee98ade4dc0bbbf774a28eb98440"},
-    {file = "coverage-7.10.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bc265a7945e8d08da28999ad02b544963f813a00f3ed0a7a0ce4165fd77629f8"},
-    {file = "coverage-7.10.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:47c91f32ba4ac46f1e224a7ebf3f98b4b24335bad16137737fe71a5961a0665c"},
-    {file = "coverage-7.10.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1a108dd78ed185020f66f131c60078f3fae3f61646c28c8bb4edd3fa121fc7fc"},
-    {file = "coverage-7.10.1-cp310-cp310-win32.whl", hash = "sha256:7092cc82382e634075cc0255b0b69cb7cada7c1f249070ace6a95cb0f13548ef"},
-    {file = "coverage-7.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:ac0c5bba938879c2fc0bc6c1b47311b5ad1212a9dcb8b40fe2c8110239b7faed"},
-    {file = "coverage-7.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b45e2f9d5b0b5c1977cb4feb5f594be60eb121106f8900348e29331f553a726f"},
-    {file = "coverage-7.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a7a4d74cb0f5e3334f9aa26af7016ddb94fb4bfa11b4a573d8e98ecba8c34f1"},
-    {file = "coverage-7.10.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d4b0aab55ad60ead26159ff12b538c85fbab731a5e3411c642b46c3525863437"},
-    {file = "coverage-7.10.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dcc93488c9ebd229be6ee1f0d9aad90da97b33ad7e2912f5495804d78a3cd6b7"},
-    {file = "coverage-7.10.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa309df995d020f3438407081b51ff527171cca6772b33cf8f85344b8b4b8770"},
-    {file = "coverage-7.10.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cfb8b9d8855c8608f9747602a48ab525b1d320ecf0113994f6df23160af68262"},
-    {file = "coverage-7.10.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:320d86da829b012982b414c7cdda65f5d358d63f764e0e4e54b33097646f39a3"},
-    {file = "coverage-7.10.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dc60ddd483c556590da1d9482a4518292eec36dd0e1e8496966759a1f282bcd0"},
-    {file = "coverage-7.10.1-cp311-cp311-win32.whl", hash = "sha256:4fcfe294f95b44e4754da5b58be750396f2b1caca8f9a0e78588e3ef85f8b8be"},
-    {file = "coverage-7.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:efa23166da3fe2915f8ab452dde40319ac84dc357f635737174a08dbd912980c"},
-    {file = "coverage-7.10.1-cp311-cp311-win_arm64.whl", hash = "sha256:d12b15a8c3759e2bb580ffa423ae54be4f184cf23beffcbd641f4fe6e1584293"},
-    {file = "coverage-7.10.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6b7dc7f0a75a7eaa4584e5843c873c561b12602439d2351ee28c7478186c4da4"},
-    {file = "coverage-7.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:607f82389f0ecafc565813aa201a5cade04f897603750028dd660fb01797265e"},
-    {file = "coverage-7.10.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f7da31a1ba31f1c1d4d5044b7c5813878adae1f3af8f4052d679cc493c7328f4"},
-    {file = "coverage-7.10.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:51fe93f3fe4f5d8483d51072fddc65e717a175490804e1942c975a68e04bf97a"},
-    {file = "coverage-7.10.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3e59d00830da411a1feef6ac828b90bbf74c9b6a8e87b8ca37964925bba76dbe"},
-    {file = "coverage-7.10.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:924563481c27941229cb4e16eefacc35da28563e80791b3ddc5597b062a5c386"},
-    {file = "coverage-7.10.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ca79146ee421b259f8131f153102220b84d1a5e6fb9c8aed13b3badfd1796de6"},
-    {file = "coverage-7.10.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2b225a06d227f23f386fdc0eab471506d9e644be699424814acc7d114595495f"},
-    {file = "coverage-7.10.1-cp312-cp312-win32.whl", hash = "sha256:5ba9a8770effec5baaaab1567be916c87d8eea0c9ad11253722d86874d885eca"},
-    {file = "coverage-7.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:9eb245a8d8dd0ad73b4062135a251ec55086fbc2c42e0eb9725a9b553fba18a3"},
-    {file = "coverage-7.10.1-cp312-cp312-win_arm64.whl", hash = "sha256:7718060dd4434cc719803a5e526838a5d66e4efa5dc46d2b25c21965a9c6fcc4"},
-    {file = "coverage-7.10.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ebb08d0867c5a25dffa4823377292a0ffd7aaafb218b5d4e2e106378b1061e39"},
-    {file = "coverage-7.10.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f32a95a83c2e17422f67af922a89422cd24c6fa94041f083dd0bb4f6057d0bc7"},
-    {file = "coverage-7.10.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c4c746d11c8aba4b9f58ca8bfc6fbfd0da4efe7960ae5540d1a1b13655ee8892"},
-    {file = "coverage-7.10.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7f39edd52c23e5c7ed94e0e4bf088928029edf86ef10b95413e5ea670c5e92d7"},
-    {file = "coverage-7.10.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab6e19b684981d0cd968906e293d5628e89faacb27977c92f3600b201926b994"},
-    {file = "coverage-7.10.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5121d8cf0eacb16133501455d216bb5f99899ae2f52d394fe45d59229e6611d0"},
-    {file = "coverage-7.10.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:df1c742ca6f46a6f6cbcaef9ac694dc2cb1260d30a6a2f5c68c5f5bcfee1cfd7"},
-    {file = "coverage-7.10.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:40f9a38676f9c073bf4b9194707aa1eb97dca0e22cc3766d83879d72500132c7"},
-    {file = "coverage-7.10.1-cp313-cp313-win32.whl", hash = "sha256:2348631f049e884839553b9974f0821d39241c6ffb01a418efce434f7eba0fe7"},
-    {file = "coverage-7.10.1-cp313-cp313-win_amd64.whl", hash = "sha256:4072b31361b0d6d23f750c524f694e1a417c1220a30d3ef02741eed28520c48e"},
-    {file = "coverage-7.10.1-cp313-cp313-win_arm64.whl", hash = "sha256:3e31dfb8271937cab9425f19259b1b1d1f556790e98eb266009e7a61d337b6d4"},
-    {file = "coverage-7.10.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1c4f679c6b573a5257af6012f167a45be4c749c9925fd44d5178fd641ad8bf72"},
-    {file = "coverage-7.10.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:871ebe8143da284bd77b84a9136200bd638be253618765d21a1fce71006d94af"},
-    {file = "coverage-7.10.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:998c4751dabf7d29b30594af416e4bf5091f11f92a8d88eb1512c7ba136d1ed7"},
-    {file = "coverage-7.10.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:780f750a25e7749d0af6b3631759c2c14f45de209f3faaa2398312d1c7a22759"},
-    {file = "coverage-7.10.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:590bdba9445df4763bdbebc928d8182f094c1f3947a8dc0fc82ef014dbdd8324"},
-    {file = "coverage-7.10.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b2df80cb6a2af86d300e70acb82e9b79dab2c1e6971e44b78dbfc1a1e736b53"},
-    {file = "coverage-7.10.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d6a558c2725bfb6337bf57c1cd366c13798bfd3bfc9e3dd1f4a6f6fc95a4605f"},
-    {file = "coverage-7.10.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e6150d167f32f2a54690e572e0a4c90296fb000a18e9b26ab81a6489e24e78dd"},
-    {file = "coverage-7.10.1-cp313-cp313t-win32.whl", hash = "sha256:d946a0c067aa88be4a593aad1236493313bafaa27e2a2080bfe88db827972f3c"},
-    {file = "coverage-7.10.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e37c72eaccdd5ed1130c67a92ad38f5b2af66eeff7b0abe29534225db2ef7b18"},
-    {file = "coverage-7.10.1-cp313-cp313t-win_arm64.whl", hash = "sha256:89ec0ffc215c590c732918c95cd02b55c7d0f569d76b90bb1a5e78aa340618e4"},
-    {file = "coverage-7.10.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:166d89c57e877e93d8827dac32cedae6b0277ca684c6511497311249f35a280c"},
-    {file = "coverage-7.10.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bed4a2341b33cd1a7d9ffc47df4a78ee61d3416d43b4adc9e18b7d266650b83e"},
-    {file = "coverage-7.10.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ddca1e4f5f4c67980533df01430184c19b5359900e080248bbf4ed6789584d8b"},
-    {file = "coverage-7.10.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:37b69226001d8b7de7126cad7366b0778d36777e4d788c66991455ba817c5b41"},
-    {file = "coverage-7.10.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b2f22102197bcb1722691296f9e589f02b616f874e54a209284dd7b9294b0b7f"},
-    {file = "coverage-7.10.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1e0c768b0f9ac5839dac5cf88992a4bb459e488ee8a1f8489af4cb33b1af00f1"},
-    {file = "coverage-7.10.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:991196702d5e0b120a8fef2664e1b9c333a81d36d5f6bcf6b225c0cf8b0451a2"},
-    {file = "coverage-7.10.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae8e59e5f4fd85d6ad34c2bb9d74037b5b11be072b8b7e9986beb11f957573d4"},
-    {file = "coverage-7.10.1-cp314-cp314-win32.whl", hash = "sha256:042125c89cf74a074984002e165d61fe0e31c7bd40ebb4bbebf07939b5924613"},
-    {file = "coverage-7.10.1-cp314-cp314-win_amd64.whl", hash = "sha256:a22c3bfe09f7a530e2c94c87ff7af867259c91bef87ed2089cd69b783af7b84e"},
-    {file = "coverage-7.10.1-cp314-cp314-win_arm64.whl", hash = "sha256:ee6be07af68d9c4fca4027c70cea0c31a0f1bc9cb464ff3c84a1f916bf82e652"},
-    {file = "coverage-7.10.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d24fb3c0c8ff0d517c5ca5de7cf3994a4cd559cde0315201511dbfa7ab528894"},
-    {file = "coverage-7.10.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1217a54cfd79be20512a67ca81c7da3f2163f51bbfd188aab91054df012154f5"},
-    {file = "coverage-7.10.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:51f30da7a52c009667e02f125737229d7d8044ad84b79db454308033a7808ab2"},
-    {file = "coverage-7.10.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ed3718c757c82d920f1c94089066225ca2ad7f00bb904cb72b1c39ebdd906ccb"},
-    {file = "coverage-7.10.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc452481e124a819ced0c25412ea2e144269ef2f2534b862d9f6a9dae4bda17b"},
-    {file = "coverage-7.10.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9d6f494c307e5cb9b1e052ec1a471060f1dea092c8116e642e7a23e79d9388ea"},
-    {file = "coverage-7.10.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:fc0e46d86905ddd16b85991f1f4919028092b4e511689bbdaff0876bd8aab3dd"},
-    {file = "coverage-7.10.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80b9ccd82e30038b61fc9a692a8dc4801504689651b281ed9109f10cc9fe8b4d"},
-    {file = "coverage-7.10.1-cp314-cp314t-win32.whl", hash = "sha256:e58991a2b213417285ec866d3cd32db17a6a88061a985dbb7e8e8f13af429c47"},
-    {file = "coverage-7.10.1-cp314-cp314t-win_amd64.whl", hash = "sha256:e88dd71e4ecbc49d9d57d064117462c43f40a21a1383507811cf834a4a620651"},
-    {file = "coverage-7.10.1-cp314-cp314t-win_arm64.whl", hash = "sha256:1aadfb06a30c62c2eb82322171fe1f7c288c80ca4156d46af0ca039052814bab"},
-    {file = "coverage-7.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:57b6e8789cbefdef0667e4a94f8ffa40f9402cee5fc3b8e4274c894737890145"},
-    {file = "coverage-7.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:85b22a9cce00cb03156334da67eb86e29f22b5e93876d0dd6a98646bb8a74e53"},
-    {file = "coverage-7.10.1-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:97b6983a2f9c76d345ca395e843a049390b39652984e4a3b45b2442fa733992d"},
-    {file = "coverage-7.10.1-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ddf2a63b91399a1c2f88f40bc1705d5a7777e31c7e9eb27c602280f477b582ba"},
-    {file = "coverage-7.10.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47ab6dbbc31a14c5486420c2c1077fcae692097f673cf5be9ddbec8cdaa4cdbc"},
-    {file = "coverage-7.10.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:21eb7d8b45d3700e7c2936a736f732794c47615a20f739f4133d5230a6512a88"},
-    {file = "coverage-7.10.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:283005bb4d98ae33e45f2861cd2cde6a21878661c9ad49697f6951b358a0379b"},
-    {file = "coverage-7.10.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fefe31d61d02a8b2c419700b1fade9784a43d726de26495f243b663cd9fe1513"},
-    {file = "coverage-7.10.1-cp39-cp39-win32.whl", hash = "sha256:e8ab8e4c7ec7f8a55ac05b5b715a051d74eac62511c6d96d5bb79aaafa3b04cf"},
-    {file = "coverage-7.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:c36baa0ecde742784aa76c2b816466d3ea888d5297fda0edbac1bf48fa94688a"},
-    {file = "coverage-7.10.1-py3-none-any.whl", hash = "sha256:fa2a258aa6bf188eb9a8948f7102a83da7c430a0dce918dbd8b60ef8fcb772d7"},
-    {file = "coverage-7.10.1.tar.gz", hash = "sha256:ae2b4856f29ddfe827106794f3589949a57da6f0d38ab01e24ec35107979ba57"},
+    {file = "coverage-7.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:554c7327bf0fd688050348e22db7c8e163fb7219f3ecdd4732d7ed606b417263"},
+    {file = "coverage-7.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d0305e02e40c7cfea5d08d6368576537a74c0eea62b77633179748d3519d6705"},
+    {file = "coverage-7.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:829fb55ad437d757c70d5b1c51cfda9377f31506a0a3f3ac282bc6a387d6a5f1"},
+    {file = "coverage-7.5.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:894b1acded706f1407a662d08e026bfd0ff1e59e9bd32062fea9d862564cfb65"},
+    {file = "coverage-7.5.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe76d6dee5e4febefa83998b17926df3a04e5089e3d2b1688c74a9157798d7a2"},
+    {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c7ebf2a37e4f5fea3c1a11e1f47cea7d75d0f2d8ef69635ddbd5c927083211fc"},
+    {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20e611fc36e1a0fc7bbf957ef9c635c8807d71fbe5643e51b2769b3cc0fb0b51"},
+    {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7c5c5b7ae2763533152880d5b5b451acbc1089ade2336b710a24b2b0f5239d20"},
+    {file = "coverage-7.5.2-cp310-cp310-win32.whl", hash = "sha256:1e4225990a87df898e40ca31c9e830c15c2c53b1d33df592bc8ef314d71f0281"},
+    {file = "coverage-7.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:976cd92d9420e6e2aa6ce6a9d61f2b490e07cb468968adf371546b33b829284b"},
+    {file = "coverage-7.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5997d418c219dcd4dcba64e50671cca849aaf0dac3d7a2eeeb7d651a5bd735b8"},
+    {file = "coverage-7.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec27e93bbf5976f0465e8936f02eb5add99bbe4e4e7b233607e4d7622912d68d"},
+    {file = "coverage-7.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f11f98753800eb1ec872562a398081f6695f91cd01ce39819e36621003ec52a"},
+    {file = "coverage-7.5.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e34680049eecb30b6498784c9637c1c74277dcb1db75649a152f8004fbd6646"},
+    {file = "coverage-7.5.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e12536446ad4527ac8ed91d8a607813085683bcce27af69e3b31cd72b3c5960"},
+    {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3d3f7744b8a8079d69af69d512e5abed4fb473057625588ce126088e50d05493"},
+    {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:431a3917e32223fcdb90b79fe60185864a9109631ebc05f6c5aa03781a00b513"},
+    {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a7c6574225f34ce45466f04751d957b5c5e6b69fca9351db017c9249786172ce"},
+    {file = "coverage-7.5.2-cp311-cp311-win32.whl", hash = "sha256:2b144d142ec9987276aeff1326edbc0df8ba4afbd7232f0ca10ad57a115e95b6"},
+    {file = "coverage-7.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:900532713115ac58bc3491b9d2b52704a05ed408ba0918d57fd72c94bc47fba1"},
+    {file = "coverage-7.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9a42970ce74c88bdf144df11c52c5cf4ad610d860de87c0883385a1c9d9fa4ab"},
+    {file = "coverage-7.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26716a1118c6ce2188283b4b60a898c3be29b480acbd0a91446ced4fe4e780d8"},
+    {file = "coverage-7.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60b66b0363c5a2a79fba3d1cd7430c25bbd92c923d031cae906bdcb6e054d9a2"},
+    {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d22eba19273b2069e4efeff88c897a26bdc64633cbe0357a198f92dca94268"},
+    {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb5b92a0ab3d22dfdbfe845e2fef92717b067bdf41a5b68c7e3e857c0cff1a4"},
+    {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1aef719b6559b521ae913ddeb38f5048c6d1a3d366865e8b320270b7bc4693c2"},
+    {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8809c0ea0e8454f756e3bd5c36d04dddf222989216788a25bfd6724bfcee342c"},
+    {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1acc2e2ef098a1d4bf535758085f508097316d738101a97c3f996bccba963ea5"},
+    {file = "coverage-7.5.2-cp312-cp312-win32.whl", hash = "sha256:97de509043d3f0f2b2cd171bdccf408f175c7f7a99d36d566b1ae4dd84107985"},
+    {file = "coverage-7.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:8941e35a0e991a7a20a1fa3e3182f82abe357211f2c335a9e6007067c3392fcf"},
+    {file = "coverage-7.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5662bf0f6fb6757f5c2d6279c541a5af55a39772c2362ed0920b27e3ce0e21f7"},
+    {file = "coverage-7.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3d9c62cff2ffb4c2a95328488fd7aa96a7a4b34873150650fe76b19c08c9c792"},
+    {file = "coverage-7.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74eeaa13e8200ad72fca9c5f37395fb310915cec6f1682b21375e84fd9770e84"},
+    {file = "coverage-7.5.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f29bf497d51a5077994b265e976d78b09d9d0dff6ca5763dbb4804534a5d380"},
+    {file = "coverage-7.5.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f96aa94739593ae0707eda9813ce363a0a0374a810ae0eced383340fc4a1f73"},
+    {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:51b6cee539168a912b4b3b040e4042b9e2c9a7ad9c8546c09e4eaeff3eacba6b"},
+    {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:59a75e6aa5c25b50b5a1499f9718f2edff54257f545718c4fb100f48d570ead4"},
+    {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29da75ce20cb0a26d60e22658dd3230713c6c05a3465dd8ad040ffc991aea318"},
+    {file = "coverage-7.5.2-cp38-cp38-win32.whl", hash = "sha256:23f2f16958b16152b43a39a5ecf4705757ddd284b3b17a77da3a62aef9c057ef"},
+    {file = "coverage-7.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:9e41c94035e5cdb362beed681b58a707e8dc29ea446ea1713d92afeded9d1ddd"},
+    {file = "coverage-7.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:06d96b9b19bbe7f049c2be3c4f9e06737ec6d8ef8933c7c3a4c557ef07936e46"},
+    {file = "coverage-7.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:878243e1206828908a6b4a9ca7b1aa8bee9eb129bf7186fc381d2646f4524ce9"},
+    {file = "coverage-7.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:482df956b055d3009d10fce81af6ffab28215d7ed6ad4a15e5c8e67cb7c5251c"},
+    {file = "coverage-7.5.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a35c97af60a5492e9e89f8b7153fe24eadfd61cb3a2fb600df1a25b5dab34b7e"},
+    {file = "coverage-7.5.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24bb4c7859a3f757a116521d4d3a8a82befad56ea1bdacd17d6aafd113b0071e"},
+    {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e1046aab24c48c694f0793f669ac49ea68acde6a0798ac5388abe0a5615b5ec8"},
+    {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:448ec61ea9ea7916d5579939362509145caaecf03161f6f13e366aebb692a631"},
+    {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4a00bd5ba8f1a4114720bef283cf31583d6cb1c510ce890a6da6c4268f0070b7"},
+    {file = "coverage-7.5.2-cp39-cp39-win32.whl", hash = "sha256:9f805481d5eff2a96bac4da1570ef662bf970f9a16580dc2c169c8c3183fa02b"},
+    {file = "coverage-7.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:2c79f058e7bec26b5295d53b8c39ecb623448c74ccc8378631f5cb5c16a7e02c"},
+    {file = "coverage-7.5.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:40dbb8e7727560fe8ab65efcddfec1ae25f30ef02e2f2e5d78cfb52a66781ec5"},
+    {file = "coverage-7.5.2.tar.gz", hash = "sha256:13017a63b0e499c59b5ba94a8542fb62864ba3016127d1e4ef30d354fc2b00e9"},
 ]

 [package.dependencies]
@@ -1330,54 +1294,41 @@ files = [

 [[package]]
 name = "pandas"
-version = "2.3.1"
+version = "2.2.2"
 description = "Powerful data structures for data analysis, time series, and statistics"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"},
-    {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"},
-    {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f951fbb702dacd390561e0ea45cdd8ecfa7fb56935eb3dd78e306c19104b9b0"},
-    {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd05b72ec02ebfb993569b4931b2e16fbb4d6ad6ce80224a3ee838387d83a191"},
-    {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1b916a627919a247d865aed068eb65eb91a344b13f5b57ab9f610b7716c92de1"},
-    {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fe67dc676818c186d5a3d5425250e40f179c2a89145df477dd82945eaea89e97"},
-    {file = "pandas-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:2eb789ae0274672acbd3c575b0598d213345660120a257b47b5dafdc618aec83"},
-    {file = "pandas-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2b0540963d83431f5ce8870ea02a7430adca100cec8a050f0811f8e31035541b"},
-    {file = "pandas-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fe7317f578c6a153912bd2292f02e40c1d8f253e93c599e82620c7f69755c74f"},
-    {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6723a27ad7b244c0c79d8e7007092d7c8f0f11305770e2f4cd778b3ad5f9f85"},
-    {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3462c3735fe19f2638f2c3a40bd94ec2dc5ba13abbb032dd2fa1f540a075509d"},
-    {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:98bcc8b5bf7afed22cc753a28bc4d9e26e078e777066bc53fac7904ddef9a678"},
-    {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d544806b485ddf29e52d75b1f559142514e60ef58a832f74fb38e48d757b299"},
-    {file = "pandas-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b3cd4273d3cb3707b6fffd217204c52ed92859533e31dc03b7c5008aa933aaab"},
-    {file = "pandas-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:689968e841136f9e542020698ee1c4fbe9caa2ed2213ae2388dc7b81721510d3"},
-    {file = "pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:025e92411c16cbe5bb2a4abc99732a6b132f439b8aab23a59fa593eb00704232"},
-    {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b7ff55f31c4fcb3e316e8f7fa194566b286d6ac430afec0d461163312c5841e"},
-    {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dcb79bf373a47d2a40cf7232928eb7540155abbc460925c2c96d2d30b006eb4"},
-    {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:56a342b231e8862c96bdb6ab97170e203ce511f4d0429589c8ede1ee8ece48b8"},
-    {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ca7ed14832bce68baef331f4d7f294411bed8efd032f8109d690df45e00c4679"},
-    {file = "pandas-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ac942bfd0aca577bef61f2bc8da8147c4ef6879965ef883d8e8d5d2dc3e744b8"},
-    {file = "pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22"},
-    {file = "pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a"},
-    {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928"},
-    {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9"},
-    {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12"},
-    {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb"},
-    {file = "pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956"},
-    {file = "pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a"},
-    {file = "pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9"},
-    {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275"},
-    {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab"},
-    {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96"},
-    {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444"},
-    {file = "pandas-2.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4645f770f98d656f11c69e81aeb21c6fca076a44bed3dcbb9396a4311bc7f6d8"},
-    {file = "pandas-2.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:342e59589cc454aaff7484d75b816a433350b3d7964d7847327edda4d532a2e3"},
-    {file = "pandas-2.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d12f618d80379fde6af007f65f0c25bd3e40251dbd1636480dfffce2cf1e6da"},
-    {file = "pandas-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd71c47a911da120d72ef173aeac0bf5241423f9bfea57320110a978457e069e"},
-    {file = "pandas-2.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:09e3b1587f0f3b0913e21e8b32c3119174551deb4a4eba4a89bc7377947977e7"},
-    {file = "pandas-2.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2323294c73ed50f612f67e2bf3ae45aea04dce5690778e08a09391897f35ff88"},
-    {file = "pandas-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:b4b0de34dc8499c2db34000ef8baad684cfa4cbd836ecee05f323ebfba348c7d"},
-    {file = "pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2"},
+    {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
+    {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
+    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"},
+    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"},
+    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"},
+    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"},
+    {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"},
+    {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"},
+    {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"},
+    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"},
+    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"},
+    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"},
+    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"},
+    {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"},
+    {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"},
+    {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"},
+    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"},
+    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"},
+    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"},
+    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"},
+    {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"},
+    {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"},
+    {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"},
+    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"},
+    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"},
+    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"},
+    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"},
+    {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"},
+    {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"},
 ]

 [package.dependencies]
@@ -1442,6 +1393,22 @@ files = [
 [package.dependencies]
 huggingface_hub = ">=0.23.4,<0.26.0"

+[[package]]
+name = "pie-documents"
+version = "0.1.0"
+description = "Python-IE document and annotation types as well as document processing utilities"
+optional = false
+python-versions = "<4.0,>=3.9"
+groups = ["main"]
+files = [
+    {file = "pie_documents-0.1.0-py3-none-any.whl", hash = "sha256:4802b8f4e58c8a97c26566818ee42597ec180ba49098453813891db5640ee20a"},
+    {file = "pie_documents-0.1.0.tar.gz", hash = "sha256:93b041dd7ca36ee246213e585a7ae10ae4bc6fc7aadc39bb049badc21b259457"},
+]
+
+[package.dependencies]
+pandas = ">=2.0.3,<3.0.0"
+pie-core = ">=0.2.0,<0.4.0"
+
 [[package]]
 name = "platformdirs"
 version = "4.2.2"
@@ -1535,14 +1502,14 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments

 [[package]]
 name = "pytest-cov"
-version = "6.2.1"
+version = "6.3.0"
 description = "Pytest plugin for measuring coverage."
 optional = false
 python-versions = ">=3.9"
 groups = ["dev"]
 files = [
-    {file = "pytest_cov-6.2.1-py3-none-any.whl", hash = "sha256:f5bc4c23f42f1cdd23c70b1dab1bbaef4fc505ba950d53e0081d0730dd7e86d5"},
-    {file = "pytest_cov-6.2.1.tar.gz", hash = "sha256:25cc6cc0a5358204b8108ecedc51a9b57b34cc6b8c967cc2c01a4e00d8a67da2"},
+    {file = "pytest_cov-6.3.0-py3-none-any.whl", hash = "sha256:440db28156d2468cafc0415b4f8e50856a0d11faefa38f30906048fe490f1749"},
+    {file = "pytest_cov-6.3.0.tar.gz", hash = "sha256:35c580e7800f87ce892e687461166e1ac2bcb8fb9e13aea79032518d6e503ff2"},
 ]

 [package.dependencies]
@@ -1638,30 +1605,30 @@ test = ["cloudpickle (>=1.3)", "coverage (==7.3.1)", "fastapi", "onnx (>=0.14.0)

 [[package]]
 name = "pytz"
-version = "2025.2"
+version = "2024.1"
 description = "World timezone definitions, modern and historical"
 optional = false
 python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
-    {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
+    {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
+    {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
 ]

 [[package]]
 name = "pyupgrade"
-version = "3.20.0"
+version = "3.15.2"
 description = "A tool to automatically upgrade syntax for newer versions."
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.8.1"
 groups = ["dev"]
 files = [
-    {file = "pyupgrade-3.20.0-py2.py3-none-any.whl", hash = "sha256:cd5bf842b863f50adad324a01c30aef60b9f698a9814848094818659c92cd1f4"},
-    {file = "pyupgrade-3.20.0.tar.gz", hash = "sha256:dd6a16c13fc1a7db45796008689a9a35420bd364d681430f640c5e54a3d351ea"},
+    {file = "pyupgrade-3.15.2-py2.py3-none-any.whl", hash = "sha256:ce309e0ff8ecb73f56a45f12570be84bbbde9540d13697cacb261a7f595fb1f5"},
+    {file = "pyupgrade-3.15.2.tar.gz", hash = "sha256:c488d6896c546d25845712ef6402657123008d56c1063174e27aabe15bd6b4e5"},
 ]

 [package.dependencies]
-tokenize-rt = ">=6.1.0"
+tokenize-rt = ">=5.2.0"

 [[package]]
 name = "pyyaml"
@@ -2068,14 +2035,14 @@ files = [

 [[package]]
 name = "six"
-version = "1.17.0"
+version = "1.16.0"
 description = "Python 2 and 3 compatibility utilities"
 optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
 groups = ["main"]
 files = [
-    {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
-    {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
+    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
+    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
 ]

 [[package]]
@@ -2110,14 +2077,14 @@ files = [

 [[package]]
 name = "tokenize-rt"
-version = "6.2.0"
+version = "5.2.0"
 description = "A wrapper around the stdlib `tokenize` which roundtrips."
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.8"
 groups = ["dev"]
 files = [
-    {file = "tokenize_rt-6.2.0-py2.py3-none-any.whl", hash = "sha256:a152bf4f249c847a66497a4a95f63376ed68ac6abf092a2f7cfb29d044ecff44"},
-    {file = "tokenize_rt-6.2.0.tar.gz", hash = "sha256:8439c042b330c553fdbe1758e4a05c0ed460dbbbb24a606f11f0dee75da4cad6"},
+    {file = "tokenize_rt-5.2.0-py2.py3-none-any.whl", hash = "sha256:b79d41a65cfec71285433511b50271b05da3584a1da144a0752e9c621a285289"},
+    {file = "tokenize_rt-5.2.0.tar.gz", hash = "sha256:9fe80f8a5c1edad2d3ede0f37481cc0cc1538a2f442c9c2f9e4feacd2792d054"},
 ]

 [[package]]
@@ -2498,14 +2465,14 @@ files = [

 [[package]]
 name = "tzdata"
-version = "2025.2"
+version = "2024.1"
 description = "Provider of IANA time zone data"
 optional = false
 python-versions = ">=2"
 groups = ["main"]
 files = [
-    {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"},
-    {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"},
+    {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
+    {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},
 ]

 [[package]]
@@ -2671,4 +2638,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9,<4.0"
-content-hash = "41b9146c4a075dccea911a688d3b42819cdecf86459effc0f2b5073fc7d4db22"
+content-hash = "2b1197ef088543dadf4f2048b6ca32cbf781698ffe7f78d79c0e78de388e9f15"
diff --git a/pyproject.toml b/pyproject.toml
index 4d91a0e4..310c033e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,12 +18,11 @@ dynamic = [ "classifiers" ]
 requires-python = ">=3.9,<4.0"
 dependencies = [
     "pie-core >=0.2.1, <0.4.0",
+    "pie-documents >=0.1.0, <0.2.0",
     "torch >=1.10",
     "pytorch-lightning >=2, <3",
     "torchmetrics >1, <2",
     "transformers >=4.18, <5",
-    # required for metrics: f1, confusion_matrix, and statsistics
-    "pandas >=2.0.0, <3",
 ]

 [project.urls]
@@ -44,6 +43,10 @@ classifiers = [
     "License :: OSI Approved :: MIT License"
 ]

+[tool.poetry.urls]
+"Bug Tracker" = "https://github.com/christophalt/pytorch-ie/issues"
+"Changelog" = "https://github.com/christophalt/pytorch-ie/releases"
+
 [tool.poetry.group.dev]
 optional = true

diff --git a/src/pytorch_ie/annotations.py b/src/pytorch_ie/annotations.py
index 91b877de..9149a218 100644
--- a/src/pytorch_ie/annotations.py
+++ b/src/pytorch_ie/annotations.py
@@ -1,188 +1,13 @@
-from dataclasses import dataclass, field
-from typing import Any, Optional, Tuple
-
-from pie_core import Annotation
-
-
-def _post_init_single_label(self):
-    if not isinstance(self.label, str):
-        raise ValueError("label must be a single string.")
-
-    if not isinstance(self.score, float):
-        raise ValueError("score must be a single float.")
-
-
-def _post_init_multi_label(self):
-    if self.score is None:
-        score = tuple([1.0] * len(self.label))
-        object.__setattr__(self, "score", score)
-
-    if not isinstance(self.label, tuple):
-        object.__setattr__(self, "label", tuple(self.label))
-
-    if not isinstance(self.score, tuple):
-        object.__setattr__(self, "score", tuple(self.score))
-
-    if len(self.label) != len(self.score):
-        raise ValueError(
-            f"Number of labels ({len(self.label)}) and scores ({len(self.score)}) must be equal."
-        )
-
-
-def _post_init_multi_span(self):
-    if isinstance(self.slices, list):
-        object.__setattr__(self, "slices", tuple(tuple(s) for s in self.slices))
-
-
-def _post_init_arguments_and_roles(self):
-    if len(self.arguments) != len(self.roles):
-        raise ValueError(
-            f"Number of arguments ({len(self.arguments)}) and roles ({len(self.roles)}) must be equal"
-        )
-    if not isinstance(self.arguments, tuple):
-        object.__setattr__(self, "arguments", tuple(self.arguments))
-    if not isinstance(self.roles, tuple):
-        object.__setattr__(self, "roles", tuple(self.roles))
-
-
-@dataclass(eq=True, frozen=True)
-class Label(Annotation):
-    label: str
-    score: float = field(default=1.0, compare=False)
-
-    def __post_init__(self) -> None:
-        _post_init_single_label(self)
-
-    def resolve(self) -> Any:
-        return self.label
-
-
-@dataclass(eq=True, frozen=True)
-class MultiLabel(Annotation):
-    label: Tuple[str, ...]
-    score: Optional[Tuple[float, ...]] = field(default=None, compare=False)
-
-    def __post_init__(self) -> None:
-        _post_init_multi_label(self)
-
-    def resolve(self) -> Any:
-        return self.label
-
-
-@dataclass(eq=True, frozen=True)
-class Span(Annotation):
-    start: int
-    end: int
-
-    def __str__(self) -> str:
-        if not self.is_attached:
-            return super().__str__()
-        return str(self.target[self.start : self.end])
-
-    def resolve(self) -> Any:
-        if self.is_attached:
-            return self.target[self.start : self.end]
-        else:
-            raise ValueError(f"{self} is not attached to a target.")
-
-
-@dataclass(eq=True, frozen=True)
-class LabeledSpan(Span):
-    label: str
-    score: float = field(default=1.0, compare=False)
-
-    def __post_init__(self) -> None:
-        _post_init_single_label(self)
-
-    def resolve(self) -> Any:
-        return self.label, super().resolve()
-
-
-@dataclass(eq=True, frozen=True)
-class MultiLabeledSpan(Span):
-    label: Tuple[str, ...]
-    score: Optional[Tuple[float, ...]] = field(default=None, compare=False)
-
-    def __post_init__(self) -> None:
-        _post_init_multi_label(self)
-
-    def resolve(self) -> Any:
-        return self.label, super().resolve()
-
-
-@dataclass(eq=True, frozen=True)
-class MultiSpan(Annotation):
-    slices: Tuple[Tuple[int, int], ...]
-
-    def __post_init__(self) -> None:
-        _post_init_multi_span(self)
-
-    def __str__(self) -> str:
-        if not self.is_attached:
-            return super().__str__()
-        return str(tuple(self.target[start:end] for start, end in self.slices))
-
-    def resolve(self) -> Any:
-        if self.is_attached:
-            return tuple(self.target[start:end] for start, end in self.slices)
-        else:
-            raise ValueError(f"{self} is not attached to a target.")
-
-
-@dataclass(eq=True, frozen=True)
-class LabeledMultiSpan(MultiSpan):
-    label: str
-    score: float = field(default=1.0, compare=False)
-
-    def __post_init__(self) -> None:
-        super().__post_init__()
-        _post_init_single_label(self)
-
-    def resolve(self) -> Any:
-        return self.label, super().resolve()
-
-
-@dataclass(eq=True, frozen=True)
-class BinaryRelation(Annotation):
-    head: Annotation
-    tail: Annotation
-    label: str
-    score: float = field(default=1.0, compare=False)
-
-    def __post_init__(self) -> None:
-        _post_init_single_label(self)
-
-    def resolve(self) -> Any:
-        return self.label, (self.head.resolve(), self.tail.resolve())
-
-
-@dataclass(eq=True, frozen=True)
-class MultiLabeledBinaryRelation(Annotation):
-    head: Annotation
-    tail: Annotation
-    label: Tuple[str, ...]
-    score: Optional[Tuple[float, ...]] = field(default=None, compare=False)
-
-    def __post_init__(self) -> None:
-        _post_init_multi_label(self)
-
-    def resolve(self) -> Any:
-        return self.label, (self.head.resolve(), self.tail.resolve())
-
-
-@dataclass(eq=True, frozen=True)
-class NaryRelation(Annotation):
-    arguments: Tuple[Annotation, ...]
-    roles: Tuple[str, ...]
-    label: str
-    score: float = field(default=1.0, compare=False)
-
-    def __post_init__(self) -> None:
-        _post_init_arguments_and_roles(self)
-        _post_init_single_label(self)
-
-    def resolve(self) -> Any:
-        return (
-            self.label,
-            tuple((role, arg.resolve()) for arg, role in zip(self.arguments, self.roles)),
-        )
+# backward compatibility
+from pie_documents.annotations import (
+    BinaryRelation,
+    Label,
+    LabeledMultiSpan,
+    LabeledSpan,
+    MultiLabel,
+    MultiLabeledBinaryRelation,
+    MultiLabeledSpan,
+    MultiSpan,
+    NaryRelation,
+    Span,
+)
diff --git a/src/pytorch_ie/documents.py b/src/pytorch_ie/documents.py
index f1f0076b..69ae50ba 100644
--- a/src/pytorch_ie/documents.py
+++ b/src/pytorch_ie/documents.py
@@ -1,171 +1,31 @@
-import dataclasses
-from typing import Any, Dict, Optional, Tuple
-
-from pie_core import AnnotationLayer, Document, annotation_field
-from typing_extensions import TypeAlias
-
-from pytorch_ie.annotations import (
-    BinaryRelation,
-    Label,
-    LabeledMultiSpan,
-    LabeledSpan,
-    MultiLabel,
-    Span,
-)
-
-
-@dataclasses.dataclass
-class WithMetadata:
-    id: Optional[str] = None
-    metadata: Dict[str, Any] = dataclasses.field(default_factory=dict)
-
-
-@dataclasses.dataclass
-class WithTokens:
-    tokens: Tuple[str, ...]
-
-
-@dataclasses.dataclass
-class WithText:
-    text: str
-
-
-@dataclasses.dataclass
-class TextBasedDocument(WithMetadata, WithText, Document):
-    pass
-
-
-@dataclasses.dataclass
-class TokenBasedDocument(WithMetadata, WithTokens, Document):
-    def __post_init__(self) -> None:
-
-        # When used in a dataset, the document gets serialized to json like structure which does not know tuples,
-        # so they get converted to lists. This is a workaround to automatically convert the "tokens" back to tuples
-        # when the document is created from a dataset.
-        if isinstance(self.tokens, list):
-            object.__setattr__(self, "tokens", tuple(self.tokens))
-        elif not isinstance(self.tokens, tuple):
-            raise ValueError("tokens must be a tuple.")
-
-        # Call the default document construction code
-        super().__post_init__()
-
-
-# backwards compatibility
-TextDocument: TypeAlias = TextBasedDocument
-
-
-@dataclasses.dataclass
-class DocumentWithLabel(Document):
-    label: AnnotationLayer[Label] = annotation_field()
-
-
-@dataclasses.dataclass
-class DocumentWithMultiLabel(Document):
-    label: AnnotationLayer[MultiLabel] = annotation_field()
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabel(DocumentWithLabel, TextBasedDocument):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithMultiLabel(DocumentWithMultiLabel, TextBasedDocument):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledPartitions(TextBasedDocument):
-    labeled_partitions: AnnotationLayer[LabeledSpan] = annotation_field(target="text")
-
-
-@dataclasses.dataclass
-class TextDocumentWithSentences(TextBasedDocument):
-    sentences: AnnotationLayer[Span] = annotation_field(target="text")
-
-
-@dataclasses.dataclass
-class TextDocumentWithSpans(TextBasedDocument):
-    spans: AnnotationLayer[Span] = annotation_field(target="text")
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledSpans(TextBasedDocument):
-    labeled_spans: AnnotationLayer[LabeledSpan] = annotation_field(target="text")
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledSpansAndLabeledPartitions(
-    TextDocumentWithLabeledSpans, TextDocumentWithLabeledPartitions
-):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledSpansAndSentences(
-    TextDocumentWithLabeledSpans, TextDocumentWithSentences
-):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledSpansAndBinaryRelations(TextDocumentWithLabeledSpans):
-    binary_relations: AnnotationLayer[BinaryRelation] = annotation_field(target="labeled_spans")
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions(
-    TextDocumentWithLabeledSpansAndLabeledPartitions,
-    TextDocumentWithLabeledSpansAndBinaryRelations,
+from pie_documents.documents import (
+    DocumentWithLabel,
+    DocumentWithMultiLabel,
+    TextBasedDocument,
+    TextDocumentWithLabel,
+    TextDocumentWithLabeledMultiSpans,
+    TextDocumentWithLabeledMultiSpansAndBinaryRelations,
+    TextDocumentWithLabeledMultiSpansAndLabeledPartitions,
+    TextDocumentWithLabeledMultiSpansBinaryRelationsAndLabeledPartitions,
     TextDocumentWithLabeledPartitions,
-):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithSpansAndBinaryRelations(TextDocumentWithSpans):
-    binary_relations: AnnotationLayer[BinaryRelation] = annotation_field(target="spans")
-
-
-@dataclasses.dataclass
-class TextDocumentWithSpansAndLabeledPartitions(
-    TextDocumentWithSpans, TextDocumentWithLabeledPartitions
-):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithSpansBinaryRelationsAndLabeledPartitions(
-    TextDocumentWithSpansAndLabeledPartitions,
+    TextDocumentWithLabeledSpans,
+    TextDocumentWithLabeledSpansAndBinaryRelations,
+    TextDocumentWithLabeledSpansAndLabeledPartitions,
+    TextDocumentWithLabeledSpansAndSentences,
+    TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions,
+    TextDocumentWithMultiLabel,
+    TextDocumentWithSentences,
+    TextDocumentWithSpans,
     TextDocumentWithSpansAndBinaryRelations,
-    TextDocumentWithLabeledPartitions,
-):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledMultiSpans(TextBasedDocument):
-    labeled_multi_spans: AnnotationLayer[LabeledMultiSpan] = annotation_field(target="text")
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledMultiSpansAndLabeledPartitions(
-    TextDocumentWithLabeledMultiSpans, TextDocumentWithLabeledPartitions
-):
-    pass
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledMultiSpansAndBinaryRelations(TextDocumentWithLabeledMultiSpans):
-    binary_relations: AnnotationLayer[BinaryRelation] = annotation_field(
-        target="labeled_multi_spans"
-    )
-
-
-@dataclasses.dataclass
-class TextDocumentWithLabeledMultiSpansBinaryRelationsAndLabeledPartitions(
-    TextDocumentWithLabeledMultiSpansAndLabeledPartitions,
-    TextDocumentWithLabeledMultiSpansAndBinaryRelations,
-):
-    pass
+    TextDocumentWithSpansAndLabeledPartitions,
+    TextDocumentWithSpansBinaryRelationsAndLabeledPartitions,
+    TokenBasedDocument,
+    WithMetadata,
+    WithText,
+    WithTokens,
+)
+from typing_extensions import TypeAlias
+
+
+# backwards compatibility
+TextDocument: TypeAlias = TextBasedDocument
diff --git a/src/pytorch_ie/metrics/__init__.py b/src/pytorch_ie/metrics/__init__.py
index 49b3a056..5885a047 100644
--- a/src/pytorch_ie/metrics/__init__.py
+++ b/src/pytorch_ie/metrics/__init__.py
@@ -1,4 +1,2 @@
-from .confusion_matrix import ConfusionMatrix
-from .f1 import F1Metric
-
-__all__ = ["F1Metric", "ConfusionMatrix"]
+# backwards compatibility
+from pie_documents.metrics import ConfusionMatrix, F1Metric, SQuADF1
diff --git a/src/pytorch_ie/metrics/confusion_matrix.py b/src/pytorch_ie/metrics/confusion_matrix.py
deleted file mode 100644
index 4d1df92a..00000000
--- a/src/pytorch_ie/metrics/confusion_matrix.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import logging
-from collections import defaultdict
-from typing import Callable, Dict, Optional, Tuple, Union
-
-import pandas as pd
-from pie_core import Annotation, Document, DocumentMetric
-
-from pytorch_ie.utils.hydra import resolve_target
-
-logger = logging.getLogger(__name__)
-
-
-class ConfusionMatrix(DocumentMetric):
-    """Computes the confusion matrix for a given annotation layer that contains labeled annotations.
-
-    Args:
-        layer: The layer to compute the confusion matrix for.
-        label_field: The field to use for the label. Defaults to "label".
-        unassignable_label: The label to use for false negative annotations. Defaults to "UNASSIGNABLE".
-        undetected_label: The label to use for false positive annotations. Defaults to "UNDETECTED".
-        strict: If True, raises an error if a base annotation has multiple gold labels. If False, logs a warning.
-        show_as_markdown: If True, logs the confusion matrix as markdown on the console when calling compute().
-        annotation_processor: A callable that processes the annotations before calculating the confusion matrix.
-    """
-
-    def __init__(
-        self,
-        layer: str,
-        label_field: str = "label",
-        show_as_markdown: bool = False,
-        unassignable_label: str = "UNASSIGNABLE",
-        undetected_label: str = "UNDETECTED",
-        strict: bool = True,
-        annotation_processor: Optional[Union[Callable[[Annotation], Annotation], str]] = None,
-    ):
-        super().__init__()
-        self.layer = layer
-        self.label_field = label_field
-        self.unassignable_label = unassignable_label
-        self.undetected_label = undetected_label
-        self.strict = strict
-        self.show_as_markdown = show_as_markdown
-        self.annotation_processor: Optional[Callable[[Annotation], Annotation]]
-        if isinstance(annotation_processor, str):
-            self.annotation_processor = resolve_target(annotation_processor)
-        else:
-            self.annotation_processor = annotation_processor
-
-    def reset(self):
-        self.counts: Dict[Tuple[str, str], int] = defaultdict(int)
-
-    def calculate_counts(
-        self,
-        document: Document,
-        annotation_filter: Optional[Callable[[Annotation], bool]] = None,
-        annotation_processor: Optional[Callable[[Annotation], Annotation]] = None,
-    ) -> Dict[Tuple[str, str], int]:
-        annotation_processor = annotation_processor or (lambda ann: ann)
-        annotation_filter = annotation_filter or (lambda ann: True)
-        predicted_annotations = {
-            annotation_processor(ann)
-            for ann in document[self.layer].predictions
-            if annotation_filter(ann)
-        }
-        gold_annotations = {
-            annotation_processor(ann) for ann in document[self.layer] if annotation_filter(ann)
-        }
-        base2gold = defaultdict(list)
-        for ann in gold_annotations:
-            base_ann_kwargs = {self.label_field: "DUMMY_LABEL"}
-            base_ann = ann.copy(**base_ann_kwargs)
-            base2gold[base_ann].append(ann)
-        base2pred = defaultdict(list)
-        for ann in predicted_annotations:
-            base_ann_kwargs = {self.label_field: "DUMMY_LABEL"}
-            base_ann = ann.copy(**base_ann_kwargs)
-            base2pred[base_ann].append(ann)
-
-        # (gold_label, pred_label) -> count
-        counts: Dict[Tuple[str, str], int] = defaultdict(int)
-        for base_ann in set(base2gold) | set(base2pred):
-            gold_labels = [getattr(ann, self.label_field) for ann in base2gold[base_ann]]
-            pred_labels = [getattr(ann, self.label_field) for ann in base2pred[base_ann]]
-
-            if self.undetected_label in gold_labels:
-                raise ValueError(
-                    f"The gold annotation has the label '{self.undetected_label}' for undetected instances. "
-                    f"Set a different undetected_label."
-                )
-            if self.unassignable_label in pred_labels:
-                raise ValueError(
-                    f"The predicted annotation has the label '{self.unassignable_label}' for unassignable predictions. "
-                    f"Set a different unassignable_label."
-                )
-
-            if len(gold_labels) > 1:
-                msg = f"The base annotation {base_ann} has multiple gold labels: {gold_labels}."
-                if self.strict:
-                    raise ValueError(msg)
-                else:
-                    logger.warning(msg + " Skip this base annotation.")
-                    continue
-
-            # use placeholder labels for empty gold or prediction labels
-            if len(gold_labels) == 0:
-                gold_labels.append(self.undetected_label)
-            if len(pred_labels) == 0:
-                pred_labels.append(self.unassignable_label)
-
-            # main logic
-            for gold_label in gold_labels:
-                for pred_label in pred_labels:
-                    counts[(gold_label, pred_label)] += 1
-
-        return counts
-
-    def add_counts(self, counts: Dict[Tuple[str, str], int]):
-        for key, value in counts.items():
-            self.counts[key] += value
-
-    def _update(self, document: Document):
-        new_counts = self.calculate_counts(
-            document=document,
-            annotation_processor=self.annotation_processor,
-        )
-        self.add_counts(new_counts)
-
-    def _compute(self) -> Dict[str, Dict[str, int]]:
-
-        res: Dict[str, Dict[str, int]] = defaultdict(dict)
-        for gold_label, pred_label in sorted(self.counts):
-            res[gold_label][pred_label] = self.counts[(gold_label, pred_label)]
-
-        if self.show_as_markdown:
-            res_df = pd.DataFrame(res).fillna(0)
-            # index is prediction, columns is gold
-            gold_labels = res_df.columns
-            pred_labels = res_df.index
-
-            # re-arrange index and columns: sort and put undetected_label and unassignable_label at the end
-            gold_labels_sorted = sorted(
-                [gold_label for gold_label in gold_labels if gold_label != self.undetected_label]
-            )
-            # re-add undetected_label at the end, if it was in the gold labels
-            if self.undetected_label in gold_labels:
-                gold_labels_sorted = gold_labels_sorted + [self.undetected_label]
-            pred_labels_sorted = sorted(
-                [pred_label for pred_label in pred_labels if pred_label != self.unassignable_label]
-            )
-            # re-add unassignable_label at the end, if it was in the pred labels
-            if self.unassignable_label in pred_labels:
-                pred_labels_sorted = pred_labels_sorted + [self.unassignable_label]
-            res_df_sorted = res_df.loc[pred_labels_sorted, gold_labels_sorted]
-
-            # transpose and show as markdown: index is now gold, columns is prediction
-            logger.info(f"\n{self.layer}:\n{res_df_sorted.T.to_markdown()}")
-        return res
diff --git a/src/pytorch_ie/metrics/f1.py b/src/pytorch_ie/metrics/f1.py
deleted file mode 100644
index 7d4a02e8..00000000
--- a/src/pytorch_ie/metrics/f1.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import logging
-from collections import defaultdict
-from functools import partial
-from typing import Callable, Collection, Dict, Hashable, Optional, Tuple, Union
-
-import pandas as pd
-from pie_core import Annotation, Document, DocumentMetric
-
-from pytorch_ie.utils.hydra import resolve_target
-
-logger = logging.getLogger(__name__)
-
-
-def has_one_of_the_labels(ann: Annotation, label_field: str, labels: Collection[str]) -> bool:
-    return getattr(ann, label_field) in labels
-
-
-def has_this_label(ann: Annotation, label_field: str, label: str) -> bool:
-    return getattr(ann, label_field) == label
-
-
-class F1Metric(DocumentMetric):
-    """Computes the (micro aggregated) F1 score for a given layer. If labels are provided,
-    it also computes the F1 score for each label separately and the macro F1 score.
-
-    Args:
-        layer: The layer to compute the F1 score for.
-        labels: If provided, calculate F1 score for each label.
-        label_field: The field to use for the label. Defaults to "label".
-        show_as_markdown: If True, logs the F1 score as markdown on the console when calling compute().
-    """
-
-    def __init__(
-        self,
-        layer: str,
-        labels: Optional[Union[Collection[str], str]] = None,
-        label_field: str = "label",
-        show_as_markdown: bool = False,
-        annotation_processor: Optional[Union[Callable[[Annotation], Hashable], str]] = None,
-    ):
-        super().__init__()
-        self.layer = layer
-        self.label_field = label_field
-        self.show_as_markdown = show_as_markdown
-        self.annotation_processor: Optional[Callable[[Annotation], Hashable]]
-        if isinstance(annotation_processor, str):
-            self.annotation_processor = resolve_target(annotation_processor)
-        else:
-            self.annotation_processor = annotation_processor
-
-        self.per_label = labels is not None
-        self.infer_labels = False
-        if self.per_label:
-            if isinstance(labels, str):
-                if labels != "INFERRED":
-                    raise ValueError(
-                        "labels can only be 'INFERRED' if per_label is True and labels is a string"
-                    )
-                self.labels = []
-                self.infer_labels = True
-            elif isinstance(labels, Collection):
-                if not all(isinstance(label, str) for label in labels):
-                    raise ValueError("labels must be a collection of strings")
-                if "MICRO" in labels or "MACRO" in labels:
-                    raise ValueError(
-                        "labels cannot contain 'MICRO' or 'MACRO' because they are used to capture aggregated metrics"
-                    )
-                if len(labels) == 0:
-                    raise ValueError("labels cannot be empty")
-                self.labels = list(labels)
-            else:
-                raise ValueError("labels must be a string or a collection of strings")
-
-    def reset(self):
-        self.counts = defaultdict(lambda: (0, 0, 0))
-
-    def calculate_counts(
-        self,
-        document: Document,
-        annotation_filter: Optional[Callable[[Annotation], bool]] = None,
-        annotation_processor: Optional[Callable[[Annotation], Hashable]] = None,
-    ) -> Tuple[int, int, int]:
-        annotation_processor = annotation_processor or (lambda ann: ann)
-        annotation_filter = annotation_filter or (lambda ann: True)
-        predicted_annotations = {
-            annotation_processor(ann)
-            for ann in document[self.layer].predictions
-            if annotation_filter(ann)
-        }
-        gold_annotations = {
-            annotation_processor(ann) for ann in document[self.layer] if annotation_filter(ann)
-        }
-        tp = len([ann for ann in predicted_annotations & gold_annotations])
-        fn = len([ann for ann in gold_annotations - predicted_annotations])
-        fp = len([ann for ann in predicted_annotations - gold_annotations])
-        return tp, fp, fn
-
-    def add_counts(self, counts: Tuple[int, int, int], label: str):
-        self.counts[label] = (
-            self.counts[label][0] + counts[0],
-            self.counts[label][1] + counts[1],
-            self.counts[label][2] + counts[2],
-        )
-
-    def _update(self, document: Document):
-        new_counts = self.calculate_counts(
-            document=document,
-            annotation_filter=(
-                partial(has_one_of_the_labels, label_field=self.label_field, labels=self.labels)
-                if self.per_label and not self.infer_labels
-                else None
-            ),
-            annotation_processor=self.annotation_processor,
-        )
-        self.add_counts(new_counts, label="MICRO")
-        if self.infer_labels:
-            layer = document[self.layer]
-            # collect labels from gold data and predictions
-            for ann in list(layer) + list(layer.predictions):
-                label = getattr(ann, self.label_field)
-                if label not in self.labels:
-                    self.labels.append(label)
-        if self.per_label:
-            for label in self.labels:
-                new_counts = self.calculate_counts(
-                    document=document,
-                    annotation_filter=partial(
-                        has_this_label, label_field=self.label_field, label=label
-                    ),
-                    annotation_processor=self.annotation_processor,
-                )
-                self.add_counts(new_counts, label=label)
-
-    def _compute(self) -> Dict[str, Dict[str, float]]:
-        res = dict()
-        if self.per_label:
-            res["MACRO"]
= {"f1": 0.0, "p": 0.0, "r": 0.0} - for label, counts in self.counts.items(): - tp, fp, fn = counts - if tp == 0: - p, r, f1 = 0.0, 0.0, 0.0 - else: - p = tp / (tp + fp) - r = tp / (tp + fn) - f1 = 2 * p * r / (p + r) - res[label] = {"f1": f1, "p": p, "r": r, "s": tp + fn} - if self.per_label and label in self.labels: - res["MACRO"]["f1"] += f1 / len(self.labels) - res["MACRO"]["p"] += p / len(self.labels) - res["MACRO"]["r"] += r / len(self.labels) - if self.show_as_markdown: - logger.info(f"\n{self.layer}:\n{pd.DataFrame(res).round(3).T.to_markdown()}") - return res diff --git a/src/pytorch_ie/metrics/statistics.py b/src/pytorch_ie/metrics/statistics.py deleted file mode 100644 index b4a9c134..00000000 --- a/src/pytorch_ie/metrics/statistics.py +++ /dev/null @@ -1,135 +0,0 @@ -import logging -from collections import defaultdict -from typing import Any, Callable, Dict, List, Optional, Type, Union - -from pie_core import Document, DocumentStatistic -from transformers import AutoTokenizer, PreTrainedTokenizer - -from pytorch_ie.documents import TextBasedDocument - -logger = logging.getLogger(__name__) - - -class TokenCountCollector(DocumentStatistic): - """Collects the token count of a field when tokenizing its content with a Huggingface - tokenizer. - - The content of the field should be a string. - """ - - def __init__( - self, - tokenizer: Union[str, PreTrainedTokenizer], - text_field: str = "text", - tokenizer_kwargs: Optional[Dict[str, Any]] = None, - document_type: Optional[Type[Document]] = None, - **kwargs, - ): - if document_type is None and text_field == "text": - document_type = TextBasedDocument - super().__init__(document_type=document_type, **kwargs) - self.tokenizer = ( - AutoTokenizer.from_pretrained(tokenizer) if isinstance(tokenizer, str) else tokenizer - ) - self.tokenizer_kwargs = tokenizer_kwargs or {} - self.text_field = text_field - - def _collect(self, doc: Document) -> int: - text = getattr(doc, self.text_field) - encodings = self.tokenizer(text, **self.tokenizer_kwargs) - tokens = encodings.tokens() - return len(tokens) - - -class FieldLengthCollector(DocumentStatistic): - """Collects the length of a field, e.g. to collect the number the characters in the input text. - - The field should be a list of sized elements. - """ - - def __init__(self, field: str, **kwargs): - super().__init__(**kwargs) - self.field = field - - def _collect(self, doc: Document) -> int: - field_obj = getattr(doc, self.field) - return len(field_obj) - - -class SubFieldLengthCollector(DocumentStatistic): - """Collects the length of a subfield in a field, e.g. to collect the number of arguments of - N-ary relations.""" - - def __init__(self, field: str, subfield: str, **kwargs): - super().__init__(**kwargs) - self.field = field - self.subfield = subfield - - def _collect(self, doc: Document) -> List[int]: - field_obj = getattr(doc, self.field) - lengths = [] - for entry in field_obj: - subfield_obj = getattr(entry, self.subfield) - lengths.append(len(subfield_obj)) - return lengths - - -class DummyCollector(DocumentStatistic): - """A dummy collector that always returns 1, e.g. to count the number of documents. - - Can be used to count the number of documents. - """ - - DEFAULT_AGGREGATION_FUNCTIONS = ["sum"] - - def _collect(self, doc: Document) -> int: - return 1 - - -class LabelCountCollector(DocumentStatistic): - """Collects the number of field entries per label, e.g. to collect the number of entities per - type. - - The field should be a list of elements with a label attribute. 
- - Important: To make correct use of the result data, missing values need to be filled with 0, e.g.: - {("ORG",): [2, 3], ("LOC",): [2]} -> {("ORG",): [2, 3], ("LOC",): [2, 0]} - """ - - DEFAULT_AGGREGATION_FUNCTIONS = ["mean", "std", "min", "max", "len", "sum"] - - def __init__( - self, field: str, labels: Union[List[str], str], label_attribute: str = "label", **kwargs - ): - super().__init__(**kwargs) - self.field = field - self.label_attribute = label_attribute - if not (isinstance(labels, list) or labels == "INFERRED"): - raise ValueError("labels must be a list of strings or 'INFERRED'") - if labels == "INFERRED": - logger.warning( - f"Inferring labels with {self.__class__.__name__} from data produces wrong results " - f"for certain aggregation functions (e.g. 'mean', 'std', 'min') because zero values " - f"are not included in the calculation. We remove these aggregation functions from " - f"this collector, but be aware that the results may be wrong for your own aggregation " - f"functions that rely on zero values." - ) - self.aggregation_functions: Dict[str, Callable[[List], Any]] = { - name: func - for name, func in self.aggregation_functions.items() - if name not in ["mean", "std", "min"] - } - - self.labels = labels - - def _collect(self, doc: Document) -> Dict[str, int]: - field_obj = getattr(doc, self.field) - counts: Dict[str, int] - if self.labels == "INFERRED": - counts = defaultdict(int) - else: - counts = {label: 0 for label in self.labels} - for elem in field_obj: - label = getattr(elem, self.label_attribute) - counts[label] += 1 - return dict(counts) diff --git a/src/pytorch_ie/metrics/statistics/__init__.py b/src/pytorch_ie/metrics/statistics/__init__.py new file mode 100644 index 00000000..be9c913e --- /dev/null +++ b/src/pytorch_ie/metrics/statistics/__init__.py @@ -0,0 +1,8 @@ +from pie_documents.metrics.statistics import ( + DummyCollector, + FieldLengthCollector, + LabelCountCollector, + SubFieldLengthCollector, +) + +from .token_count_collector import TokenCountCollector diff --git a/src/pytorch_ie/metrics/statistics/token_count_collector.py b/src/pytorch_ie/metrics/statistics/token_count_collector.py new file mode 100644 index 00000000..91545a84 --- /dev/null +++ b/src/pytorch_ie/metrics/statistics/token_count_collector.py @@ -0,0 +1,40 @@ +import logging +from typing import Any, Dict, Optional, Type, Union + +from pie_core import Document, DocumentStatistic +from transformers import AutoTokenizer, PreTrainedTokenizer + +from pytorch_ie.documents import TextBasedDocument + +logger = logging.getLogger(__name__) + + +class TokenCountCollector(DocumentStatistic): + """Collects the token count of a field when tokenizing its content with a Huggingface + tokenizer. + + The content of the field should be a string. 
+ """ + + def __init__( + self, + tokenizer: Union[str, PreTrainedTokenizer], + text_field: str = "text", + tokenizer_kwargs: Optional[Dict[str, Any]] = None, + document_type: Optional[Type[Document]] = None, + **kwargs, + ): + if document_type is None and text_field == "text": + document_type = TextBasedDocument + super().__init__(document_type=document_type, **kwargs) + self.tokenizer = ( + AutoTokenizer.from_pretrained(tokenizer) if isinstance(tokenizer, str) else tokenizer + ) + self.tokenizer_kwargs = tokenizer_kwargs or {} + self.text_field = text_field + + def _collect(self, doc: Document) -> int: + text = getattr(doc, self.text_field) + encodings = self.tokenizer(text, **self.tokenizer_kwargs) + tokens = encodings.tokens() + return len(tokens) diff --git a/src/pytorch_ie/utils/hydra.py b/src/pytorch_ie/utils/hydra.py index 23bb51f6..2c3d562b 100644 --- a/src/pytorch_ie/utils/hydra.py +++ b/src/pytorch_ie/utils/hydra.py @@ -1,3 +1,4 @@ +# backwards compatibility from pie_core.utils.hydra import ( InstantiationException, resolve_optional_document_type, diff --git a/src/pytorch_ie/utils/span.py b/src/pytorch_ie/utils/span.py index ddef0d4c..b8f9bd8d 100644 --- a/src/pytorch_ie/utils/span.py +++ b/src/pytorch_ie/utils/span.py @@ -13,11 +13,14 @@ Tuple, ) +# backwards compatibility imports from pie_documents.utils.span +from pie_documents.utils.span import have_overlap as has_overlap +from pie_documents.utils.span import is_contained_in from transformers import PreTrainedTokenizer from pytorch_ie.annotations import LabeledSpan, Span -# TODO: most of this should be superseded by pie_modules.utils.span and pie_modules.utils.sequence_tagging, +# TODO: most of this should be superseded by pie_documents.utils.sequence_tagging, # remove respective content TypedSpan = Tuple[int, Tuple[int, int]] @@ -229,19 +232,6 @@ def get_token_slice( return start, before_end + 1 -def is_contained_in(start_end: Tuple[int, int], other_start_end: Tuple[int, int]) -> bool: - return other_start_end[0] <= start_end[0] and start_end[1] <= other_start_end[1] - - -def has_overlap(start_end: Tuple[int, int], other_start_end: Tuple[int, int]): - return ( - start_end[0] <= other_start_end[0] < start_end[1] - or start_end[0] < other_start_end[1] <= start_end[1] - or other_start_end[0] <= start_end[0] < other_start_end[1] - or other_start_end[0] < start_end[1] <= other_start_end[1] - ) - - def _char_to_token_mapper( char_idx: int, char_to_token_mapping: Dict[int, int], diff --git a/tests/conftest.py b/tests/conftest.py index c6f71c97..5417bbc5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,8 @@ import dataclasses import json -from typing import Dict, Optional import pytest -from pie_core import Annotation, AnnotationLayer, annotation_field +from pie_core import AnnotationLayer, annotation_field from pytorch_ie.annotations import BinaryRelation, LabeledSpan, Span from pytorch_ie.documents import TextDocument @@ -61,13 +60,3 @@ def documents(document_dataset): def test_documents(documents): assert len(documents) == 8 assert all(isinstance(doc, TestDocument) for doc in documents) - - -def _test_annotation_reconstruction( - annotation: Annotation, annotation_store: Optional[Dict[int, Annotation]] = None -): - ann_str = json.dumps(annotation.asdict()) - annotation_reconstructed = type(annotation).fromdict( - json.loads(ann_str), annotation_store=annotation_store - ) - assert annotation_reconstructed == annotation diff --git a/tests/metrics/test_confusion_matrix.py b/tests/metrics/test_confusion_matrix.py 
deleted file mode 100644 index cf09d6c4..00000000 --- a/tests/metrics/test_confusion_matrix.py +++ /dev/null @@ -1,60 +0,0 @@ -from dataclasses import dataclass - -import pytest -from pie_core import AnnotationLayer, annotation_field - -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.documents import TextBasedDocument -from pytorch_ie.metrics import ConfusionMatrix - - -@pytest.fixture -def documents(): - @dataclass - class TextDocumentWithEntities(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - - # a test sentence with two entities - doc1 = TextDocumentWithEntities( - text="The quick brown fox jumps over the lazy dog.", - ) - doc1.entities.append(LabeledSpan(start=4, end=19, label="animal")) - doc1.entities.append(LabeledSpan(start=35, end=43, label="animal")) - assert str(doc1.entities[0]) == "quick brown fox" - assert str(doc1.entities[1]) == "lazy dog" - - # a second test sentence with a different text and a single entity (a company) - doc2 = TextDocumentWithEntities(text="Apple is a great company.") - doc2.entities.append(LabeledSpan(start=0, end=5, label="company")) - assert str(doc2.entities[0]) == "Apple" - - documents = [doc1, doc2] - - # add predictions - # correct - documents[0].entities.predictions.append(LabeledSpan(start=4, end=19, label="animal")) - # wrong label - documents[0].entities.predictions.append(LabeledSpan(start=35, end=43, label="cat")) - # correct - documents[1].entities.predictions.append(LabeledSpan(start=0, end=5, label="company")) - # wrong span - documents[1].entities.predictions.append(LabeledSpan(start=10, end=15, label="company")) - - return documents - - -def test_confusion_matrix(documents): - metric = ConfusionMatrix(layer="entities") - metric(documents) - # (gold_label, predicted_label): count - assert dict(metric.counts) == { - ("animal", "animal"): 1, - ("animal", "cat"): 1, - ("UNDETECTED", "company"): 1, - ("company", "company"): 1, - } - assert metric.compute() == { - "animal": {"animal": 1, "cat": 1}, - "UNDETECTED": {"company": 1}, - "company": {"company": 1}, - } diff --git a/tests/metrics/test_f1.py b/tests/metrics/test_f1.py deleted file mode 100644 index 5e6e5539..00000000 --- a/tests/metrics/test_f1.py +++ /dev/null @@ -1,128 +0,0 @@ -from dataclasses import dataclass - -import pytest -from pie_core import AnnotationLayer, annotation_field - -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.documents import TextBasedDocument -from pytorch_ie.metrics import F1Metric - - -@pytest.fixture -def documents(): - @dataclass - class TextDocumentWithEntities(TextBasedDocument): - entities: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - - # a test sentence with two entities - doc1 = TextDocumentWithEntities( - text="The quick brown fox jumps over the lazy dog.", - ) - doc1.entities.append(LabeledSpan(start=4, end=19, label="animal")) - doc1.entities.append(LabeledSpan(start=35, end=43, label="animal")) - assert str(doc1.entities[0]) == "quick brown fox" - assert str(doc1.entities[1]) == "lazy dog" - - # a second test sentence with a different text and a single entity (a company) - doc2 = TextDocumentWithEntities(text="Apple is a great company.") - doc2.entities.append(LabeledSpan(start=0, end=5, label="company")) - assert str(doc2.entities[0]) == "Apple" - - documents = [doc1, doc2] - - # add predictions - # correct - documents[0].entities.predictions.append(LabeledSpan(start=4, end=19, label="animal")) - # correct, but duplicate, this should not be 
counted - documents[0].entities.predictions.append(LabeledSpan(start=4, end=19, label="animal")) - # correct - documents[0].entities.predictions.append(LabeledSpan(start=35, end=43, label="animal")) - # wrong label - documents[0].entities.predictions.append(LabeledSpan(start=35, end=43, label="cat")) - # correct - documents[1].entities.predictions.append(LabeledSpan(start=0, end=5, label="company")) - # wrong span - documents[1].entities.predictions.append(LabeledSpan(start=10, end=15, label="company")) - - return documents - - -def test_f1(documents): - metric = F1Metric(layer="entities") - metric(documents) - # tp, fp, fn for micro - assert dict(metric.counts) == {"MICRO": (3, 2, 0)} - assert metric.compute() == {"MICRO": {"f1": 0.7499999999999999, "p": 0.6, "r": 1.0, "s": 3}} - - -def test_f1_per_label(documents): - metric = F1Metric(layer="entities", labels=["animal", "company", "cat"]) - metric(documents) - # tp, fp, fn for micro and per label - assert dict(metric.counts) == { - "MICRO": (3, 2, 0), - "cat": (0, 1, 0), - "company": (1, 1, 0), - "animal": (2, 0, 0), - } - assert metric.compute() == { - "MACRO": {"f1": 0.5555555555555556, "p": 0.5, "r": 0.6666666666666666}, - "MICRO": {"f1": 0.7499999999999999, "p": 0.6, "r": 1.0, "s": 3}, - "animal": {"f1": 1.0, "p": 1.0, "r": 1.0, "s": 2}, - "cat": {"f1": 0.0, "p": 0.0, "r": 0.0, "s": 0}, - "company": {"f1": 0.6666666666666666, "p": 0.5, "r": 1.0, "s": 1}, - } - - -def test_f1_per_label_inferred(documents): - metric = F1Metric(layer="entities", labels="INFERRED") - metric(documents) - # tp, fp, fn for micro and per label - assert dict(metric.counts) == { - "MICRO": (3, 2, 0), - "animal": (2, 0, 0), - "company": (1, 1, 0), - "cat": (0, 1, 0), - } - assert metric.compute() == { - "MACRO": {"f1": 0.5555555555555556, "p": 0.5, "r": 0.6666666666666666}, - "MICRO": {"f1": 0.7499999999999999, "p": 0.6, "r": 1.0, "s": 3}, - "animal": {"f1": 1.0, "p": 1.0, "r": 1.0, "s": 2}, - "cat": {"f1": 0.0, "p": 0.0, "r": 0.0, "s": 0}, - "company": {"f1": 0.6666666666666666, "p": 0.5, "r": 1.0, "s": 1}, - } - - -def test_f1_per_label_no_labels(documents): - with pytest.raises(ValueError) as excinfo: - F1Metric(layer="entities", labels=[]) - assert str(excinfo.value) == "labels cannot be empty" - - -def test_f1_per_label_not_allowed(): - with pytest.raises(ValueError) as excinfo: - F1Metric(layer="entities", labels=["animal", "MICRO"]) - assert ( - str(excinfo.value) - == "labels cannot contain 'MICRO' or 'MACRO' because they are used to capture aggregated metrics" - ) - - -# def test_f1_show_as_markdown(documents, caplog): -# metric = F1Metric(layer="entities", labels=["animal", "company", "cat"], show_as_markdown=True) -# metric(documents) -# caplog.set_level(logging.INFO) -# caplog.clear() -# metric.compute() -# assert len(caplog.records) == 1 -# assert ( -# caplog.records[0].message == "\n" -# "entities:\n" -# "| | f1 | p | r |\n" -# "|:--------|------:|----:|------:|\n" -# "| MACRO | 0.556 | 0.5 | 0.667 |\n" -# "| MICRO | 0.75 | 0.6 | 1 |\n" -# "| animal | 1 | 1 | 1 |\n" -# "| company | 0.667 | 0.5 | 1 |\n" -# "| cat | 0 | 0 | 0 |" -# ) diff --git a/tests/metrics/test_statistics.py b/tests/metrics/test_statistics.py index 8035b462..d48dd767 100644 --- a/tests/metrics/test_statistics.py +++ b/tests/metrics/test_statistics.py @@ -1,76 +1,4 @@ -from pytorch_ie.metrics.statistics import ( - DummyCollector, - FieldLengthCollector, - LabelCountCollector, - SubFieldLengthCollector, - TokenCountCollector, -) - - -def test_statistics(document_dataset): - 
statistic = DummyCollector() - values = statistic(document_dataset) - assert values == {"test": {"sum": 2}, "train": {"sum": 8}, "val": {"sum": 2}} - - # note that we check for labels=["LOC", "PER", "ORG"], but the actual labels in the data are just ["PER", "ORG"] - statistic = LabelCountCollector(field="entities", labels=["LOC", "PER", "ORG"]) - values = statistic(document_dataset) - assert values == { - "test": { - "LOC": {"mean": 0.0, "std": 0.0, "min": 0, "max": 0, "len": 2, "sum": 0}, - "PER": {"mean": 0.5, "std": 0.5, "min": 0, "max": 1, "len": 2, "sum": 1}, - "ORG": {"mean": 1.0, "std": 1.0, "min": 0, "max": 2, "len": 2, "sum": 2}, - }, - "val": { - "LOC": {"mean": 0.0, "std": 0.0, "min": 0, "max": 0, "len": 2, "sum": 0}, - "PER": {"mean": 0.5, "std": 0.5, "min": 0, "max": 1, "len": 2, "sum": 1}, - "ORG": {"mean": 1.0, "std": 1.0, "min": 0, "max": 2, "len": 2, "sum": 2}, - }, - "train": { - "LOC": {"mean": 0.0, "std": 0.0, "min": 0, "max": 0, "len": 8, "sum": 0}, - "PER": { - "mean": 0.875, - "std": 0.5994789404140899, - "min": 0, - "max": 2, - "len": 8, - "sum": 7, - }, - "ORG": { - "mean": 1.125, - "std": 0.7806247497997998, - "min": 0, - "max": 2, - "len": 8, - "sum": 9, - }, - }, - } - - statistic = LabelCountCollector(field="entities", labels="INFERRED") - values = statistic(document_dataset) - assert values == { - "test": {"PER": {"max": 1, "len": 1, "sum": 1}, "ORG": {"max": 2, "len": 1, "sum": 2}}, - "val": {"PER": {"max": 1, "len": 1, "sum": 1}, "ORG": {"max": 2, "len": 1, "sum": 2}}, - "train": {"PER": {"max": 2, "len": 6, "sum": 7}, "ORG": {"max": 2, "len": 6, "sum": 9}}, - } - - statistic = FieldLengthCollector(field="text") - values = statistic(document_dataset) - assert values == { - "test": {"max": 51, "mean": 34.5, "min": 18, "std": 16.5}, - "train": {"max": 54, "mean": 28.25, "min": 15, "std": 14.694812009685595}, - "val": {"max": 51, "mean": 34.5, "min": 18, "std": 16.5}, - } - - # this is not super useful, we just collect the lengths of the labels, but it is enough to test the code - statistic = SubFieldLengthCollector(field="entities", subfield="label") - values = statistic(document_dataset) - assert values == { - "test": {"max": 3, "mean": 3.0, "min": 3, "std": 0.0}, - "train": {"max": 3, "mean": 3.0, "min": 3, "std": 0.0}, - "val": {"max": 3, "mean": 3.0, "min": 3, "std": 0.0}, - } +from pytorch_ie.metrics.statistics import TokenCountCollector def test_statistics_with_tokenize(document_dataset): diff --git a/tests/test_annotations.py b/tests/test_annotations.py deleted file mode 100644 index 03116aa9..00000000 --- a/tests/test_annotations.py +++ /dev/null @@ -1,377 +0,0 @@ -import dataclasses -import re - -import pytest - -from pytorch_ie import AnnotationLayer, annotation_field -from pytorch_ie.annotations import ( - BinaryRelation, - Label, - LabeledMultiSpan, - LabeledSpan, - MultiLabel, - MultiLabeledBinaryRelation, - MultiLabeledSpan, - NaryRelation, - Span, -) -from pytorch_ie.documents import TextBasedDocument -from tests.conftest import _test_annotation_reconstruction - - -def test_label(): - label1 = Label(label="label1") - assert label1.label == "label1" - assert label1.score == pytest.approx(1.0) - assert label1.resolve() == "label1" - - label2 = Label(label="label2", score=0.5) - assert label2.label == "label2" - assert label2.score == pytest.approx(0.5) - - assert label2.asdict() == { - "_id": label2._id, - "label": "label2", - "score": 0.5, - } - - _test_annotation_reconstruction(label2) - - -def test_multilabel(): - multilabel1 = 
MultiLabel(label=("label1", "label2")) - assert multilabel1.label == ("label1", "label2") - assert multilabel1.score == pytest.approx((1.0, 1.0)) - assert multilabel1.resolve() == ("label1", "label2") - - multilabel2 = MultiLabel(label=("label3", "label4"), score=(0.4, 0.5)) - assert multilabel2.label == ("label3", "label4") - assert multilabel2.score == pytest.approx((0.4, 0.5)) - - assert multilabel2.asdict() == { - "_id": multilabel2._id, - "label": ("label3", "label4"), - "score": (0.4, 0.5), - } - - _test_annotation_reconstruction(multilabel2) - - with pytest.raises( - ValueError, match=re.escape("Number of labels (2) and scores (3) must be equal.") - ): - MultiLabel(label=("label5", "label6"), score=(0.1, 0.2, 0.3)) - - -def test_span(): - span = Span(start=1, end=2) - assert span.start == 1 - assert span.end == 2 - - assert span.asdict() == { - "_id": span._id, - "start": 1, - "end": 2, - } - - _test_annotation_reconstruction(span) - - with pytest.raises(ValueError) as excinfo: - span.resolve() - assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." - - @dataclasses.dataclass - class TestDocument(TextBasedDocument): - spans: AnnotationLayer[Span] = annotation_field(target="text") - - doc = TestDocument(text="Hello, world!") - span = Span(start=7, end=12) - doc.spans.append(span) - assert span.resolve() == "world" - - -def test_labeled_span(): - labeled_span1 = LabeledSpan(start=1, end=2, label="label1") - assert labeled_span1.start == 1 - assert labeled_span1.end == 2 - assert labeled_span1.label == "label1" - assert labeled_span1.score == pytest.approx(1.0) - - labeled_span2 = LabeledSpan(start=3, end=4, label="label2", score=0.5) - assert labeled_span2.start == 3 - assert labeled_span2.end == 4 - assert labeled_span2.label == "label2" - assert labeled_span2.score == pytest.approx(0.5) - - assert labeled_span2.asdict() == { - "_id": labeled_span2._id, - "start": 3, - "end": 4, - "label": "label2", - "score": 0.5, - } - - _test_annotation_reconstruction(labeled_span2) - - with pytest.raises(ValueError) as excinfo: - labeled_span1.resolve() - assert ( - str(excinfo.value) - == "LabeledSpan(start=1, end=2, label='label1', score=1.0) is not attached to a target." 
- ) - - @dataclasses.dataclass - class TestDocument(TextBasedDocument): - spans: AnnotationLayer[LabeledSpan] = annotation_field(target="text") - - doc = TestDocument(text="Hello, world!") - labeled_span = LabeledSpan(start=7, end=12, label="LOC") - doc.spans.append(labeled_span) - assert labeled_span.resolve() == ("LOC", "world") - - -def test_multilabeled_span(): - multilabeled_span1 = MultiLabeledSpan(start=1, end=2, label=("label1", "label2")) - assert multilabeled_span1.start == 1 - assert multilabeled_span1.end == 2 - assert multilabeled_span1.label == ("label1", "label2") - assert multilabeled_span1.score == pytest.approx((1.0, 1.0)) - - multilabeled_span2 = MultiLabeledSpan( - start=3, end=4, label=("label3", "label4"), score=(0.4, 0.5) - ) - assert multilabeled_span2.start == 3 - assert multilabeled_span2.end == 4 - assert multilabeled_span2.label == ("label3", "label4") - assert multilabeled_span2.score == pytest.approx((0.4, 0.5)) - - assert multilabeled_span2.asdict() == { - "_id": multilabeled_span2._id, - "start": 3, - "end": 4, - "label": ("label3", "label4"), - "score": (0.4, 0.5), - } - - _test_annotation_reconstruction(multilabeled_span2) - - with pytest.raises( - ValueError, match=re.escape("Number of labels (2) and scores (3) must be equal.") - ): - MultiLabeledSpan(start=5, end=6, label=("label5", "label6"), score=(0.1, 0.2, 0.3)) - - with pytest.raises(ValueError) as excinfo: - multilabeled_span1.resolve() - assert ( - str(excinfo.value) - == "MultiLabeledSpan(start=1, end=2, label=('label1', 'label2'), score=(1.0, 1.0)) is not attached to a target." - ) - - @dataclasses.dataclass - class TestDocument(TextBasedDocument): - spans: AnnotationLayer[MultiLabeledSpan] = annotation_field(target="text") - - doc = TestDocument(text="Hello, world!") - multilabeled_span = MultiLabeledSpan(start=7, end=12, label=("LOC", "ORG")) - doc.spans.append(multilabeled_span) - assert multilabeled_span.resolve() == (("LOC", "ORG"), "world") - - -def test_labeled_multi_span(): - labeled_multi_span1 = LabeledMultiSpan(slices=((1, 2), (3, 4)), label="label1") - assert labeled_multi_span1.slices == ((1, 2), (3, 4)) - assert labeled_multi_span1.label == "label1" - assert labeled_multi_span1.score == pytest.approx(1.0) - - labeled_multi_span2 = LabeledMultiSpan( - slices=((5, 6), (7, 8)), - label="label2", - score=0.5, - ) - assert labeled_multi_span2.slices == ((5, 6), (7, 8)) - assert labeled_multi_span2.label == "label2" - assert labeled_multi_span2.score == pytest.approx(0.5) - - assert labeled_multi_span2.asdict() == { - "_id": labeled_multi_span2._id, - "slices": ((5, 6), (7, 8)), - "label": "label2", - "score": 0.5, - } - - _test_annotation_reconstruction(labeled_multi_span2) - - -def test_binary_relation(): - head = Span(start=1, end=2) - tail = Span(start=3, end=4) - - binary_relation1 = BinaryRelation(head=head, tail=tail, label="label1") - assert binary_relation1.head == head - assert binary_relation1.tail == tail - assert binary_relation1.label == "label1" - assert binary_relation1.score == pytest.approx(1.0) - - binary_relation2 = BinaryRelation(head=head, tail=tail, label="label2", score=0.5) - assert binary_relation2.head == head - assert binary_relation2.tail == tail - assert binary_relation2.label == "label2" - assert binary_relation2.score == pytest.approx(0.5) - - assert binary_relation2.asdict() == { - "_id": binary_relation2._id, - "head": head._id, - "tail": tail._id, - "label": "label2", - "score": 0.5, - } - - annotation_store = { - head._id: head, - tail._id: tail, - } - 
_test_annotation_reconstruction(binary_relation2, annotation_store=annotation_store) - - with pytest.raises( - ValueError, - match=re.escape("Unable to resolve the annotation id without annotation_store."), - ): - BinaryRelation.fromdict(binary_relation2.asdict()) - - with pytest.raises(ValueError) as excinfo: - binary_relation1.resolve() - assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." - - @dataclasses.dataclass - class TestDocument(TextBasedDocument): - spans: AnnotationLayer[Span] = annotation_field(target="text") - relations: AnnotationLayer[BinaryRelation] = annotation_field(target="spans") - - doc = TestDocument(text="Hello, world!") - head = Span(start=0, end=5) - tail = Span(start=7, end=12) - doc.spans.extend([head, tail]) - relation = BinaryRelation(head=head, tail=tail, label="LABEL") - doc.relations.append(relation) - assert relation.resolve() == ("LABEL", ("Hello", "world")) - - -def test_multilabeled_binary_relation(): - head = Span(start=1, end=2) - tail = Span(start=3, end=4) - - binary_relation1 = MultiLabeledBinaryRelation(head=head, tail=tail, label=("label1", "label2")) - assert binary_relation1.head == head - assert binary_relation1.tail == tail - assert binary_relation1.label == ("label1", "label2") - assert binary_relation1.score == pytest.approx((1.0, 1.0)) - - binary_relation2 = MultiLabeledBinaryRelation( - head=head, tail=tail, label=("label3", "label4"), score=(0.4, 0.5) - ) - assert binary_relation2.head == head - assert binary_relation2.tail == tail - assert binary_relation2.label == ("label3", "label4") - assert binary_relation2.score == pytest.approx((0.4, 0.5)) - - assert binary_relation2.asdict() == { - "_id": binary_relation2._id, - "head": head._id, - "tail": tail._id, - "label": ("label3", "label4"), - "score": (0.4, 0.5), - } - - annotation_store = { - head._id: head, - tail._id: tail, - } - _test_annotation_reconstruction(binary_relation2, annotation_store=annotation_store) - - with pytest.raises( - ValueError, - match=re.escape("Unable to resolve the annotation id without annotation_store."), - ): - MultiLabeledBinaryRelation.fromdict(binary_relation2.asdict()) - - with pytest.raises( - ValueError, match=re.escape("Number of labels (2) and scores (3) must be equal.") - ): - MultiLabeledBinaryRelation( - head=head, tail=tail, label=("label5", "label6"), score=(0.1, 0.2, 0.3) - ) - - with pytest.raises(ValueError) as excinfo: - binary_relation1.resolve() - assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." 
- - @dataclasses.dataclass - class TestDocument(TextBasedDocument): - spans: AnnotationLayer[Span] = annotation_field(target="text") - relations: AnnotationLayer[MultiLabeledBinaryRelation] = annotation_field(target="spans") - - doc = TestDocument(text="Hello, world!") - head = Span(start=0, end=5) - tail = Span(start=7, end=12) - doc.spans.extend([head, tail]) - relation = MultiLabeledBinaryRelation(head=head, tail=tail, label=("LABEL1", "LABEL2")) - doc.relations.append(relation) - assert relation.resolve() == (("LABEL1", "LABEL2"), ("Hello", "world")) - - -def test_nary_relation(): - arg1 = Span(start=1, end=2) - arg2 = Span(start=3, end=4) - arg3 = Span(start=5, end=6) - - nary_relation1 = NaryRelation( - arguments=(arg1, arg2, arg3), roles=("role1", "role2", "role3"), label="label1" - ) - - assert nary_relation1.arguments == (arg1, arg2, arg3) - assert nary_relation1.roles == ("role1", "role2", "role3") - assert nary_relation1.label == "label1" - assert nary_relation1.score == pytest.approx(1.0) - - assert nary_relation1.asdict() == { - "_id": nary_relation1._id, - "arguments": [arg1._id, arg2._id, arg3._id], - "roles": ("role1", "role2", "role3"), - "label": "label1", - "score": 1.0, - } - - annotation_store = { - arg1._id: arg1, - arg2._id: arg2, - arg3._id: arg3, - } - _test_annotation_reconstruction(nary_relation1, annotation_store=annotation_store) - - with pytest.raises( - ValueError, - match=re.escape("Unable to resolve the annotation id without annotation_store."), - ): - NaryRelation.fromdict(nary_relation1.asdict()) - - with pytest.raises(ValueError) as excinfo: - nary_relation1.resolve() - assert str(excinfo.value) == "Span(start=1, end=2) is not attached to a target." - - @dataclasses.dataclass - class TestDocument(TextBasedDocument): - spans: AnnotationLayer[Span] = annotation_field(target="text") - relations: AnnotationLayer[NaryRelation] = annotation_field(target="spans") - - doc = TestDocument(text="Hello, world A and B!") - arg1 = Span(start=0, end=5) - arg2 = Span(start=7, end=14) - arg3 = Span(start=19, end=20) - doc.spans.extend([arg1, arg2, arg3]) - relation = NaryRelation( - arguments=(arg1, arg2, arg3), roles=("ARG1", "ARG2", "ARG3"), label="LABEL" - ) - doc.relations.append(relation) - assert relation.resolve() == ( - "LABEL", - (("ARG1", "Hello"), ("ARG2", "world A"), ("ARG3", "B")), - ) diff --git a/tests/utils/test_span.py b/tests/utils/test_span.py deleted file mode 100644 index 555da4f0..00000000 --- a/tests/utils/test_span.py +++ /dev/null @@ -1,442 +0,0 @@ -# import pytest - -# from pytorch_ie.utils.span import ( -# convert_span_annotations_to_tag_sequence, -# get_char_to_token_mapper, -# has_overlap, -# ) -# from tests.fixtures.document import get_doc1, get_doc2, get_doc3 - - -# @pytest.fixture -# def documents(): -# doc_kwargs = dict( -# assert_span_text=True, -# ) -# documents = [get_doc1(**doc_kwargs), get_doc2(**doc_kwargs), get_doc3(**doc_kwargs)] -# return documents - - -# @pytest.mark.skip -# def test_get_char_to_token_mapper(): -# # TODO: implement! -# pass - - -# @pytest.mark.skip -# def test_get_special_token_mask(): -# # TODO: implement! 
-# pass - - -# def test_convert_span_annotations_to_tag_sequence(documents): -# doc = documents[0] -# entities = doc.annotations.spans["entities"] -# assert len(entities) == 3 -# char_to_token_mapping = { -# 0: 1, -# 1: 1, -# 2: 1, -# 3: 1, -# 5: 2, -# 6: 2, -# 7: 2, -# 8: 2, -# 9: 2, -# 11: 3, -# 12: 3, -# 14: 4, -# 15: 4, -# 16: 4, -# 17: 4, -# 18: 4, -# 19: 4, -# 20: 5, -# 22: 6, -# 23: 6, -# 24: 6, -# 25: 6, -# 27: 7, -# 28: 7, -# 30: 8, -# 31: 8, -# 33: 9, -# 34: 9, -# 35: 9, -# 36: 9, -# 37: 9, -# 38: 9, -# 39: 9, -# 40: 9, -# 42: 10, -# 43: 10, -# 44: 10, -# 45: 10, -# 46: 10, -# 48: 11, -# 49: 11, -# 50: 11, -# 51: 11, -# } -# char_to_token_mapper = get_char_to_token_mapper( -# char_to_token_mapping=char_to_token_mapping, -# ) -# special_tokens_mask = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] -# tag_sequence = convert_span_annotations_to_tag_sequence( -# spans=entities, -# special_tokens_mask=special_tokens_mask, -# char_to_token_mapper=char_to_token_mapper, -# ) -# assert tag_sequence == [ -# None, -# "B-person", -# "O", -# "O", -# "B-city", -# "O", -# "O", -# "O", -# "O", -# "O", -# "O", -# "B-person", -# None, -# ] - -# doc = documents[1] -# entities = doc.annotations.spans["entities"] -# assert len(entities) == 2 -# char_to_token_mapping = { -# 0: 1, -# 1: 1, -# 2: 1, -# 3: 1, -# 4: 1, -# 5: 1, -# 6: 1, -# 8: 2, -# 9: 2, -# 11: 3, -# 13: 4, -# 14: 4, -# 15: 4, -# 16: 4, -# 17: 4, -# 19: 5, -# 20: 5, -# 21: 5, -# 22: 5, -# 23: 6, -# 25: 7, -# 26: 7, -# 27: 7, -# 28: 7, -# 29: 7, -# 31: 8, -# 32: 8, -# 33: 9, -# 34: 9, -# 35: 9, -# 36: 10, -# 38: 11, -# 39: 11, -# 41: 12, -# 42: 12, -# 43: 12, -# 45: 13, -# 46: 13, -# 47: 13, -# 48: 13, -# 49: 14, -# 50: 15, -# 52: 16, -# 53: 16, -# 54: 16, -# 55: 16, -# 56: 16, -# 57: 17, -# } -# char_to_token_mapper = get_char_to_token_mapper( -# char_to_token_mapping=char_to_token_mapping, -# ) -# special_tokens_mask = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] -# tag_sequence = convert_span_annotations_to_tag_sequence( -# spans=entities, -# special_tokens_mask=special_tokens_mask, -# char_to_token_mapper=char_to_token_mapper, -# ) -# assert tag_sequence == [ -# None, -# "B-city", -# "O", -# "O", -# "O", -# "O", -# "O", -# "B-person", -# "I-person", -# "I-person", -# "I-person", -# "O", -# "O", -# "O", -# "O", -# "O", -# "O", -# "O", -# None, -# ] - -# doc = documents[2] -# entities = doc.annotations.spans["entities"] -# assert len(entities) == 2 -# char_to_token_mapping = { -# 0: 1, -# 1: 1, -# 2: 1, -# 3: 1, -# 5: 2, -# 6: 2, -# 7: 2, -# 8: 2, -# 9: 2, -# 10: 2, -# 12: 3, -# 13: 3, -# 14: 3, -# 15: 3, -# 16: 3, -# 18: 4, -# 19: 4, -# 20: 4, -# 21: 4, -# 23: 5, -# 24: 5, -# 26: 6, -# 27: 6, -# 28: 6, -# 29: 6, -# 30: 6, -# 31: 6, -# 32: 7, -# } -# char_to_token_mapper = get_char_to_token_mapper( -# char_to_token_mapping=char_to_token_mapping, -# ) -# special_tokens_mask = [1, 0, 0, 0, 0, 0, 0, 0, 1] -# tag_sequence = convert_span_annotations_to_tag_sequence( -# spans=entities, -# special_tokens_mask=special_tokens_mask, -# char_to_token_mapper=char_to_token_mapper, -# ) -# assert tag_sequence == [None, "B-person", "O", "O", "O", "O", "B-city", "O", None] - - -# def test_convert_span_annotations_to_tag_sequence_with_partition(documents): -# doc = documents[0] -# entities = doc.annotations.spans["entities"] -# assert len(entities) == 3 -# partitions = doc.annotations.spans["sentences"] -# assert len(partitions) == 1 -# partition = partitions[0] -# char_to_token_mapping = { -# 0: 1, -# 1: 1, -# 2: 1, -# 3: 1, -# 5: 2, -# 6: 2, -# 7: 
2, -# 8: 2, -# 9: 2, -# 11: 3, -# 12: 3, -# 14: 4, -# 15: 4, -# 16: 4, -# 17: 4, -# 18: 4, -# 19: 4, -# 20: 5, -# } -# special_tokens_mask = [1, 0, 0, 0, 0, 0, 1] -# char_to_token_mapper = get_char_to_token_mapper( -# char_to_token_mapping=char_to_token_mapping, -# ) -# tag_sequence = convert_span_annotations_to_tag_sequence( -# spans=entities, -# special_tokens_mask=special_tokens_mask, -# char_to_token_mapper=char_to_token_mapper, -# partition=partition, -# ) -# assert tag_sequence == [None, "B-person", "O", "O", "B-city", "O", None] - -# doc = documents[1] -# entities = doc.annotations.spans["entities"] -# assert len(entities) == 2 -# partitions = doc.annotations.spans["sentences"] -# assert len(partitions) == 2 -# partition = partitions[0] -# char_to_token_mapping = { -# 0: 1, -# 1: 1, -# 2: 1, -# 3: 1, -# 4: 1, -# 5: 1, -# 6: 1, -# 8: 2, -# 9: 2, -# 11: 3, -# 13: 4, -# 14: 4, -# 15: 4, -# 16: 4, -# 17: 4, -# 19: 5, -# 20: 5, -# 21: 5, -# 22: 5, -# 23: 6, -# } -# special_tokens_mask = [1, 0, 0, 0, 0, 0, 0, 1] -# char_to_token_mapper = get_char_to_token_mapper( -# char_to_token_mapping=char_to_token_mapping, -# ) -# tag_sequence = convert_span_annotations_to_tag_sequence( -# spans=entities, -# special_tokens_mask=special_tokens_mask, -# char_to_token_mapper=char_to_token_mapper, -# partition=partition, -# ) -# assert tag_sequence == [None, "B-city", "O", "O", "O", "O", "O", None] -# partition = partitions[1] -# char_to_token_mapping = { -# 0: 1, -# 1: 1, -# 2: 1, -# 3: 1, -# 4: 1, -# 6: 2, -# 7: 2, -# 8: 3, -# 9: 3, -# 10: 3, -# 11: 4, -# 13: 5, -# 14: 5, -# 16: 6, -# 17: 6, -# 18: 6, -# 20: 7, -# 21: 7, -# 22: 7, -# 23: 7, -# 24: 8, -# 25: 9, -# 27: 10, -# 28: 10, -# 29: 10, -# 30: 10, -# 31: 10, -# 32: 11, -# } -# special_tokens_mask = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] -# char_to_token_mapper = get_char_to_token_mapper( -# char_to_token_mapping=char_to_token_mapping, -# ) -# tag_sequence = convert_span_annotations_to_tag_sequence( -# spans=entities, -# special_tokens_mask=special_tokens_mask, -# char_to_token_mapper=char_to_token_mapper, -# partition=partition, -# ) -# assert tag_sequence == [ -# None, -# "B-person", -# "I-person", -# "I-person", -# "I-person", -# "O", -# "O", -# "O", -# "O", -# "O", -# "O", -# "O", -# None, -# ] - -# doc = documents[2] -# entities = doc.annotations.spans["entities"] -# assert len(entities) == 2 -# partitions = doc.annotations.spans["sentences"] -# assert len(partitions) == 1 -# partition = partitions[0] -# char_to_token_mapping = { -# 0: 1, -# 1: 1, -# 2: 1, -# 3: 1, -# 5: 2, -# 6: 2, -# 7: 2, -# 8: 2, -# 9: 2, -# 10: 2, -# 12: 3, -# 13: 3, -# 14: 3, -# 15: 3, -# 16: 3, -# 18: 4, -# 19: 4, -# 20: 4, -# 21: 4, -# 23: 5, -# 24: 5, -# 26: 6, -# 27: 6, -# 28: 6, -# 29: 6, -# 30: 6, -# 31: 6, -# 32: 7, -# } -# special_tokens_mask = [1, 0, 0, 0, 0, 0, 0, 0, 1] -# char_to_token_mapper = get_char_to_token_mapper( -# char_to_token_mapping=char_to_token_mapping, -# ) -# tag_sequence = convert_span_annotations_to_tag_sequence( -# spans=entities, -# special_tokens_mask=special_tokens_mask, -# char_to_token_mapper=char_to_token_mapper, -# partition=partition, -# ) -# assert tag_sequence == [None, "B-person", "O", "O", "O", "O", "B-city", "O", None] - - -# def test_has_overlap(): -# # no overlap - not touching -# assert not has_overlap((3, 5), (6, 10)) -# assert not has_overlap((6, 10), (3, 5)) - -# # no overlap - touching -# assert not has_overlap((5, 10), (3, 5)) -# assert not has_overlap((3, 5), (5, 10)) - -# # partly overlap -# assert has_overlap((3, 5), 
(4, 10)) -# assert has_overlap((4, 10), (3, 5)) - -# # partly overlap - same start -# assert has_overlap((3, 5), (3, 10)) -# assert has_overlap((3, 10), (3, 5)) - -# # partly overlap - same end -# assert has_overlap((3, 5), (2, 5)) -# assert has_overlap((2, 5), (3, 5)) - -# # total overlap (containing) -# assert has_overlap((3, 5), (2, 10)) -# assert has_overlap((2, 10), (3, 5))
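The hunks above remove or relocate several user-facing helpers; the short, self-contained sketches below illustrate their behavior. First, the counting scheme of the removed ConfusionMatrix (src/pytorch_ie/metrics/confusion_matrix.py): gold and predicted annotations are grouped by their label-stripped base annotation, a prediction without a gold counterpart is booked under the placeholder gold label UNDETECTED, and a gold annotation without a prediction under UNASSIGNABLE. This is a minimal re-implementation sketch; the (base, label) tuples stand in for real annotation objects.

# Sketch of the placeholder logic in the removed ConfusionMatrix.
from collections import defaultdict

counts = defaultdict(int)
gold = {("fox", "animal"), ("dog", "animal"), ("cat", "animal")}   # illustrative (base, label) pairs
pred = {("fox", "animal"), ("dog", "cat"), ("apple", "company")}

bases = {b for b, _ in gold} | {b for b, _ in pred}
for base in bases:
    # empty gold or prediction labels are replaced by the placeholders
    gold_labels = [l for b, l in gold if b == base] or ["UNDETECTED"]
    pred_labels = [l for b, l in pred if b == base] or ["UNASSIGNABLE"]
    for g in gold_labels:
        for p in pred_labels:
            counts[(g, p)] += 1

assert counts[("animal", "animal")] == 1        # correct prediction
assert counts[("animal", "cat")] == 1           # wrong label
assert counts[("animal", "UNASSIGNABLE")] == 1  # missed gold annotation
assert counts[("UNDETECTED", "company")] == 1   # spurious prediction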
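The removed F1Metric._compute (src/pytorch_ie/metrics/f1.py) derives precision, recall, and F1 from per-label (tp, fp, fn) counts. A worked example using the MICRO counts asserted in the deleted tests/metrics/test_f1.py:

# MICRO counts from the deleted test_f1: 3 true positives, 2 false
# positives, 0 false negatives. The duplicate prediction is not
# double-counted because calculate_counts works on sets.
tp, fp, fn = 3, 2, 0
p = tp / (tp + fp)        # precision = 0.6
r = tp / (tp + fn)        # recall = 1.0
f1 = 2 * p * r / (p + r)  # harmonic mean, ~0.75
assert (p, r) == (0.6, 1.0)
assert abs(f1 - 0.75) < 1e-9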
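After this change the generic statistics collectors live in pie_documents and are only re-exported from pytorch_ie.metrics.statistics, while TokenCountCollector stays local because of its transformers dependency (see the new src/pytorch_ie/metrics/statistics/__init__.py). A minimal import/usage sketch; the tokenizer name is illustrative:

from pytorch_ie.metrics.statistics import DummyCollector, TokenCountCollector

# DummyCollector._collect returns 1 per document, so its "sum"
# aggregation yields the number of documents.
doc_counter = DummyCollector()

# TokenCountCollector tokenizes the "text" field of each document;
# "bert-base-cased" is just an example model name.
token_counter = TokenCountCollector(tokenizer="bert-base-cased")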
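The deleted tests/test_annotations.py relied on the removed conftest helper _test_annotation_reconstruction, which checked that an annotation survives a JSON round-trip. The equivalent check, inlined here for a Label; annotations that reference other annotations (e.g. BinaryRelation) additionally need the annotation_store argument to fromdict:

# Round-trip check formerly performed by _test_annotation_reconstruction.
import json

from pytorch_ie.annotations import Label

label = Label(label="label2", score=0.5)
reconstructed = Label.fromdict(json.loads(json.dumps(label.asdict())))
assert reconstructed == label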
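Finally, has_overlap and is_contained_in are now backed by pie_documents (have_overlap is re-exported as has_overlap in src/pytorch_ie/utils/span.py above). Assuming the re-exports behave like the removed local implementations, the deleted test_has_overlap cases reduce to:

from pytorch_ie.utils.span import has_overlap, is_contained_in

# Spans are half-open (start, end) tuples.
assert not has_overlap((3, 5), (6, 10))  # disjoint
assert not has_overlap((3, 5), (5, 10))  # merely touching is not overlap
assert has_overlap((3, 5), (4, 10))      # partial overlap
assert has_overlap((2, 10), (3, 5))      # containment counts as overlap
assert is_contained_in((3, 5), (2, 10))  # (3, 5) lies inside (2, 10)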