From 40dfc8304535fc2aaa522cd0a43940ad62c52f7a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 1 Jul 2022 17:22:53 +0200 Subject: [PATCH 01/62] started new PageRank branch --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 05a9d7c0..f6f33045 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# hackage-server +# Hackage-server [![Build status](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml) [![Build status](https://github.com/haskell/hackage-server/actions/workflows/nix-flake.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/nix-flake.yml) From cbbaeb67bb3ae16c55569715e962387dd4f5ad7e Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 1 Jul 2022 17:45:41 +0200 Subject: [PATCH 02/62] correct the error --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f6f33045..05a9d7c0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Hackage-server +# hackage-server [![Build status](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml) [![Build status](https://github.com/haskell/hackage-server/actions/workflows/nix-flake.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/nix-flake.yml) From 164128bf37a8fd3e66291ec76c6e2db9f31d3d49 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 3 Jul 2022 21:27:14 +0200 Subject: [PATCH 03/62] Created PackageRank module and added it to build --- hackage-server.cabal | 1 + src/Distribution/Server/Features/PackageRank.hs | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 src/Distribution/Server/Features/PackageRank.hs diff --git a/hackage-server.cabal b/hackage-server.cabal index 50576dfd..9ce2c883 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -398,6 +398,7 @@ library Distribution.Server.Features.StaticFiles Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap + Distribution.Server.Features.PackageRank Distribution.Server.Util.NLP.Snowball if flag(debug) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs new file mode 100644 index 00000000..3bbe6221 --- /dev/null +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -0,0 +1,9 @@ +module Distribution.Server.Features.PackageRank ( + rankPackage + ) where + +import Distribution.Package +import Distribution.Server.Packages.Types + +rankPackage :: (Package a) => a -> IO Double +rankPackage p=return 0 From 54de869d5baf0176a843b0b86d1b3ec733e7ec6a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 3 Jul 2022 21:37:37 +0200 Subject: [PATCH 04/62] write out ranking criteria --- src/Distribution/Server/Features/PackageRank.hs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 3bbe6221..4438f566 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -3,7 +3,15 @@ module Distribution.Server.Features.PackageRank ( ) where import Distribution.Package -import Distribution.Server.Packages.Types rankPackage :: (Package a) => a -> IO Double -rankPackage p=return 0 +rankPackage p=return$ reverseDeps+usageTrend+docScore+stability + +authNum+goodMetadata+weightUniqueDeps+activelyMaintained + where reverseDeps=1 + usageTrend=1 + docScore=1 + stability=1 + authNum=1 + goodMetadata=1 + weightUniqueDeps=1 + activelyMaintained=1 From 1e2c6c8f618e6f26c78e18544cd7f54c7ab7fd41 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 4 Jul 2022 23:22:32 +0200 Subject: [PATCH 05/62] started with maintainer number --- .../Server/Features/PackageRank.hs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 4438f566..0d51bffc 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -3,15 +3,25 @@ module Distribution.Server.Features.PackageRank ( ) where import Distribution.Package +import Distribution.PackageDescription +import Distribution.Server.Features.Upload +import Distribution.Server.Users.UserIdSet as UserIdSet -rankPackage :: (Package a) => a -> IO Double -rankPackage p=return$ reverseDeps+usageTrend+docScore+stability - +authNum+goodMetadata+weightUniqueDeps+activelyMaintained +rankPackage :: PackageDescription -> IO Double +rankPackage p=do + maintainers <- maintNum + return maintainers+reverseDeps+usageTrend+docScore+stability + +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 stability=1 - authNum=1 + maintNum :: IO Double + maintNum=do + maintSet<-queryUserGroup$maintainersGroupDescription pkgNm + return fromInteger.UserIdSet$size maintSet goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 + pkgNm :: PackageName + pkgNm=pkgName$package p From e777bf28ac41c9aaca04c923fc100f85dc4bf58a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 22:31:19 +0200 Subject: [PATCH 06/62] added Upload Feature and got number of maintainers for package --- src/Distribution/Server/Features/PackageRank.hs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0d51bffc..ae5d7fda 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,22 +4,22 @@ module Distribution.Server.Features.PackageRank ( import Distribution.Package import Distribution.PackageDescription +import Distribution.Server.Users.Group import Distribution.Server.Features.Upload -import Distribution.Server.Users.UserIdSet as UserIdSet -rankPackage :: PackageDescription -> IO Double -rankPackage p=do +rankPackage :: UploadFeature -> PackageDescription -> IO Double +rankPackage upload p=do maintainers <- maintNum - return maintainers+reverseDeps+usageTrend+docScore+stability + return$maintainers+reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 - stability=1 + stabilityScore=1 maintNum :: IO Double maintNum=do - maintSet<-queryUserGroup$maintainersGroupDescription pkgNm - return fromInteger.UserIdSet$size maintSet + maint<-queryUserGroups$[maintainersGroup upload pkgNm] + return.fromInteger.toInteger$size maint goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 From 6a45d62e3702f2bfa104d2e813db6b2b661e3b54 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 22:42:20 +0200 Subject: [PATCH 07/62] divided rankPackage to pure and IO version --- .../Server/Features/PackageRank.hs | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ae5d7fda..aa3921cf 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -7,21 +7,23 @@ import Distribution.PackageDescription import Distribution.Server.Users.Group import Distribution.Server.Features.Upload -rankPackage :: UploadFeature -> PackageDescription -> IO Double -rankPackage upload p=do - maintainers <- maintNum - return$maintainers+reverseDeps+usageTrend+docScore+stabilityScore +rankPackageIO upload p=maintNum + where + maintNum :: IO Double + maintNum=do + maint<-queryUserGroups$[maintainersGroup upload pkgNm] + return.fromInteger.toInteger$size maint + pkgNm :: PackageName + pkgNm=pkgName$package p +rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 stabilityScore=1 - maintNum :: IO Double - maintNum=do - maint<-queryUserGroups$[maintainersGroup upload pkgNm] - return.fromInteger.toInteger$size maint goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 - pkgNm :: PackageName - pkgNm=pkgName$package p + +rankPackage :: UploadFeature -> PackageDescription -> IO Double +rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) From 0a6dc0047f51fca8dd8158681b6572b142cdfe32 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 23:04:25 +0200 Subject: [PATCH 08/62] added benchmark and test info --- src/Distribution/Server/Features/PackageRank.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index aa3921cf..8d11ad8c 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -21,9 +21,13 @@ rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore usageTrend=1 docScore=1 stabilityScore=1 + testsBench=(bool2Double.hasTests) p + (bool2Double.hasBenchmarks) p goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 + bool2Double :: Bool -> Double + bool2Double true=1 + bool2Double false=0 rankPackage :: UploadFeature -> PackageDescription -> IO Double rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) From 722103b942422793133ec6f51c3a17f0c6aac1fc Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 7 Jul 2022 23:29:12 +0200 Subject: [PATCH 09/62] added Download Feature --- src/Distribution/Server/Features/PackageRank.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8d11ad8c..7fa7ca66 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,8 +6,9 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Users.Group import Distribution.Server.Features.Upload +import Distribution.Server.Features.DownloadCount -rankPackageIO upload p=maintNum +rankPackageIO download upload p=maintNum where maintNum :: IO Double maintNum=do @@ -29,5 +30,5 @@ rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore bool2Double true=1 bool2Double false=0 -rankPackage :: UploadFeature -> PackageDescription -> IO Double -rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) +rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double +rankPackage download upload p=rankPackageIO download upload p>>=(\x->return$x + rankPackagePure p) From 056c83331c627dcf38f71d8c2e48f620d6912f7d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 8 Jul 2022 23:06:24 +0200 Subject: [PATCH 10/62] formatted a bit --- .../Server/Features/PackageRank.hs | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 7fa7ca66..43fc0a02 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -8,27 +8,35 @@ import Distribution.Server.Users.Group import Distribution.Server.Features.Upload import Distribution.Server.Features.DownloadCount -rankPackageIO download upload p=maintNum +rankPackageIO download upload p = maintNum where + -- Number of maintainers maintNum :: IO Double - maintNum=do - maint<-queryUserGroups$[maintainersGroup upload pkgNm] - return.fromInteger.toInteger$size maint + maintNum = do + maint <- queryUserGroups $ + [maintainersGroup upload pkgNm] + return . fromInteger . toInteger $ size maint pkgNm :: PackageName - pkgNm=pkgName$package p -rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore + pkgNm = pkgName $ package p + +rankPackagePure p = reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained - where reverseDeps=1 - usageTrend=1 - docScore=1 - stabilityScore=1 - testsBench=(bool2Double.hasTests) p + (bool2Double.hasBenchmarks) p - goodMetadata=1 - weightUniqueDeps=1 - activelyMaintained=1 + where reverseDeps = 1 + usageTrend = 1 + docScore = 1 + stabilityScore = 1 + -- Does the Package have tests and Benchmarks + testsBench = (bool2Double . hasTests) p + + (bool2Double . hasBenchmarks) p + goodMetadata = 1 + weightUniqueDeps = 1 + activelyMaintained = 1 bool2Double :: Bool -> Double - bool2Double true=1 - bool2Double false=0 + bool2Double true = 1 + bool2Double false = 0 rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p=rankPackageIO download upload p>>=(\x->return$x + rankPackagePure p) +rankPackage download upload p = rankPackageIO download upload p + >>= (\x->return$x + rankPackagePure p) + + From c0abd3d2acb2f63acae557f2aaf9879154e4aab4 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 8 Jul 2022 23:17:27 +0200 Subject: [PATCH 11/62] used brittany --- .../Server/Features/PackageRank.hs | 74 ++++++++++--------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 43fc0a02..aee6adb5 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,42 +1,48 @@ -module Distribution.Server.Features.PackageRank ( - rankPackage +module Distribution.Server.Features.PackageRank + ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Server.Users.Group -import Distribution.Server.Features.Upload -import Distribution.Server.Features.DownloadCount +import Distribution.Package +import Distribution.PackageDescription +import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.Upload +import Distribution.Server.Users.Group rankPackageIO download upload p = maintNum - where - -- Number of maintainers - maintNum :: IO Double - maintNum = do - maint <- queryUserGroups $ - [maintainersGroup upload pkgNm] - return . fromInteger . toInteger $ size maint - pkgNm :: PackageName - pkgNm = pkgName $ package p + where + -- Number of maintainers + maintNum :: IO Double + maintNum = do + maint <- queryUserGroups [maintainersGroup upload pkgNm] + return . fromInteger . toInteger $ size maint + pkgNm :: PackageName + pkgNm = pkgName $ package p -rankPackagePure p = reverseDeps+usageTrend+docScore+stabilityScore - +goodMetadata+weightUniqueDeps+activelyMaintained - where reverseDeps = 1 - usageTrend = 1 - docScore = 1 - stabilityScore = 1 - -- Does the Package have tests and Benchmarks - testsBench = (bool2Double . hasTests) p - + (bool2Double . hasBenchmarks) p - goodMetadata = 1 - weightUniqueDeps = 1 - activelyMaintained = 1 - bool2Double :: Bool -> Double - bool2Double true = 1 - bool2Double false = 0 +rankPackagePure p = + reverseDeps + + usageTrend + + docScore + + stabilityScore + + goodMetadata + + weightUniqueDeps + + activelyMaintained + where + reverseDeps = 1 + usageTrend = 1 + docScore = 1 + stabilityScore = 1 + -- Does the Package have tests and Benchmarks + testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p + goodMetadata = 1 + weightUniqueDeps = 1 + activelyMaintained = 1 + bool2Double :: Bool -> Double + bool2Double true = 1 + bool2Double false = 0 -rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p = rankPackageIO download upload p - >>= (\x->return$x + rankPackagePure p) +rankPackage + :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double +rankPackage download upload p = + rankPackageIO download upload p >>= (\x -> return $ x + rankPackagePure p) From 4c7dba1d7217c14aa3f1bc994e6d5f5259169689 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 11 Jul 2022 17:43:04 +0200 Subject: [PATCH 12/62] added further info about the package --- src/Distribution/Server/Features/PackageRank.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index aee6adb5..4db50700 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,6 +4,7 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription +import Distribution.Types.Version import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Upload import Distribution.Server.Users.Group @@ -28,6 +29,8 @@ rankPackagePure p = + activelyMaintained where reverseDeps = 1 + versions = versionNumbers . pkgVersion $ package p + dependencies = allBuildDepends p usageTrend = 1 docScore = 1 stabilityScore = 1 From 36ef484a5ef0e10f94699df9bddeff650dbccf6d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 12 Jul 2022 22:34:59 +0200 Subject: [PATCH 13/62] added isApp function --- src/Distribution/Server/Features/PackageRank.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 4db50700..5184ed1a 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -9,6 +9,8 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Upload import Distribution.Server.Users.Group +import Data.Maybe (isNothing) + rankPackageIO download upload p = maintNum where -- Number of maintainers @@ -39,6 +41,7 @@ rankPackagePure p = goodMetadata = 1 weightUniqueDeps = 1 activelyMaintained = 1 + isApp = (isNothing.library) p && (not.null.executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 From 53f5104875beabb4fe058a6f7ac6a155ab35f69b Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 13 Jul 2022 11:31:19 +0200 Subject: [PATCH 14/62] added Scorer type and started to extract versions --- .../Server/Features/PackageRank.hs | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 5184ed1a..e009c87d 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,12 +4,25 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.PackageInfoJSON.State + ( getVersionsFor ) import Distribution.Server.Features.Upload import Distribution.Server.Users.Group + ( queryUserGroups + , size + ) +import Distribution.Types.Version + +import Data.Maybe ( isNothing ) + +data Scorer = Scorer + { total :: Double + , score :: Double + } -import Data.Maybe (isNothing) +instance Num Scorer where + Scorer a b + Scorer c d = Scorer (a + c) (b + d) rankPackageIO download upload p = maintNum where @@ -18,6 +31,8 @@ rankPackageIO download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint + versionsPkg :: IO Double + versionsPkg = getVersionsFor pkgNm >>= return length pkgNm :: PackageName pkgNm = pkgName $ package p @@ -25,23 +40,14 @@ rankPackagePure p = reverseDeps + usageTrend + docScore - + stabilityScore - + goodMetadata - + weightUniqueDeps - + activelyMaintained + + reverseDeps where reverseDeps = 1 - versions = versionNumbers . pkgVersion $ package p dependencies = allBuildDepends p usageTrend = 1 docScore = 1 - stabilityScore = 1 - -- Does the Package have tests and Benchmarks testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - goodMetadata = 1 - weightUniqueDeps = 1 - activelyMaintained = 1 - isApp = (isNothing.library) p && (not.null.executables) p + isApp = (isNothing . library) p && (not . null . executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 From 1a157b8452046cca05ce73fd349ceae3bbb63ce5 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 14 Jul 2022 21:46:06 +0200 Subject: [PATCH 15/62] more work on versions --- .../Server/Features/PackageRank.hs | 53 +++++++++++-------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e009c87d..e333a3f7 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -5,8 +5,8 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.PackageInfoJSON.State - ( getVersionsFor ) +import Distribution.Server.Features.HaskellPlatform +import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.Upload import Distribution.Server.Users.Group ( queryUserGroups @@ -21,40 +21,47 @@ data Scorer = Scorer , score :: Double } -instance Num Scorer where - Scorer a b + Scorer c d = Scorer (a + c) (b + d) +add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO download upload p = maintNum +rankPackageIO + :: VersionsFeature + -> PlatformFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> IO Double +rankPackageIO prefferedV platform download upload p = maintNum where + pkgNm :: PackageName + pkgNm = pkgName $ package p -- Number of maintainers maintNum :: IO Double maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint - versionsPkg :: IO Double - versionsPkg = getVersionsFor pkgNm >>= return length - pkgNm :: PackageName - pkgNm = pkgName $ package p + versions = platformVersions platform pkgNm -rankPackagePure p = - reverseDeps - + usageTrend - + docScore - + reverseDeps +rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where - reverseDeps = 1 - dependencies = allBuildDepends p - usageTrend = 1 - docScore = 1 - testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - isApp = (isNothing . library) p && (not . null . executables) p + reverseDeps = 1 + dependencies = allBuildDepends p + usageTrend = 1 + docScore = 1 + testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p + isApp = (isNothing . library) p && (not . null . executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 rankPackage - :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p = - rankPackageIO download upload p >>= (\x -> return $ x + rankPackagePure p) + :: VersionsFeature + -> PlatformFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> IO Double +rankPackage versions platform download upload p = + rankPackageIO versions platform download upload p + >>= (\x -> return $ x + rankPackagePure p) From 7ab35a9496d5a29615362f1469e429756308b132 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 11:36:56 +0200 Subject: [PATCH 16/62] got versions and partitioned based on status --- .../Server/Features/HaskellPlatform.hs | 2 +- .../Server/Features/PackageRank.hs | 41 ++++++++++++------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/Distribution/Server/Features/HaskellPlatform.hs b/src/Distribution/Server/Features/HaskellPlatform.hs index 9d0840bd..15be3e81 100644 --- a/src/Distribution/Server/Features/HaskellPlatform.hs +++ b/src/Distribution/Server/Features/HaskellPlatform.hs @@ -1,6 +1,6 @@ {-# LANGUAGE RankNTypes, NamedFieldPuns, RecordWildCards #-} module Distribution.Server.Features.HaskellPlatform ( - PlatformFeature, + PlatformFeature(..), PlatformResource(..), initPlatformFeature, ) where diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e333a3f7..b26887e6 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,10 +4,17 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription +import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.HaskellPlatform import Distribution.Server.Features.PreferredVersions +import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload +import Distribution.Server.Framework +import Distribution.Server.Packages.PackageIndex + ( PackageIndex ) +import qualified Distribution.Server.Packages.PackageIndex + as PackageIndex +import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups , size @@ -23,14 +30,7 @@ data Scorer = Scorer add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO - :: VersionsFeature - -> PlatformFeature - -> DownloadFeature - -> UploadFeature - -> PackageDescription - -> IO Double -rankPackageIO prefferedV platform download upload p = maintNum +rankPackageIO core versions download upload p = maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -39,7 +39,20 @@ rankPackageIO prefferedV platform download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint - versions = platformVersions platform pkgNm + descriptions = do + desc <- lookupPackageName core pkgNm + return (pkgDesc <$> desc) + partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer = do + desc <- descriptions + return + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions + x + (map (pkgVersion . package . packageDescription) desc) + ) + + rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -54,14 +67,14 @@ rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps bool2Double false = 0 rankPackage - :: VersionsFeature - -> PlatformFeature + :: CoreResource + -> VersionsFeature -> DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage versions platform download upload p = - rankPackageIO versions platform download upload p +rankPackage core versions download upload p = + rankPackageIO core versions download upload p >>= (\x -> return $ x + rankPackagePure p) From 8d5e68afab2378bb7393d08bf13b779ea4b9b59a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 11:46:31 +0200 Subject: [PATCH 17/62] separated versions into versionList and verPart --- .../Server/Features/PackageRank.hs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index b26887e6..20876e98 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -42,15 +42,20 @@ rankPackageIO core versions download upload p = maintNum descriptions = do desc <- lookupPackageName core pkgNm return (pkgDesc <$> desc) - partVer :: ServerPartE (IO ([Version], [Version], [Version])) - partVer = do + + versionList = do desc <- descriptions - return - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions - x - (map (pkgVersion . package . packageDescription) desc) + return (map (pkgVersion . package . packageDescription) desc) + + partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer = + versionList + >>= (\y -> + return + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions x y) ) + From 38598033c1dd26a0859bd1b3ce03ab3fa6b81363 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 14:41:49 +0200 Subject: [PATCH 18/62] added last upload times --- .../Server/Features/PackageRank.hs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 20876e98..ff1c37d5 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -21,7 +21,11 @@ import Distribution.Server.Users.Group ) import Distribution.Types.Version +import Data.List ( sort + , sortBy + ) import Data.Maybe ( isNothing ) +import Data.Time.Clock ( UTCTime(..) ) data Scorer = Scorer { total :: Double @@ -39,9 +43,10 @@ rankPackageIO core versions download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint + info = lookupPackageName core pkgNm descriptions = do - desc <- lookupPackageName core pkgNm - return (pkgDesc <$> desc) + infPkg <- info + return (pkgDesc <$> infPkg) versionList = do desc <- descriptions @@ -55,9 +60,12 @@ rankPackageIO core versions download upload p = maintNum $ queryGetPreferredInfo versions pkgNm >>= (\x -> return $ partitionVersions x y) ) - - - + lastUploads = do + infPkg <- info + return + $ sortBy (flip compare) + $ (\x -> fst (pkgOriginalUploadInfo x)) + <$> infPkg rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where From 95e02d3f892aa7a5a6779f73e4e52ca722e06bb6 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 20:40:51 +0200 Subject: [PATCH 19/62] added freshnessScore --- .../Server/Features/PackageRank.hs | 95 ++++++++++++++----- 1 file changed, 72 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ff1c37d5..1401cb9e 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -9,11 +9,7 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload -import Distribution.Server.Framework -import Distribution.Server.Packages.PackageIndex - ( PackageIndex ) -import qualified Distribution.Server.Packages.PackageIndex - as PackageIndex +import Distribution.Server.Framework ( ServerPartE ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -21,20 +17,73 @@ import Distribution.Server.Users.Group ) import Distribution.Types.Version +import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort , sortBy ) import Data.Maybe ( isNothing ) -import Data.Time.Clock ( UTCTime(..) ) +import Data.Ord ( max + , min + ) +import Data.Time.Clock ( UTCTime(..) + , diffUTCTime + , getCurrentTime + , nominalDay + ) +import GHC.Float ( int2Double ) data Scorer = Scorer - { total :: Double - , score :: Double + { maximum :: Double + , score :: Double } add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO core versions download upload p = maintNum +total (Scorer a b) = a / b + +freshnessScore :: [Version] -> UTCTime -> Bool -> IO Double +freshnessScore [] _ app = return 0 +freshnessScore (x : xs) lastUpd app = + daysPastExpiration + >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) + where + versionLatest = versionNumbers x + isNightly = case major versionLatest of + 0 -> True + _ -> False + daysPastExpiration = + age >>= (\a -> return $ max 0 a - expectedUpdateInterval) + expectedUpdateInterval = + int2Double (min (versionStabilityInterval versionLatest) $ length (x : xs)) + / (if isNightly then 4 else 1) + versionStabilityInterval v | patches v > 3 && major v > 0 = 700 + | patches v > 3 = 450 + | patches v > 0 = 300 + | major v > 0 = 200 + | minor v > 3 = 140 + | otherwise = 80 + age = + getCurrentTime + >>= (\x -> + return + $ fromRational + $ toRational + $ diffUTCTime x lastUpd + / fromRational (toRational nominalDay) + ) + -- expected_update_interval/2 + if cr.is_nightly { 30 } else if is_app_only {300} else {200}; + decayDays = + expectedUpdateInterval + / 2 + + (if isNightly then 30 else (if app then 300 else 200)) + major (x : xs) = x + major _ = 0 + minor (x : y : xs) = y + minor _ = 0 + patches (x : y : xs) = sum xs + patches _ = 0 + +rankPackageIO core versions download upload p = liftIO maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -42,30 +91,29 @@ rankPackageIO core versions download upload p = maintNum maintNum :: IO Double maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] - return . fromInteger . toInteger $ size maint + return . int2Double $ size maint info = lookupPackageName core pkgNm descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - versionList = do - desc <- descriptions - return (map (pkgVersion . package . packageDescription) desc) + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions - partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer :: ServerPartE ([Version], [Version], [Version]) partVer = versionList >>= (\y -> - return + liftIO $ queryGetPreferredInfo versions pkgNm >>= (\x -> return $ partitionVersions x y) ) lastUploads = do infPkg <- info - return - $ sortBy (flip compare) - $ (\x -> fst (pkgOriginalUploadInfo x)) - <$> infPkg + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -75,9 +123,10 @@ rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps docScore = 1 testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p isApp = (isNothing . library) p && (not . null . executables) p - bool2Double :: Bool -> Double - bool2Double true = 1 - bool2Double false = 0 + +bool2Double :: Bool -> Double +bool2Double true = 1 +bool2Double false = 0 rankPackage :: CoreResource @@ -85,7 +134,7 @@ rankPackage -> DownloadFeature -> UploadFeature -> PackageDescription - -> IO Double + -> ServerPartE Double rankPackage core versions download upload p = rankPackageIO core versions download upload p >>= (\x -> return $ x + rankPackagePure p) From c1451240e9e0ff4978fa93ab0a1ff6c824072e0f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 22:07:08 +0200 Subject: [PATCH 20/62] added Download Scorer --- src/Distribution/Server/Features/PackageRank.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1401cb9e..db5ab28a 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,6 +6,7 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.DownloadCount.State import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload @@ -16,6 +17,7 @@ import Distribution.Server.Users.Group , size ) import Distribution.Types.Version +import Distribution.Server.Util.CountingMap (cmFind) import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort @@ -87,6 +89,7 @@ rankPackageIO core versions download upload p = liftIO maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p + isApp = (isNothing . library) p && (not . null . executables) p -- Number of maintainers maintNum :: IO Double maintNum = do @@ -96,7 +99,9 @@ rankPackageIO core versions download upload p = liftIO maintNum descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - + downloadScore :: IO Scorer + downloadScore = recentPackageDownloads download >>=return.calcDownScore.(cmFind pkgNm) + calcDownScore i = Scorer 5 $ (logBase 2 (int2Double$max 0 (i-100) + 100) - 6.6) / (if isApp then 5 else 6) versionList = do sortBy (flip compare) From faa42c28df202fd7f635e6718b13028a59d1055d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 16 Jul 2022 20:00:56 +0200 Subject: [PATCH 21/62] finished simple temporalScore (rankPackageIO) --- .../Server/Features/PackageRank.hs | 66 ++++++++++++------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index db5ab28a..c5c480b8 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,7 +6,6 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.DownloadCount.State import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload @@ -16,8 +15,9 @@ import Distribution.Server.Users.Group ( queryUserGroups , size ) +import Distribution.Server.Util.CountingMap + ( cmFind ) import Distribution.Types.Version -import Distribution.Server.Util.CountingMap (cmFind) import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort @@ -39,25 +39,27 @@ data Scorer = Scorer , score :: Double } +-- frac 0<=frac<=1 +fracScor maxim frac = Scorer maxim (maxim * frac) + +boolScor k true = Scorer k k +boolScor k true = Scorer k 0 + add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) total (Scorer a b) = a / b -freshnessScore :: [Version] -> UTCTime -> Bool -> IO Double -freshnessScore [] _ app = return 0 -freshnessScore (x : xs) lastUpd app = +freshness :: [Version] -> UTCTime -> Bool -> IO Double +freshness [] _ app = return 0 +freshness (x : xs) lastUpd app = daysPastExpiration >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) where versionLatest = versionNumbers x - isNightly = case major versionLatest of - 0 -> True - _ -> False daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = - int2Double (min (versionStabilityInterval versionLatest) $ length (x : xs)) - / (if isNightly then 4 else 1) + expectedUpdateInterval = int2Double + (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 | patches v > 0 = 300 @@ -73,11 +75,7 @@ freshnessScore (x : xs) lastUpd app = $ diffUTCTime x lastUpd / fromRational (toRational nominalDay) ) - -- expected_update_interval/2 + if cr.is_nightly { 30 } else if is_app_only {300} else {200}; - decayDays = - expectedUpdateInterval - / 2 - + (if isNightly then 30 else (if app then 300 else 200)) + decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) major (x : xs) = x major _ = 0 minor (x : y : xs) = y @@ -85,11 +83,15 @@ freshnessScore (x : xs) lastUpd app = patches (x : y : xs) = sum xs patches _ = 0 -rankPackageIO core versions download upload p = liftIO maintNum +temporalScore core versions download upload p = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ add tract $ add fresh downs where pkgNm :: PackageName pkgNm = pkgName $ package p - isApp = (isNothing . library) p && (not . null . executables) p + isApp = (isNothing . library) p && (not . null . executables) p -- Number of maintainers maintNum :: IO Double maintNum = do @@ -99,9 +101,14 @@ rankPackageIO core versions download upload p = liftIO maintNum descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - downloadScore :: IO Scorer - downloadScore = recentPackageDownloads download >>=return.calcDownScore.(cmFind pkgNm) - calcDownScore i = Scorer 5 $ (logBase 2 (int2Double$max 0 (i-100) + 100) - 6.6) / (if isApp then 5 else 6) + downloadScore = downloadsPerMonth >>= return . calcDownScore + downloadsPerMonth = + liftIO $ recentPackageDownloads download >>= return . cmFind pkgNm + calcDownScore i = Scorer 5 $ max + ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) + / (if isApp then 5 else 6) + ) + 5 versionList = do sortBy (flip compare) @@ -119,6 +126,19 @@ rankPackageIO core versions download upload p = liftIO maintNum lastUploads = do infPkg <- info return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + -- [Version] -> UTCTime -> Bool + packageFreshness = do + ups <- lastUploads + vers <- versionList + case ups of + [] -> return 0 + _ -> liftIO $ freshness vers (head ups) isApp + freshnessScore = packageFreshness >>= return . fracScor 10 + -- Missing dependencyFreshnessScore for reasonable effectivity needs caching + tractionScore = do + fresh <- packageFreshness + downs <- downloadsPerMonth + return $ boolScor 1 (fresh * int2Double downs > 1000) rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -141,7 +161,7 @@ rankPackage -> PackageDescription -> ServerPartE Double rankPackage core versions download upload p = - rankPackageIO core versions download upload p - >>= (\x -> return $ x + rankPackagePure p) + temporalScore core versions download upload p + >>= (\x -> return $ total x + rankPackagePure p) From 64453e870c79cdacc8f885dccd6b9843135fc789 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 17 Jul 2022 21:00:29 +0200 Subject: [PATCH 22/62] separated rankIO from temporalScore --- .../Server/Features/PackageRank.hs | 81 ++++++++++++------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c5c480b8..c8cd2fdf 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -83,7 +83,55 @@ freshness (x : xs) lastUpd app = patches (x : y : xs) = sum xs patches _ = 0 -temporalScore core versions download upload p = do + +-- partVer :: ServerPartE ([Version], [Version], [Version]) +-- partVer = +-- versionList +-- >>= (\y -> +-- liftIO +-- $ queryGetPreferredInfo versions pkgNm +-- >>= (\x -> return $ partitionVersions x y) +-- ) +-- +-- -- Number of maintainers +-- maintNum :: IO Double +-- maintNum = do +-- maint <- queryUserGroups [maintainersGroup upload pkgNm] +-- return . int2Double $ size maint + +rankIO + :: CoreResource + -> VersionsFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> ServerPartE Scorer + +rankIO core vers downs upl pkg = do + temp <- temporalScore core vers downs upl pkg lastUploads versionList downloadsPerMonth + return temp + + where + pkgNm :: PackageName + pkgNm = pkgName $ package pkg + info = lookupPackageName core pkgNm + descriptions = do + infPkg <- info + return (pkgDesc <$> infPkg) + lastUploads = do + infPkg <- info + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions + downloadsPerMonth = + liftIO $ recentPackageDownloads downs >>= return . cmFind pkgNm + + + +temporalScore core versions download upload p lastUploads versionList downloadsPerMonth= do fresh <- freshnessScore downs <- downloadScore tract <- tractionScore @@ -92,41 +140,12 @@ temporalScore core versions download upload p = do pkgNm :: PackageName pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p - -- Number of maintainers - maintNum :: IO Double - maintNum = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - return . int2Double $ size maint - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) downloadScore = downloadsPerMonth >>= return . calcDownScore - downloadsPerMonth = - liftIO $ recentPackageDownloads download >>= return . cmFind pkgNm calcDownScore i = Scorer 5 $ max ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) 5 - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - - partVer :: ServerPartE ([Version], [Version], [Version]) - partVer = - versionList - >>= (\y -> - liftIO - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions x y) - ) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - -- [Version] -> UTCTime -> Bool packageFreshness = do ups <- lastUploads vers <- versionList @@ -161,7 +180,7 @@ rankPackage -> PackageDescription -> ServerPartE Double rankPackage core versions download upload p = - temporalScore core versions download upload p + rankIO core versions download upload p >>= (\x -> return $ total x + rankPackagePure p) From 3c9321250b5859a69472913088043ad3d01941c5 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 17 Jul 2022 22:10:39 +0200 Subject: [PATCH 23/62] added pageRank --- .../Server/Features/PackageRank.hs | 102 +++++++++--------- 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c8cd2fdf..e7011085 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -32,6 +32,7 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) data Scorer = Scorer @@ -42,10 +43,11 @@ data Scorer = Scorer -- frac 0<=frac<=1 fracScor maxim frac = Scorer maxim (maxim * frac) -boolScor k true = Scorer k k -boolScor k true = Scorer k 0 +boolScor k true = Scorer k k +boolScor k false = Scorer k 0 -add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) +(><) :: Scorer -> Scorer -> Scorer +(><) (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) total (Scorer a b) = a / b @@ -108,39 +110,46 @@ rankIO -> ServerPartE Scorer rankIO core vers downs upl pkg = do - temp <- temporalScore core vers downs upl pkg lastUploads versionList downloadsPerMonth - return temp - - where - pkgNm :: PackageName - pkgNm = pkgName $ package pkg - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - downloadsPerMonth = - liftIO $ recentPackageDownloads downs >>= return . cmFind pkgNm - - - -temporalScore core versions download upload p lastUploads versionList downloadsPerMonth= do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ add tract $ add fresh downs + temp <- temporalScore core + vers + downs + upl + pkg + lastUploads + versionList + downloadsPerMonth + return temp + + where + pkgNm :: PackageName + pkgNm = pkgName $ package pkg + info = lookupPackageName core pkgNm + descriptions = do + infPkg <- info + return (pkgDesc <$> infPkg) + lastUploads = do + infPkg <- info + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions + downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + + + +temporalScore core versions download upload p lastUploads versionList downloadsPerMonth + = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract >< fresh >< downs where pkgNm :: PackageName - pkgNm = pkgName $ package p - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = downloadsPerMonth >>= return . calcDownScore + pkgNm = pkgName $ package p + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ max ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -152,25 +161,22 @@ temporalScore core versions download upload p lastUploads versionList downloadsP case ups of [] -> return 0 _ -> liftIO $ freshness vers (head ups) isApp - freshnessScore = packageFreshness >>= return . fracScor 10 - -- Missing dependencyFreshnessScore for reasonable effectivity needs caching + freshnessScore = fracScor 10 <$> packageFreshness +-- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) -rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps +rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats where - reverseDeps = 1 - dependencies = allBuildDepends p - usageTrend = 1 - docScore = 1 - testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - isApp = (isNothing . library) p && (not . null . executables) p - -bool2Double :: Bool -> Double -bool2Double true = 1 -bool2Double false = 0 + tests = boolScor 50 (hasTests p) + benchs = boolScor 10 (hasBenchmarks p) + desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) + -- ducumentation = boolScor 30 () + homeP = boolScor 30 (not $ S.null $ homepage p) + sourceRp = boolScor 8 (not $ null $ sourceRepos p) + cats = boolScor 5 (not $ S.null $ category p) rankPackage :: CoreResource @@ -181,6 +187,6 @@ rankPackage -> ServerPartE Double rankPackage core versions download upload p = rankIO core versions download upload p - >>= (\x -> return $ total x + rankPackagePure p) + >>= (\x -> return $ total x + total (rankPackagePage p)) From 73a65c99fbfe25dec14e470dfc9dc6411d6f97ee Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 18 Jul 2022 23:34:51 +0200 Subject: [PATCH 24/62] added versionScore --- .../Server/Features/PackageRank.hs | 105 ++++++++++++------ 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e7011085..df2868c9 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -24,7 +24,8 @@ import Data.List ( sort , sortBy ) import Data.Maybe ( isNothing ) -import Data.Ord ( max +import Data.Ord ( comparing + , max , min ) import Data.Time.Clock ( UTCTime(..) @@ -32,6 +33,9 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import Distribution.Simple.Utils ( safeHead + , safeLast + ) import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) @@ -40,6 +44,9 @@ data Scorer = Scorer , score :: Double } +scorer maxim frac = case maxim >= frac of + true -> Scorer maxim frac + false -> Scorer maxim maxim -- frac 0<=frac<=1 fracScor maxim frac = Scorer maxim (maxim * frac) @@ -51,6 +58,19 @@ boolScor k false = Scorer k 0 total (Scorer a b) = a / b +major (x : xs) = x +major _ = 0 +minor (x : y : xs) = y +minor _ = 0 +patches (x : y : xs) = sum xs +patches _ = 0 + +numDays :: Maybe UTCTime -> Maybe UTCTime -> Double +numDays (Just first) (Just last) = + fromRational $ toRational $ diffUTCTime first last / fromRational + (toRational nominalDay) +numDays _ _ = 0 + freshness :: [Version] -> UTCTime -> Bool -> IO Double freshness [] _ app = return 0 freshness (x : xs) lastUpd app = @@ -68,33 +88,10 @@ freshness (x : xs) lastUpd app = | major v > 0 = 200 | minor v > 3 = 140 | otherwise = 80 - age = - getCurrentTime - >>= (\x -> - return - $ fromRational - $ toRational - $ diffUTCTime x lastUpd - / fromRational (toRational nominalDay) - ) + age = flip numDays (Just lastUpd) . Just <$> getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) - major (x : xs) = x - major _ = 0 - minor (x : y : xs) = y - minor _ = 0 - patches (x : y : xs) = sum xs - patches _ = 0 - - --- partVer :: ServerPartE ([Version], [Version], [Version]) --- partVer = --- versionList --- >>= (\y -> --- liftIO --- $ queryGetPreferredInfo versions pkgNm --- >>= (\x -> return $ partitionVersions x y) --- ) --- + + -- -- Number of maintainers -- maintNum :: IO Double -- maintNum = do @@ -118,10 +115,10 @@ rankIO core vers downs upl pkg = do lastUploads versionList downloadsPerMonth - return temp + vers <- versionScore versionList vers lastUploads pkg + return (temp >< vers) where - pkgNm :: PackageName pkgNm = pkgName $ package pkg info = lookupPackageName core pkgNm descriptions = do @@ -137,7 +134,53 @@ rankIO core vers downs upl pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs - +versionScore + :: ServerPartE [Version] + -> VersionsFeature + -> ServerPartE [UTCTime] + -> PackageDescription + -> ServerPartE Scorer +versionScore versionList versions lastUploads desc = do + intUse <- intUsable + depre <- deprec + lUps <- lastUploads + return $ calculateScore depre lUps intUse + where + pkgNm = pkgName $ package desc + partVers = + versionList + >>= (\y -> + liftIO + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions x y) + ) + intUsable = do + (norm, _, unpref) <- partVers + return $ versionNumbers <$> norm ++ unpref + deprec = do + (_, deprec, _) <- partVers + return deprec + calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer + calculateScore [] _ _ = Scorer 118 0 + calculateScore depre lUps intUse = + boolScor 20 (length intUse > 1) + >< scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + >< scorer + 15 + (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) + intUse + ) + >< scorer + 20 + (int2Double $ 4 * length + (filter (\x -> major x > 0 && patches x > 0) intUse) + ) + >< scorer + 10 + (int2Double $ patches $ head $ sortBy (comparing patches) intUse) + >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + >< boolScor 5 (not $ null $ depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do @@ -150,7 +193,7 @@ temporalScore core versions download upload p lastUploads versionList downloadsP pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore <$> downloadsPerMonth - calcDownScore i = Scorer 5 $ max + calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) From bf38c80b76e0b9e189616d664be3580fc1761f69 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 19 Jul 2022 22:27:38 +0200 Subject: [PATCH 25/62] added authorScore --- .../Server/Features/PackageRank.hs | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index df2868c9..0315c079 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -20,7 +20,8 @@ import Distribution.Server.Util.CountingMap import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) -import Data.List ( sort +import Data.List ( maximumBy + , sort , sortBy ) import Data.Maybe ( isNothing ) @@ -92,12 +93,6 @@ freshness (x : xs) lastUpd app = decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) --- -- Number of maintainers --- maintNum :: IO Double --- maintNum = do --- maint <- queryUserGroups [maintainersGroup upload pkgNm] --- return . int2Double $ size maint - rankIO :: CoreResource -> VersionsFeature @@ -116,7 +111,8 @@ rankIO core vers downs upl pkg = do versionList downloadsPerMonth vers <- versionScore versionList vers lastUploads pkg - return (temp >< vers) + auth <- authorScore upl pkg + return (temp >< vers >< auth) where pkgNm = pkgName $ package pkg @@ -134,6 +130,19 @@ rankIO core vers downs upl pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs +authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore upload desc = + liftIO maintScore + >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) >< x) + where + pkgNm = pkgName $ package desc + maintScore :: IO Scorer + maintScore = do + maint <- queryUserGroups [maintainersGroup upload pkgNm] + + return $ boolScor 3 (size maint > 1) >< scorer 5 (int2Double $ size maint) + + versionScore :: ServerPartE [Version] -> VersionsFeature @@ -175,12 +184,10 @@ versionScore versionList versions lastUploads desc = do (int2Double $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - >< scorer - 10 - (int2Double $ patches $ head $ sortBy (comparing patches) intUse) - >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + >< scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - >< boolScor 5 (not $ null $ depre) + >< boolScor 5 (not $ null depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do From bf77ee3bfa009c3dc85802185853d880fb03bcb2 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 22 Jul 2022 23:19:38 +0200 Subject: [PATCH 26/62] instance Semigroup Scorer --- .../Server/Features/PackageRank.hs | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0315c079..2d5caeaa 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -10,6 +10,8 @@ import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) +import Distribution.Server.Framework.Feature + ( queryState ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -45,6 +47,9 @@ data Scorer = Scorer , score :: Double } +instance Semigroup Scorer where + (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) + scorer maxim frac = case maxim >= frac of true -> Scorer maxim frac false -> Scorer maxim maxim @@ -54,9 +59,6 @@ fracScor maxim frac = Scorer maxim (maxim * frac) boolScor k true = Scorer k k boolScor k false = Scorer k 0 -(><) :: Scorer -> Scorer -> Scorer -(><) (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) - total (Scorer a b) = a / b major (x : xs) = x @@ -92,9 +94,11 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) +-- lookupPackageId +-- queryHasDocumentation rankIO - :: CoreResource + :: CoreFeature -> VersionsFeature -> DownloadFeature -> UploadFeature @@ -102,7 +106,7 @@ rankIO -> ServerPartE Scorer rankIO core vers downs upl pkg = do - temp <- temporalScore core + temp <- temporalScore coreR vers downs upl @@ -112,11 +116,13 @@ rankIO core vers downs upl pkg = do downloadsPerMonth vers <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - return (temp >< vers >< auth) + return (temp <> vers <> auth) where + pkgId = package pkg pkgNm = pkgName $ package pkg - info = lookupPackageName core pkgNm + info = lookupPackageName coreR pkgNm + coreR = coreResource core descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -133,14 +139,14 @@ rankIO core vers downs upl pkg = do authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = liftIO maintScore - >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) >< x) + >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do maint <- queryUserGroups [maintainersGroup upload pkgNm] - return $ boolScor 3 (size maint > 1) >< scorer 5 (int2Double $ size maint) + return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) versionScore @@ -173,28 +179,28 @@ versionScore versionList versions lastUploads desc = do calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) - >< scorer 40 (numDays (safeHead lUps) (safeLast lUps)) - >< scorer + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + <> scorer 15 (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) intUse ) - >< scorer + <> scorer 20 (int2Double $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - >< scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) - >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) - >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - >< boolScor 5 (not $ null depre) + <> scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + <> boolScor 5 (not $ null depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do fresh <- freshnessScore downs <- downloadScore tract <- tractionScore - return $ tract >< fresh >< downs + return $ tract <> fresh <> downs where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -218,7 +224,7 @@ temporalScore core versions download upload p lastUploads versionList downloadsP downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) -rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats +rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -229,7 +235,7 @@ rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats cats = boolScor 5 (not $ S.null $ category p) rankPackage - :: CoreResource + :: CoreFeature -> VersionsFeature -> DownloadFeature -> UploadFeature From a27a0c7c963c2993b75ba06a3f38f2abb13b7713 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 23 Jul 2022 15:41:21 +0200 Subject: [PATCH 27/62] got tarballs and fixed warnings --- .../Server/Features/PackageRank.hs | 126 +++++++++++------- 1 file changed, 80 insertions(+), 46 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 2d5caeaa..0ff9dfec 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -5,11 +5,15 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core +import Distribution.Server.Features.Documentation + ( DocumentationFeature(..) ) import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) +import Distribution.Server.Framework.BlobStorage + ( BlobId ) import Distribution.Server.Framework.Feature ( queryState ) import Distribution.Server.Packages.Types @@ -23,7 +27,6 @@ import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) import Data.List ( maximumBy - , sort , sortBy ) import Data.Maybe ( isNothing ) @@ -50,32 +53,37 @@ data Scorer = Scorer instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) -scorer maxim frac = case maxim >= frac of - true -> Scorer maxim frac - false -> Scorer maxim maxim --- frac 0<=frac<=1 -fracScor maxim frac = Scorer maxim (maxim * frac) +scorer :: Double -> Double -> Scorer +scorer maxim scr = if maxim >= scr then (Scorer maxim scr) else (Scorer maxim maxim) -boolScor k true = Scorer k k -boolScor k false = Scorer k 0 +fracScor :: Double -> Double -> Scorer +fracScor maxim frac = scorer maxim (maxim * frac) +boolScor :: Double -> Bool -> Scorer +boolScor k True = Scorer k k +boolScor k False = Scorer k 0 + +total :: Scorer -> Double total (Scorer a b) = a / b -major (x : xs) = x -major _ = 0 -minor (x : y : xs) = y -minor _ = 0 -patches (x : y : xs) = sum xs +major :: Num a => [a] -> a +major (x : _) = x +major _ = 0 +minor :: Num a => [a] -> a +minor (_ : y : _) = y +minor _ = 0 +patches :: Num a => [a] -> a +patches (_ : _ : xs) = sum xs patches _ = 0 numDays :: Maybe UTCTime -> Maybe UTCTime -> Double -numDays (Just first) (Just last) = - fromRational $ toRational $ diffUTCTime first last / fromRational +numDays (Just first) (Just end) = + fromRational $ toRational $ diffUTCTime first end / fromRational (toRational nominalDay) numDays _ _ = 0 freshness :: [Version] -> UTCTime -> Bool -> IO Double -freshness [] _ app = return 0 +freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) @@ -97,32 +105,26 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation +-- TODO CoreFeature can be substituted by CoreResource rankIO - :: CoreFeature + :: CoreResource -> VersionsFeature -> DownloadFeature -> UploadFeature + -> DocumentationFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl pkg = do - temp <- temporalScore coreR - vers - downs - upl - pkg - lastUploads - versionList - downloadsPerMonth - vers <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg - return (temp <> vers <> auth) +rankIO core vers downs upl docs pkg = do + temp <- temporalScore pkg lastUploads versionList downloadsPerMonth + versS <- versionScore versionList vers lastUploads pkg + auth <- authorScore upl pkg + return (temp <> versS <> auth) where pkgId = package pkg - pkgNm = pkgName $ package pkg - info = lookupPackageName coreR pkgNm - coreR = coreResource core + pkgNm = pkgName pkgId + info = lookupPackageName core pkgNm descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -135,6 +137,31 @@ rankIO core vers downs upl pkg = do . map (pkgVersion . package . packageDescription) <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + -- TODO get appropriate pkgInfo (head might fail) + packageTarball = pkgLatestTarball . head <$> info + documentTarball :: ServerPartE (Maybe BlobId) + documentTarball = queryDocumentation docs pkgId + +-- mdocs <- queryState documentationState $ LookupDocumentation pkgid +-- case mdocs of +-- Nothing -> +-- errNotFoundH "Not Found" +-- [ MText "There is no documentation for " +-- , MLink (display pkgid) ("/package/" ++ display pkgid) +-- , MText ". See " +-- , MLink canonicalLink canonicalLink +-- , MText " for the latest version." +-- ] +-- where +-- -- Essentially errNotFound, but overloaded to specify a header. +-- -- (Needed since errNotFound throws away result of setHeaderM) +-- errNotFoundH title message = throwError +-- (ErrorResponse 404 +-- [("Link", canonicalHeader)] +-- title message) +-- Just blob -> do +-- index <- liftIO $ cachedTarIndex blob +-- func pkgid blob index authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -173,8 +200,8 @@ versionScore versionList versions lastUploads desc = do (norm, _, unpref) <- partVers return $ versionNumbers <$> norm ++ unpref deprec = do - (_, deprec, _) <- partVers - return deprec + (_, deprecN, _) <- partVers + return deprecN calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = @@ -195,15 +222,18 @@ versionScore versionList versions lastUploads desc = do <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) <> boolScor 5 (not $ null depre) -temporalScore core versions download upload p lastUploads versionList downloadsPerMonth - = do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ tract <> fresh <> downs +temporalScore + :: PackageDescription + -> ServerPartE [UTCTime] + -> ServerPartE [Version] + -> ServerPartE Int + -> ServerPartE Scorer +temporalScore p lastUploads versionList downloadsPerMonth = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract <> fresh <> downs where - pkgNm :: PackageName - pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ min @@ -224,25 +254,29 @@ temporalScore core versions download upload p lastUploads versionList downloadsP downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) +rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) - -- ducumentation = boolScor 30 () + -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) +-- TODO fix the function Signature replace PackageDescription to PackageName/Identifier + rankPackage - :: CoreFeature + :: CoreResource -> VersionsFeature -> DownloadFeature -> UploadFeature + -> DocumentationFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload p = - rankIO core versions download upload p +rankPackage core versions download upload docs p = + rankIO core versions download upload docs p >>= (\x -> return $ total x + total (rankPackagePage p)) From 8931732d2cebaa5d4a5c7e9c15cc8fbf321450b8 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 25 Jul 2022 14:32:18 +0200 Subject: [PATCH 28/62] extracted documentation length --- .../Server/Features/PackageRank.hs | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0ff9dfec..c107789c 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -14,6 +14,8 @@ import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import Distribution.Server.Framework.BlobStorage ( BlobId ) +import qualified Distribution.Server.Framework.BlobStorage + as BlobStorage import Distribution.Server.Framework.Feature ( queryState ) import Distribution.Server.Packages.Types @@ -26,6 +28,7 @@ import Distribution.Server.Util.CountingMap import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) +import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) @@ -39,6 +42,8 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import Distribution.Server.Framework.ServerEnv + ( ServerEnv(..) ) import Distribution.Simple.Utils ( safeHead , safeLast ) @@ -54,7 +59,8 @@ instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) scorer :: Double -> Double -> Scorer -scorer maxim scr = if maxim >= scr then (Scorer maxim scr) else (Scorer maxim maxim) +scorer maxim scr = + if maxim >= scr then Scorer maxim scr else Scorer maxim maxim fracScor :: Double -> Double -> Scorer fracScor maxim frac = scorer maxim (maxim * frac) @@ -112,10 +118,11 @@ rankIO -> DownloadFeature -> UploadFeature -> DocumentationFeature + -> ServerEnv -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs pkg = do +rankIO core vers downs upl docs env pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg @@ -139,29 +146,19 @@ rankIO core vers downs upl docs pkg = do downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) packageTarball = pkgLatestTarball . head <$> info - documentTarball :: ServerPartE (Maybe BlobId) - documentTarball = queryDocumentation docs pkgId + documentBlob :: ServerPartE (Maybe BlobId) + documentBlob = queryDocumentation docs pkgId + blobStore = serverBlobStore env + documentation = do + blob <- documentBlob + maybeIO blob + where + maybeIO Nothing = return Nothing + maybeIO (Just a) = liftIO (Just <$> BlobStorage.fetch blobStore a) --- mdocs <- queryState documentationState $ LookupDocumentation pkgid --- case mdocs of --- Nothing -> --- errNotFoundH "Not Found" --- [ MText "There is no documentation for " --- , MLink (display pkgid) ("/package/" ++ display pkgid) --- , MText ". See " --- , MLink canonicalLink canonicalLink --- , MText " for the latest version." --- ] --- where --- -- Essentially errNotFound, but overloaded to specify a header. --- -- (Needed since errNotFound throws away result of setHeaderM) --- errNotFoundH title message = throwError --- (ErrorResponse 404 --- [("Link", canonicalHeader)] --- title message) --- Just blob -> do --- index <- liftIO $ cachedTarIndex blob --- func pkgid blob index + documLines = + (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) + <$> documentation -- 10 is \n authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -273,10 +270,11 @@ rankPackage -> DownloadFeature -> UploadFeature -> DocumentationFeature + -> ServerEnv -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs p = - rankIO core versions download upload docs p +rankPackage core versions download upload docs env p = + rankIO core versions download upload docs env p >>= (\x -> return $ total x + total (rankPackagePage p)) From b5ca917c87125d7412ef24f42f713e683665f9a0 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 27 Jul 2022 22:50:20 +0200 Subject: [PATCH 29/62] got tarEntries for package and fixed it for documentation --- .../Server/Features/PackageRank.hs | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c107789c..9166638f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -10,10 +10,9 @@ import Distribution.Server.Features.Documentation import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State +import Distribution.Server.Features.TarIndexCache import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) -import Distribution.Server.Framework.BlobStorage - ( BlobId ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.Feature @@ -27,6 +26,11 @@ import Distribution.Server.Util.CountingMap ( cmFind ) import Distribution.Types.Version +import Control.Monad ( forM + , join + , liftM2 + , mapM + ) import Control.Monad.IO.Class ( liftIO ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy @@ -37,6 +41,7 @@ import Data.Ord ( comparing , max , min ) +import qualified Data.TarIndex as TarIndex import Data.Time.Clock ( UTCTime(..) , diffUTCTime , getCurrentTime @@ -54,6 +59,7 @@ data Scorer = Scorer { maximum :: Double , score :: Double } + deriving Show instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) @@ -119,10 +125,11 @@ rankIO -> UploadFeature -> DocumentationFeature -> ServerEnv + -> TarIndexCacheFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs env pkg = do +rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg @@ -145,20 +152,32 @@ rankIO core vers downs upl docs env pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageTarball = pkgLatestTarball . head <$> info - documentBlob :: ServerPartE (Maybe BlobId) - documentBlob = queryDocumentation docs pkgId - blobStore = serverBlobStore env - documentation = do + packageTarB = info >>= liftIO . packageTarball tarCache . head + packageTarEntr = do + tarB <- packageTarB + return + . join + $ (\(path, _, index) -> TarIndex.lookup index path) + <$> rightToMaybe tarB + rightToMaybe (Right a) = Just a + rightToMaybe (Left _) = Nothing + documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) + documentBlob = queryDocumentation docs pkgId + documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentationEntry = do + index <- documentIndex + path <- documentPath + return . join $ liftM2 TarIndex.lookup index path + + blobStore = serverBlobStore env + documentPath = do blob <- documentBlob - maybeIO blob - where - maybeIO Nothing = return Nothing - maybeIO (Just a) = liftIO (Just <$> BlobStorage.fetch blobStore a) + return $ (BlobStorage.filepath blobStore) <$> blob - documLines = - (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) - <$> documentation -- 10 is \n + -- TODO fix this + --documLines = + -- (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) + -- <$> documentation -- 10 is \n authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -271,10 +290,11 @@ rankPackage -> UploadFeature -> DocumentationFeature -> ServerEnv + -> TarIndexCacheFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs env p = - rankIO core versions download upload docs env p +rankPackage core versions download upload docs env tarCache p = + rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) From 37ce2fb0e624e5d369f2e56d8624bd6a622e1949 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 29 Jul 2022 17:18:22 +0200 Subject: [PATCH 30/62] added codeScore --- .../Server/Features/PackageRank.hs | 109 ++++++++++++------ 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9166638f..1c4564ea 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,9 +1,9 @@ +{-# LANGUAGE TupleSections #-} + module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) @@ -15,8 +15,8 @@ import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage -import Distribution.Server.Framework.Feature - ( queryState ) +import Distribution.Server.Framework.ServerEnv + ( ServerEnv(..) ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -24,12 +24,18 @@ import Distribution.Server.Users.Group ) import Distribution.Server.Util.CountingMap ( cmFind ) +import Distribution.Package +import Distribution.PackageDescription import Distribution.Types.Version +import Distribution.Simple.Utils ( safeHead + , safeLast + ) +import qualified Distribution.Utils.ShortText as S -import Control.Monad ( forM - , join +import qualified Codec.Archive.Tar as Tar +import qualified Codec.Archive.Tar.Entry as Tar +import Control.Monad ( join , liftM2 - , mapM ) import Control.Monad.IO.Class ( liftIO ) import qualified Data.ByteString.Lazy as BSL @@ -37,23 +43,16 @@ import Data.List ( maximumBy , sortBy ) import Data.Maybe ( isNothing ) -import Data.Ord ( comparing - , max - , min - ) -import qualified Data.TarIndex as TarIndex +import Data.Ord ( comparing ) +import qualified Data.TarIndex as T import Data.Time.Clock ( UTCTime(..) , diffUTCTime , getCurrentTime , nominalDay ) -import Distribution.Server.Framework.ServerEnv - ( ServerEnv(..) ) -import Distribution.Simple.Utils ( safeHead - , safeLast - ) -import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) +import System.FilePath ( isExtensionOf ) +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -133,7 +132,8 @@ rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - return (temp <> versS <> auth) + codeS <- codeScore documentLines srcLines packageLines + return (temp <> versS <> auth <> codeS) where pkgId = package pkg @@ -152,32 +152,53 @@ rankIO core vers downs upl docs env tarCache pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageTarB = info >>= liftIO . packageTarball tarCache . head - packageTarEntr = do - tarB <- packageTarB + packageEntr = do + inf <- info + tarB <- liftIO . packageTarball tarCache . head $ inf return - . join - $ (\(path, _, index) -> TarIndex.lookup index path) - <$> rightToMaybe tarB + $ (\(path, _, index) -> (path, ) <$> T.lookup index path) + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing + documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) - documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) - documentationEntry = do + documentBlob = queryDocumentation docs pkgId + documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentationEntr = do index <- documentIndex path <- documentPath - return . join $ liftM2 TarIndex.lookup index path + return $ liftM2 (,) path (join $ liftM2 T.lookup index path) + documentLines = documentationEntr >>= liftIO . filterLinesTar (const True) + srcLines = packageEntr >>= liftIO . filterLinesTar (isExtensionOf ".hs") + packageLines = packageEntr >>= liftIO . filterLinesTar (const True) + + filterLinesTar + :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double + filterLinesTar f (Just (path, T.TarFileEntry offset)) = + if f path then getLines path offset else return 0 + filterLinesTar f (Just (_, T.TarDir dir)) = + sum <$> mapM (filterLinesTar f . Just) dir + filterLinesTar _ _ = return 0 + + -- TODO if size is too big give it a good score and do not read the file + getLines path offset = do + handle <- SIO.openFile path SIO.ReadMode + SIO.hSeek handle SIO.AbsoluteSeek (fromIntegral $ offset * 512) + header <- BSL.hGet handle 512 + case Tar.read header of + (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do + body <- BSL.hGet handle (fromIntegral siz) + return + $ int2Double + . length + . filter (not . BSL.null) + . BSL.split 10 + $ body + _ -> return 0 - blobStore = serverBlobStore env documentPath = do blob <- documentBlob - return $ (BlobStorage.filepath blobStore) <$> blob - - -- TODO fix this - --documLines = - -- (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) - -- <$> documentation -- 10 is \n + return $ BlobStorage.filepath (serverBlobStore env) <$> blob authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -191,6 +212,20 @@ authorScore upload desc = return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) +codeScore + :: ServerPartE Double + -> ServerPartE Double + -> ServerPartE Double + -> ServerPartE Scorer +codeScore documentL haskellL packageL = do + docum <- documentL + haskell <- haskellL + pkg <- packageL + return + $ boolScor 1 (pkg > 700) + <> boolScor 1 (pkg < 80000) + <> fracScor 2 (min 1 (haskell / 5000)) + <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) versionScore :: ServerPartE [Version] @@ -296,5 +331,3 @@ rankPackage rankPackage core versions download upload docs env tarCache p = rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) - - From 817559dd3c225fabdcf5a4d4d0814b91ca9fb1f5 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 2 Aug 2022 17:10:42 +0200 Subject: [PATCH 31/62] replaced some Features by ListFeature --- .../Server/Features/PackageRank.hs | 81 +++++++++---------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1c4564ea..c7b0c68c 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,32 +4,25 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where +import Distribution.Package +import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.PackageList import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache -import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types -import Distribution.Server.Users.Group - ( queryUserGroups - , size - ) -import Distribution.Server.Util.CountingMap - ( cmFind ) -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead , safeLast ) +import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar @@ -52,7 +45,7 @@ import Data.Time.Clock ( UTCTime(..) ) import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -116,22 +109,20 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation --- TODO CoreFeature can be substituted by CoreResource rankIO :: CoreResource -> VersionsFeature - -> DownloadFeature - -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature + -> ListFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs env tarCache pkg = do +rankIO core vers docs env tarCache list pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg + auth <- authorScore pkg pkgIt codeS <- codeScore documentLines srcLines packageLines return (temp <> versS <> auth <> codeS) @@ -139,6 +130,7 @@ rankIO core vers downs upl docs env tarCache pkg = do pkgId = package pkg pkgNm = pkgName pkgId info = lookupPackageName core pkgNm + pkgIt = safeHead <$> makeItemList list [pkgNm] descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -150,14 +142,18 @@ rankIO core vers downs upl docs env tarCache pkg = do sortBy (flip compare) . map (pkgVersion . package . packageDescription) <$> descriptions - downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + downloadsPerMonth :: ServerPartE (Maybe Int) + downloadsPerMonth = liftIO $ do + items <- pkgIt + return (itemDownloads <$> items) -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do inf <- info - tarB <- liftIO . packageTarball tarCache . head $ inf + tarB <- liftIO $ mapM (packageTarball tarCache) (safeHead inf) return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB + =<< (join $ rightToMaybe <$> tarB) + rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -200,17 +196,19 @@ rankIO core vers downs upl docs env tarCache pkg = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer -authorScore upload desc = +authorScore + :: PackageDescription -> IO (Maybe PackageItem) -> ServerPartE Scorer +authorScore desc item = liftIO maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - - return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) + it <- item + return $ boolScor 3 (nMaint it > 1) <> scorer 5 (int2Double $ nMaint it) + nMaint (Just iT) = length $ itemMaintainer iT + nMaint Nothing = 0 codeScore :: ServerPartE Double @@ -277,16 +275,18 @@ temporalScore :: PackageDescription -> ServerPartE [UTCTime] -> ServerPartE [Version] - -> ServerPartE Int + -> ServerPartE (Maybe Int) -> ServerPartE Scorer -temporalScore p lastUploads versionList downloadsPerMonth = do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ tract <> fresh <> downs +temporalScore p lastUploads versionList downloadsPM = do + download <- downloadsPM + fresh <- freshnessScore + downS <- downloadScore download + tract <- tractionScore download + return $ tract <> fresh <> downS where - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = calcDownScore <$> downloadsPerMonth + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore Nothing = return $ scorer 5 0 + downloadScore (Just downloads) = return $ calcDownScore downloads calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -300,10 +300,10 @@ temporalScore p lastUploads versionList downloadsPerMonth = do _ -> liftIO $ freshness vers (head ups) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching - tractionScore = do + tractionScore Nothing = return $ scorer 1 0 + tractionScore (Just downloads) = do fresh <- packageFreshness - downs <- downloadsPerMonth - return $ boolScor 1 (fresh * int2Double downs > 1000) + return $ boolScor 1 (fresh * int2Double downloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -321,13 +321,12 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: CoreResource -> VersionsFeature - -> DownloadFeature - -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature + -> ListFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs env tarCache p = - rankIO core versions download upload docs env tarCache p +rankPackage core versions docs env tarCache list p = + rankIO core versions docs env tarCache list p >>= (\x -> return $ total x + total (rankPackagePage p)) From 0cd6c96cefbceaaf488e51d468d060a3f0e6d58f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 3 Aug 2022 18:57:20 +0200 Subject: [PATCH 32/62] added some Features to BrowseFeatures - prototype --- src/Distribution/Server/Features.hs | 4 ++++ src/Distribution/Server/Features/Browse.hs | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index 9755fce2..3e72889c 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -375,6 +375,10 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do searchFeature distroFeature candidatesFeature + versionsFeature + documentationCoreFeature + tarIndexCacheFeature + env packageInfoJSONFeature <- mkPackageJSONFeature coreFeature diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index 9b53e01b..72ca04fc 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -26,6 +26,9 @@ import Distribution.Server.Features.PackageList (ListFeature(ListFeature), Packa import Distribution.Server.Features.Search (SearchFeature(SearchFeature), searchPackages) import Distribution.Server.Features.Tags (Tag(..), TagsFeature(TagsFeature), TagsResource, tagUri, tagsResource) import Distribution.Server.Features.Users (UserFeature(UserFeature), UserResource, userResource, userPageUri) +import Distribution.Server.Features.PreferredVersions (VersionsFeature) +import Distribution.Server.Features.Documentation (DocumentationFeature) +import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature) import Distribution.Server.Framework.Error (ErrorResponse(ErrorResponse)) import Distribution.Server.Framework.Feature (HackageFeature(..), emptyHackageFeature) import Distribution.Server.Framework.RequestContentTypes (expectAesonContent) @@ -48,6 +51,10 @@ type BrowseFeature = -> SearchFeature -> DistroFeature -> PackageCandidatesFeature + -> VersionsFeature + -> DocumentationFeature + -> TarIndexCacheFeature + -> ServerEnv -> IO HackageFeature data ResponseFormatWithMethod @@ -64,7 +71,7 @@ initBrowseFeature ServerEnv{serverTemplatesDir, serverTemplatesMode} = do , "noscript-search-form.html" , "noscript-next-page-form.html" ] - pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature -> do + pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature versionsFeature documentationFeature tarIndexCacheFeature serverEnv -> do let html = htmlUtilities coreFeature packageCandidatesFeature tagsFeature userFeature renderer :: ResponseFormatWithMethod -> ServerPartT (ExceptT ErrorResponse IO) Response From 983606fcca9f3e8dcdb251c3d08c2a3eaa6a40c4 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 4 Aug 2022 22:26:54 +0200 Subject: [PATCH 33/62] Revert "added some Features to BrowseFeatures - prototype" This reverts commit 124006fe3c1d3b01942def21d560b6f13b9e6dec. --- src/Distribution/Server/Features.hs | 4 ---- src/Distribution/Server/Features/Browse.hs | 9 +-------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index 3e72889c..9755fce2 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -375,10 +375,6 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do searchFeature distroFeature candidatesFeature - versionsFeature - documentationCoreFeature - tarIndexCacheFeature - env packageInfoJSONFeature <- mkPackageJSONFeature coreFeature diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index 72ca04fc..9b53e01b 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -26,9 +26,6 @@ import Distribution.Server.Features.PackageList (ListFeature(ListFeature), Packa import Distribution.Server.Features.Search (SearchFeature(SearchFeature), searchPackages) import Distribution.Server.Features.Tags (Tag(..), TagsFeature(TagsFeature), TagsResource, tagUri, tagsResource) import Distribution.Server.Features.Users (UserFeature(UserFeature), UserResource, userResource, userPageUri) -import Distribution.Server.Features.PreferredVersions (VersionsFeature) -import Distribution.Server.Features.Documentation (DocumentationFeature) -import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature) import Distribution.Server.Framework.Error (ErrorResponse(ErrorResponse)) import Distribution.Server.Framework.Feature (HackageFeature(..), emptyHackageFeature) import Distribution.Server.Framework.RequestContentTypes (expectAesonContent) @@ -51,10 +48,6 @@ type BrowseFeature = -> SearchFeature -> DistroFeature -> PackageCandidatesFeature - -> VersionsFeature - -> DocumentationFeature - -> TarIndexCacheFeature - -> ServerEnv -> IO HackageFeature data ResponseFormatWithMethod @@ -71,7 +64,7 @@ initBrowseFeature ServerEnv{serverTemplatesDir, serverTemplatesMode} = do , "noscript-search-form.html" , "noscript-next-page-form.html" ] - pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature versionsFeature documentationFeature tarIndexCacheFeature serverEnv -> do + pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature -> do let html = htmlUtilities coreFeature packageCandidatesFeature tagsFeature userFeature renderer :: ResponseFormatWithMethod -> ServerPartT (ExceptT ErrorResponse IO) Response From 514755470f10e867a995d92929a9991e42702137 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 11:20:34 +0200 Subject: [PATCH 34/62] Revert "replaced some Features by ListFeature" This reverts commit a8ae12e6db9353000839dd7b4a97e8f2022d82d4. --- .../Server/Features/PackageRank.hs | 81 ++++++++++--------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c7b0c68c..1c4564ea 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,25 +4,32 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.PackageList +import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache +import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types +import Distribution.Server.Users.Group + ( queryUserGroups + , size + ) +import Distribution.Server.Util.CountingMap + ( cmFind ) +import Distribution.Package +import Distribution.PackageDescription +import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead , safeLast ) -import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar @@ -45,7 +52,7 @@ import Data.Time.Clock ( UTCTime(..) ) import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -109,20 +116,22 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation +-- TODO CoreFeature can be substituted by CoreResource rankIO :: CoreResource -> VersionsFeature + -> DownloadFeature + -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> ListFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers docs env tarCache list pkg = do +rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore pkg pkgIt + auth <- authorScore upl pkg codeS <- codeScore documentLines srcLines packageLines return (temp <> versS <> auth <> codeS) @@ -130,7 +139,6 @@ rankIO core vers docs env tarCache list pkg = do pkgId = package pkg pkgNm = pkgName pkgId info = lookupPackageName core pkgNm - pkgIt = safeHead <$> makeItemList list [pkgNm] descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -142,18 +150,14 @@ rankIO core vers docs env tarCache list pkg = do sortBy (flip compare) . map (pkgVersion . package . packageDescription) <$> descriptions - downloadsPerMonth :: ServerPartE (Maybe Int) - downloadsPerMonth = liftIO $ do - items <- pkgIt - return (itemDownloads <$> items) + downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do inf <- info - tarB <- liftIO $ mapM (packageTarball tarCache) (safeHead inf) + tarB <- liftIO . packageTarball tarCache . head $ inf return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< (join $ rightToMaybe <$> tarB) - + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -196,19 +200,17 @@ rankIO core vers docs env tarCache list pkg = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore - :: PackageDescription -> IO (Maybe PackageItem) -> ServerPartE Scorer -authorScore desc item = +authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore upload desc = liftIO maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do - it <- item - return $ boolScor 3 (nMaint it > 1) <> scorer 5 (int2Double $ nMaint it) - nMaint (Just iT) = length $ itemMaintainer iT - nMaint Nothing = 0 + maint <- queryUserGroups [maintainersGroup upload pkgNm] + + return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) codeScore :: ServerPartE Double @@ -275,18 +277,16 @@ temporalScore :: PackageDescription -> ServerPartE [UTCTime] -> ServerPartE [Version] - -> ServerPartE (Maybe Int) + -> ServerPartE Int -> ServerPartE Scorer -temporalScore p lastUploads versionList downloadsPM = do - download <- downloadsPM - fresh <- freshnessScore - downS <- downloadScore download - tract <- tractionScore download - return $ tract <> fresh <> downS +temporalScore p lastUploads versionList downloadsPerMonth = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract <> fresh <> downs where - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore Nothing = return $ scorer 5 0 - downloadScore (Just downloads) = return $ calcDownScore downloads + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -300,10 +300,10 @@ temporalScore p lastUploads versionList downloadsPM = do _ -> liftIO $ freshness vers (head ups) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching - tractionScore Nothing = return $ scorer 1 0 - tractionScore (Just downloads) = do + tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double downloads > 1000) + downs <- downloadsPerMonth + return $ boolScor 1 (fresh * int2Double downs > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -321,12 +321,13 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: CoreResource -> VersionsFeature + -> DownloadFeature + -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> ListFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions docs env tarCache list p = - rankIO core versions docs env tarCache list p +rankPackage core versions download upload docs env tarCache p = + rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) From 22bdc454ce6b18aa0fd0de92cedb134f1aa1ef2a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 11:48:50 +0200 Subject: [PATCH 35/62] changed ListFeature to fit PackageRank --- src/Distribution/Server/Features.hs | 2 ++ .../Server/Features/PackageList.hs | 25 +++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index 9755fce2..94e1df57 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -291,6 +291,8 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do versionsFeature usersFeature uploadFeature + documentationCoreFeature + tarIndexCacheFeature searchFeature <- mkSearchFeature coreFeature diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index d2b063c2..711ed5ed 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -15,6 +15,9 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Tags import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) +import Distribution.Server.Features.Documentation (DocumentationFeature(..)) +import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) + import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet import Distribution.Server.Users.Group(UserGroup(..), GroupDescription(..)) @@ -41,7 +44,6 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Time.Clock (UTCTime(..)) - data ListFeature = ListFeature { listFeatureInterface :: HackageFeature, @@ -128,6 +130,8 @@ initListFeature :: ServerEnv -> VersionsFeature -> UserFeature -> UploadFeature + -> DocumentationFeature + -> TarIndexCacheFeature -> IO ListFeature) initListFeature _env = do itemCache <- newMemStateWHNF Map.empty @@ -140,11 +144,12 @@ initListFeature _env = do tagsf@TagsFeature{..} versions@VersionsFeature{..} users@UserFeature{..} - uploads@UploadFeature{..} -> do + uploads@UploadFeature{..} + docum tar -> do let (feature, modifyItem, updateDesc) = listFeature core revs download votesf tagsf versions users uploads - itemCache itemUpdate + itemCache itemUpdate docum tar _env registerHookJust packageChangeHook isPackageChangeAny $ \(pkgid, _) -> updateDesc (packageName pkgid) @@ -213,6 +218,9 @@ listFeature :: CoreFeature -> UploadFeature -> MemState (Map PackageName PackageItem) -> Hook (Set PackageName) () + -> DocumentationFeature + -> TarIndexCacheFeature + -> ServerEnv -> (ListFeature, PackageName -> (PackageItem -> PackageItem) -> IO (), PackageName -> IO ()) @@ -226,6 +234,7 @@ listFeature CoreFeature{..} UserFeature{..} UploadFeature{..} itemCache itemUpdate + docum tar env = (ListFeature{..}, modifyItem, updateDesc) where listFeatureInterface = (emptyHackageFeature "list") { @@ -256,7 +265,7 @@ listFeature CoreFeature{..} let pkgs = PackageIndex.lookupPackageName index pkgname case pkgs of [] -> return () --this shouldn't happen - _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem (last pkgs) + _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem pkgs updateDesc pkgname = do index <- queryGetPackageIndex @@ -277,13 +286,15 @@ listFeature CoreFeature{..} constructItemIndex :: IO (Map PackageName PackageItem) constructItemIndex = do index <- queryGetPackageIndex - items <- mapM (constructItem . last) $ PackageIndex.allPackagesByName index + items <- mapM constructItem $ PackageIndex.allPackagesByName index return $ Map.fromList items - constructItem :: PkgInfo -> IO (PackageName, PackageItem) - constructItem pkg = do + constructItem :: [PkgInfo] -> IO (PackageName, PackageItem) + constructItem pkgs = do let pkgname = packageName pkg desc = pkgDesc pkg + pkg = last pkgs + -- [reverse index disabled] revCount <- query . GetReverseCount $ pkgname intRevDirectCount <- revDirectCount pkgname users <- queryGetUserDb tags <- queryTagsForPackage pkgname From 8acc750c61aacf91631505933a60a20ff163d97f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 16:02:58 +0200 Subject: [PATCH 36/62] changed PackageRank to fit in constructItem --- .../Server/Features/PackageRank.hs | 171 +++++++----------- 1 file changed, 66 insertions(+), 105 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1c4564ea..53f552a4 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,7 +4,8 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Server.Features.Core +import Distribution.Package +import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) import Distribution.Server.Features.DownloadCount @@ -12,7 +13,6 @@ import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import Distribution.Server.Features.Upload -import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv @@ -20,39 +20,28 @@ import Distribution.Server.Framework.ServerEnv import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups - , size - ) + , size) import Distribution.Server.Util.CountingMap ( cmFind ) -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead - , safeLast - ) + , safeLast) +import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad ( join - , liftM2 - ) -import Control.Monad.IO.Class ( liftIO ) + , liftM2) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy - ) + , sortBy) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T -import Data.Time.Clock ( UTCTime(..) - , diffUTCTime - , getCurrentTime - , nominalDay - ) +import qualified Data.Time.Clock as CL import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -87,13 +76,13 @@ patches :: Num a => [a] -> a patches (_ : _ : xs) = sum xs patches _ = 0 -numDays :: Maybe UTCTime -> Maybe UTCTime -> Double +numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Double numDays (Just first) (Just end) = - fromRational $ toRational $ diffUTCTime first end / fromRational - (toRational nominalDay) + fromRational $ toRational $ CL.diffUTCTime first end / fromRational + (toRational CL.nominalDay) numDays _ _ = 0 -freshness :: [Version] -> UTCTime -> Bool -> IO Double +freshness :: [Version] -> CL.UTCTime -> Bool -> IO Double freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration @@ -110,7 +99,7 @@ freshness (x : xs) lastUpd app = | major v > 0 = 200 | minor v > 3 = 140 | otherwise = 80 - age = flip numDays (Just lastUpd) . Just <$> getCurrentTime + age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) -- lookupPackageId @@ -118,59 +107,54 @@ freshness (x : xs) lastUpd app = -- TODO CoreFeature can be substituted by CoreResource rankIO - :: CoreResource - -> VersionsFeature + :: VersionsFeature -> DownloadFeature -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> PackageDescription - -> ServerPartE Scorer + -> [PkgInfo] + -> IO Scorer -rankIO core vers downs upl docs env tarCache pkg = do +rankIO vers downs upl docs env tarCache pkgs = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - codeS <- codeScore documentLines srcLines packageLines + codeS <- codeScore documentLines srcLines return (temp <> versS <> auth <> codeS) where - pkgId = package pkg - pkgNm = pkgName pkgId - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + pkg = packageDescription <$> pkgDesc $ last pkgs + pkgId = package pkg + pkgNm = pkgName pkgId + lastUploads = + sortBy (flip compare) + $ (fst . pkgOriginalUploadInfo <$> pkgs) + ++ (fst . pkgLatestUploadInfo <$> pkgs) + versionList :: [Version] + versionList = sortBy (flip compare) + $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + downloadsPerMonth = cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) packageEntr = do - inf <- info - tarB <- liftIO . packageTarball tarCache . head $ inf + tarB <- packageTarball tarCache . head $ pkgs return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing - documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) + documentBlob :: IO (Maybe BlobStorage.BlobId) documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache) documentationEntr = do index <- documentIndex path <- documentPath return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines = documentationEntr >>= liftIO . filterLinesTar (const True) - srcLines = packageEntr >>= liftIO . filterLinesTar (isExtensionOf ".hs") - packageLines = packageEntr >>= liftIO . filterLinesTar (const True) + documentLines :: IO Double + documentLines = documentationEntr >>= filterLinesTar (const True) + srcLines :: IO Double + srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double @@ -188,22 +172,16 @@ rankIO core vers downs upl docs env tarCache pkg = do case Tar.read header of (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do body <- BSL.hGet handle (fromIntegral siz) - return - $ int2Double - . length - . filter (not . BSL.null) - . BSL.split 10 - $ body + return $ int2Double . length . BSL.split 10 $ body _ -> return 0 documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore :: UploadFeature -> PackageDescription -> IO Scorer authorScore upload desc = - liftIO maintScore - >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) + maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer @@ -212,48 +190,37 @@ authorScore upload desc = return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) -codeScore - :: ServerPartE Double - -> ServerPartE Double - -> ServerPartE Double - -> ServerPartE Scorer -codeScore documentL haskellL packageL = do +codeScore :: IO Double -> IO Double -> IO Scorer +codeScore documentL haskellL = do docum <- documentL haskell <- haskellL - pkg <- packageL return - $ boolScor 1 (pkg > 700) - <> boolScor 1 (pkg < 80000) + $ boolScor 1 (haskell > 700) + <> boolScor 1 (haskell < 80000) <> fracScor 2 (min 1 (haskell / 5000)) <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) versionScore - :: ServerPartE [Version] + :: [Version] -> VersionsFeature - -> ServerPartE [UTCTime] + -> [CL.UTCTime] -> PackageDescription - -> ServerPartE Scorer + -> IO Scorer versionScore versionList versions lastUploads desc = do - intUse <- intUsable - depre <- deprec - lUps <- lastUploads - return $ calculateScore depre lUps intUse + use <- intUsable + depre <- deprec + return $ calculateScore depre lastUploads use where pkgNm = pkgName $ package desc partVers = - versionList - >>= (\y -> - liftIO - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions x y) - ) + flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm intUsable = do (norm, _, unpref) <- partVers return $ versionNumbers <$> norm ++ unpref deprec = do (_, deprecN, _) <- partVers return deprecN - calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer + calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) @@ -274,11 +241,7 @@ versionScore versionList versions lastUploads desc = do <> boolScor 5 (not $ null depre) temporalScore - :: PackageDescription - -> ServerPartE [UTCTime] - -> ServerPartE [Version] - -> ServerPartE Int - -> ServerPartE Scorer + :: PackageDescription -> [CL.UTCTime] -> [Version] -> IO Int -> IO Scorer temporalScore p lastUploads versionList downloadsPerMonth = do fresh <- freshnessScore downs <- downloadScore @@ -292,18 +255,15 @@ temporalScore p lastUploads versionList downloadsPerMonth = do / (if isApp then 5 else 6) ) 5 - packageFreshness = do - ups <- lastUploads - vers <- versionList - case ups of - [] -> return 0 - _ -> liftIO $ freshness vers (head ups) isApp + packageFreshness = case lastUploads of + [] -> return 0 + _ -> freshness versionList (head lastUploads) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do + dows <- downloadsPerMonth fresh <- packageFreshness - downs <- downloadsPerMonth - return $ boolScor 1 (fresh * int2Double downs > 1000) + return $ boolScor 1 (fresh * int2Double dows > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -319,15 +279,16 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats -- TODO fix the function Signature replace PackageDescription to PackageName/Identifier rankPackage - :: CoreResource - -> VersionsFeature + :: VersionsFeature -> DownloadFeature -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> PackageDescription - -> ServerPartE Double -rankPackage core versions download upload docs env tarCache p = - rankIO core versions download upload docs env tarCache p - >>= (\x -> return $ total x + total (rankPackagePage p)) + -> [PkgInfo] + -> IO Double +rankPackage versions download upload docs env tarCache pkgs = + total + . (<>) (rankPackagePage pkgD) + <$> rankIO versions download upload docs env tarCache pkgs + where pkgD = packageDescription $ pkgDesc $ last pkgs From 6be29307eaac8437b445731fbc8ccb0a259c8464 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 14:30:46 +0200 Subject: [PATCH 37/62] integrated PackageRank into ListFeature --- .../Server/Features/PackageRank.hs | 70 ++++++++----------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 53f552a4..adf496ef 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,5 +1,7 @@ {-# LANGUAGE TupleSections #-} +-- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank + module Distribution.Server.Features.PackageRank ( rankPackage ) where @@ -8,33 +10,29 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache -import Distribution.Server.Features.Upload import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types -import Distribution.Server.Users.Group - ( queryUserGroups - , size) -import Distribution.Server.Util.CountingMap - ( cmFind ) import Distribution.Simple.Utils ( safeHead - , safeLast) + , safeLast + ) import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad ( join - , liftM2) + , liftM2 + ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy) + , sortBy + ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T @@ -108,25 +106,23 @@ freshness (x : xs) lastUpd app = -- TODO CoreFeature can be substituted by CoreResource rankIO :: VersionsFeature - -> DownloadFeature - -> UploadFeature + -> Int + -> Int -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] -> IO Scorer -rankIO vers downs upl docs env tarCache pkgs = do - temp <- temporalScore pkg lastUploads versionList downloadsPerMonth +rankIO vers recentDownloads maintainers docs env tarCache pkgs = do + temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg codeS <- codeScore documentLines srcLines - return (temp <> versS <> auth <> codeS) + return (temp <> versS <> codeS <> authorScore maintainers pkg) where pkg = packageDescription <$> pkgDesc $ last pkgs pkgId = package pkg - pkgNm = pkgName pkgId lastUploads = sortBy (flip compare) $ (fst . pkgOriginalUploadInfo <$> pkgs) @@ -134,9 +130,7 @@ rankIO vers downs upl docs env tarCache pkgs = do versionList :: [Version] versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) - downloadsPerMonth = cmFind pkgNm <$> recentPackageDownloads downs - -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do tarB <- packageTarball tarCache . head $ pkgs return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) @@ -179,16 +173,12 @@ rankIO vers downs upl docs env tarCache pkgs = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> IO Scorer -authorScore upload desc = - maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) +authorScore :: Int -> PackageDescription -> Scorer +authorScore maintainers desc = + boolScor 1 (not $ S.null $ author desc) <> maintScore where - pkgNm = pkgName $ package desc - maintScore :: IO Scorer - maintScore = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - - return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) + maintScore = + boolScor 3 (maintainers > 1) <> scorer 5 (int2Double maintainers) codeScore :: IO Double -> IO Double -> IO Scorer codeScore documentL haskellL = do @@ -241,15 +231,14 @@ versionScore versionList versions lastUploads desc = do <> boolScor 5 (not $ null depre) temporalScore - :: PackageDescription -> [CL.UTCTime] -> [Version] -> IO Int -> IO Scorer -temporalScore p lastUploads versionList downloadsPerMonth = do + :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> IO Scorer +temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore - downs <- downloadScore tract <- tractionScore - return $ tract <> fresh <> downs + return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = calcDownScore <$> downloadsPerMonth + downloadScore = calcDownScore recentDownloads calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -261,9 +250,8 @@ temporalScore p lastUploads versionList downloadsPerMonth = do freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do - dows <- downloadsPerMonth fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double dows > 1000) + return $ boolScor 1 (fresh * int2Double recentDownloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -280,15 +268,15 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: VersionsFeature - -> DownloadFeature - -> UploadFeature + -> Int + -> Int -> DocumentationFeature - -> ServerEnv -> TarIndexCacheFeature + -> ServerEnv -> [PkgInfo] -> IO Double -rankPackage versions download upload docs env tarCache pkgs = +rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) - <$> rankIO versions download upload docs env tarCache pkgs + <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs where pkgD = packageDescription $ pkgDesc $ last pkgs From 8745c694fbc93806f725fe3b362e1f8f8440ba51 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 14:33:10 +0200 Subject: [PATCH 38/62] --no-edit --- src/Distribution/Server/Features/PackageList.hs | 15 ++++++++++----- src/Distribution/Server/Framework/MemSize.hs | 3 +++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 711ed5ed..47554b94 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -17,6 +17,7 @@ import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) import Distribution.Server.Features.Documentation (DocumentationFeature(..)) import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) +import Distribution.Server.Features.PackageRank import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet @@ -94,6 +95,8 @@ data PackageItem = PackageItem { itemHotness :: !Float, -- Reference version (non-deprecated highest numbered version) itemReferenceVersion :: !String + -- heuristic way to sort packages + itemPackageRank :: !Double } instance MemSize PackageItem where @@ -120,7 +123,6 @@ emptyPackageItem pkg = itemReferenceVersion = "" } - initListFeature :: ServerEnv -> IO (CoreFeature -> ReverseFeature @@ -145,11 +147,11 @@ initListFeature _env = do versions@VersionsFeature{..} users@UserFeature{..} uploads@UploadFeature{..} - docum tar -> do + documentation tar -> do let (feature, modifyItem, updateDesc) = listFeature core revs download votesf tagsf versions users uploads - itemCache itemUpdate docum tar _env + itemCache itemUpdate documentation tar _env registerHookJust packageChangeHook isPackageChangeAny $ \(pkgid, _) -> updateDesc (packageName pkgid) @@ -230,11 +232,11 @@ listFeature CoreFeature{..} DownloadFeature{..} VotesFeature{..} TagsFeature{..} - VersionsFeature{..} + versions@VersionsFeature{..} UserFeature{..} UploadFeature{..} itemCache itemUpdate - docum tar env + documentation tar env = (ListFeature{..}, modifyItem, updateDesc) where listFeatureInterface = (emptyHackageFeature "list") { @@ -303,6 +305,8 @@ listFeature CoreFeature{..} deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) prefsinfo <- queryGetPreferredInfo pkgname + packageR <- rankPackage versions (cmFind pkgname downs) + (UserIdSet.size maintainers) documentation tar env pkgs return $ (,) pkgname . updateReferenceVersion prefsinfo [pkgVersion (pkgInfoId pkg)] $ (updateDescriptionItem desc $ emptyPackageItem pkgname) { itemTags = tags @@ -313,6 +317,7 @@ listFeature CoreFeature{..} , itemLastUpload = fst (pkgOriginalUploadInfo pkg) , itemRevDepsCount = intRevDirectCount , itemHotness = votes + fromIntegral (cmFind pkgname downs) + fromIntegral intRevDirectCount * 2 + , itemPackageRank = packageR } ------------------------------ diff --git a/src/Distribution/Server/Framework/MemSize.hs b/src/Distribution/Server/Framework/MemSize.hs index d98e0008..0fcc1a36 100644 --- a/src/Distribution/Server/Framework/MemSize.hs +++ b/src/Distribution/Server/Framework/MemSize.hs @@ -139,6 +139,9 @@ instance MemSize Integer where instance MemSize Float where memSize _ = 2 +instance MemSize Double where + memSize _ = 3 + instance MemSize UTCTime where memSize _ = 7 From a3bb571bc97bb2a5afb1d9c1cf108018f96143ed Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 17:10:29 +0200 Subject: [PATCH 39/62] tried to add an column and failed --- datafiles/templates/Html/noscript-search-form.html.st | 1 + src/Distribution/Server/Features/Browse.hs | 4 +++- src/Distribution/Server/Features/Browse/ApplyFilter.hs | 1 + src/Distribution/Server/Features/Browse/Options.hs | 5 ++++- src/Distribution/Server/Features/Browse/Parsers.hs | 3 ++- 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/datafiles/templates/Html/noscript-search-form.html.st b/datafiles/templates/Html/noscript-search-form.html.st index 7c1f318e..0193d6e9 100644 --- a/datafiles/templates/Html/noscript-search-form.html.st +++ b/datafiles/templates/Html/noscript-search-form.html.st @@ -23,6 +23,7 @@ + diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index 9b53e01b..fa53d746 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -139,7 +139,8 @@ packageIndexInfoToValue :: CoreResource -> TagsResource -> UserResource -> Packa packageIndexInfoToValue coreResource tagsResource userResource PackageItem{itemName, itemDownloads, itemVotes, - itemDesc, itemTags, itemLastUpload, itemReferenceVersion, itemMaintainer} = + itemDesc, itemTags, itemLastUpload, + itemReferenceVersion, itemMaintainer, itemPackageRank} = object [ Key.fromString "name" .= renderPackage itemName , Key.fromString "downloads" .= itemDownloads @@ -149,6 +150,7 @@ packageIndexInfoToValue , Key.fromString "lastUpload" .= iso8601Show itemLastUpload , Key.fromString "referenceVersion" .= itemReferenceVersion , Key.fromString "maintainers" .= map renderUser itemMaintainer + , Key.fromString "packageRank" .= itemPackageRank ] where renderTag :: Tag -> Value diff --git a/src/Distribution/Server/Features/Browse/ApplyFilter.hs b/src/Distribution/Server/Features/Browse/ApplyFilter.hs index f129109f..4b33c97d 100644 --- a/src/Distribution/Server/Features/Browse/ApplyFilter.hs +++ b/src/Distribution/Server/Features/Browse/ApplyFilter.hs @@ -65,6 +65,7 @@ sort isSearch sortColumn sortDirection = LastUpload -> comparing itemLastUpload ReferenceVersion -> comparing itemReferenceVersion Maintainers -> comparing itemMaintainer + PackageRank -> comparing itemPackageRank in sortBy (maybeReverse comparer) where maybeReverse = diff --git a/src/Distribution/Server/Features/Browse/Options.hs b/src/Distribution/Server/Features/Browse/Options.hs index 64416b35..e8145814 100644 --- a/src/Distribution/Server/Features/Browse/Options.hs +++ b/src/Distribution/Server/Features/Browse/Options.hs @@ -9,7 +9,8 @@ import Distribution.Server.Features.Browse.Parsers (Filter, conditions, condsToF data IsSearch = IsSearch | IsNotSearch -data NormalColumn = Name | Downloads | Rating | Description | Tags | LastUpload | ReferenceVersion | Maintainers +data NormalColumn = Name | Downloads | Rating | Description | Tags | + LastUpload | ReferenceVersion | Maintainers | PackageRank deriving (Show, Eq) data Column = DefaultColumn | NormalColumn NormalColumn @@ -38,6 +39,7 @@ instance FromJSON Column where "lastUpload" -> pure $ NormalColumn LastUpload "referenceVersion" -> pure $ NormalColumn ReferenceVersion "maintainers" -> pure $ NormalColumn Maintainers + "packageRank" -> pure $ NormalColumn PackageRank t -> fail $ "Column invalid: " ++ T.unpack t columnToTemplateName :: Column -> String @@ -51,6 +53,7 @@ columnToTemplateName = \case NormalColumn LastUpload -> "lastUpload" NormalColumn ReferenceVersion -> "referenceVersion" NormalColumn Maintainers -> "maintainers" + NormalColumn PackageRank -> "packageRank" instance FromJSON Direction where parseJSON = diff --git a/src/Distribution/Server/Features/Browse/Parsers.hs b/src/Distribution/Server/Features/Browse/Parsers.hs index 6445bbc1..2775cd79 100644 --- a/src/Distribution/Server/Features/Browse/Parsers.hs +++ b/src/Distribution/Server/Features/Browse/Parsers.hs @@ -85,7 +85,7 @@ allowedAfterOpeningBrace AllowNot = "not " <|> allowedAfterOpeningBrace Disallow allowedAfterOpeningBrace _ = asum [ "downloads", "rating", "lastUpload" , "ageOfLastUpload" - , "tag:", "maintainer:", "deprecated:", "distro:" + , "tag:", "maintainer:", "deprecated:", "distro:", "packageRank" ] -- Whether the 'not' operator can be used. @@ -113,6 +113,7 @@ filterWith allowNot = do "maintainer:" -> MaintainerFilter <$> wordWoSpaceOrParens "deprecated:" -> DeprecatedFilter <$> deprecatedOption "distro:" -> DistroFilter <$> wordWoSpaceOrParens + "packageRank" -> DownloadsFilter <$> opAndSndParam decimal _ -> fail "Impossible since fieldName possibilities are known at compile time" pure filt From e53968cb36b4a802360d9665e2301d0574959c7d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 17:54:45 +0200 Subject: [PATCH 40/62] switch Doubles for Floats --- .../Server/Features/PackageList.hs | 2 +- .../Server/Features/PackageRank.hs | 46 +++++++++---------- src/Distribution/Server/Framework/MemSize.hs | 3 -- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 47554b94..a8ab9c8b 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -96,7 +96,7 @@ data PackageItem = PackageItem { -- Reference version (non-deprecated highest numbered version) itemReferenceVersion :: !String -- heuristic way to sort packages - itemPackageRank :: !Double + itemPackageRank :: !Float } instance MemSize PackageItem where diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index adf496ef..e8bc74fd 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -37,31 +37,31 @@ import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T import qualified Data.Time.Clock as CL -import GHC.Float ( int2Double ) +import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximum :: Double - , score :: Double + { maximum :: Float + , score :: Float } deriving Show instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) -scorer :: Double -> Double -> Scorer +scorer :: Float -> Float -> Scorer scorer maxim scr = if maxim >= scr then Scorer maxim scr else Scorer maxim maxim -fracScor :: Double -> Double -> Scorer +fracScor :: Float -> Float -> Scorer fracScor maxim frac = scorer maxim (maxim * frac) -boolScor :: Double -> Bool -> Scorer +boolScor :: Float -> Bool -> Scorer boolScor k True = Scorer k k boolScor k False = Scorer k 0 -total :: Scorer -> Double +total :: Scorer -> Float total (Scorer a b) = a / b major :: Num a => [a] -> a @@ -74,13 +74,13 @@ patches :: Num a => [a] -> a patches (_ : _ : xs) = sum xs patches _ = 0 -numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Double +numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Float numDays (Just first) (Just end) = fromRational $ toRational $ CL.diffUTCTime first end / fromRational (toRational CL.nominalDay) numDays _ _ = 0 -freshness :: [Version] -> CL.UTCTime -> Bool -> IO Double +freshness :: [Version] -> CL.UTCTime -> Bool -> IO Float freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration @@ -89,7 +89,7 @@ freshness (x : xs) lastUpd app = versionLatest = versionNumbers x daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = int2Double + expectedUpdateInterval = int2Float (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 @@ -145,13 +145,13 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do index <- documentIndex path <- documentPath return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines :: IO Double + documentLines :: IO Float documentLines = documentationEntr >>= filterLinesTar (const True) - srcLines :: IO Double + srcLines :: IO Float srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar - :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double + :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float filterLinesTar f (Just (path, T.TarFileEntry offset)) = if f path then getLines path offset else return 0 filterLinesTar f (Just (_, T.TarDir dir)) = @@ -166,7 +166,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do case Tar.read header of (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do body <- BSL.hGet handle (fromIntegral siz) - return $ int2Double . length . BSL.split 10 $ body + return $ int2Float . length . BSL.split 10 $ body _ -> return 0 documentPath = do @@ -178,9 +178,9 @@ authorScore maintainers desc = boolScor 1 (not $ S.null $ author desc) <> maintScore where maintScore = - boolScor 3 (maintainers > 1) <> scorer 5 (int2Double maintainers) + boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) -codeScore :: IO Double -> IO Double -> IO Scorer +codeScore :: IO Float -> IO Float -> IO Scorer codeScore documentL haskellL = do docum <- documentL haskell <- haskellL @@ -217,15 +217,15 @@ versionScore versionList versions lastUploads desc = do <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) <> scorer 15 - (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) + (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) intUse ) <> scorer 20 - (int2Double $ 4 * length + (int2Float $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - <> scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) <> boolScor 5 (not $ null depre) @@ -240,7 +240,7 @@ temporalScore p lastUploads versionList recentDownloads = do isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads calcDownScore i = Scorer 5 $ min - ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) + ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) 5 @@ -251,14 +251,14 @@ temporalScore p lastUploads versionList recentDownloads = do -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double recentDownloads > 1000) + return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) - desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) + desc = Scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) @@ -274,7 +274,7 @@ rankPackage -> TarIndexCacheFeature -> ServerEnv -> [PkgInfo] - -> IO Double + -> IO Float rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) diff --git a/src/Distribution/Server/Framework/MemSize.hs b/src/Distribution/Server/Framework/MemSize.hs index 0fcc1a36..d98e0008 100644 --- a/src/Distribution/Server/Framework/MemSize.hs +++ b/src/Distribution/Server/Framework/MemSize.hs @@ -139,9 +139,6 @@ instance MemSize Integer where instance MemSize Float where memSize _ = 2 -instance MemSize Double where - memSize _ = 3 - instance MemSize UTCTime where memSize _ = 7 From 16d6e67db9f90d4f0aba34c8a3fe9173799f8287 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 9 Aug 2022 17:48:38 +0200 Subject: [PATCH 41/62] added the column and redid some packageRank issues --- datafiles/static/browse.js | 1 + src/Distribution/Server/Features/PackageRank.hs | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafiles/static/browse.js b/datafiles/static/browse.js index 4c79adcb..90bcec8b 100644 --- a/datafiles/static/browse.js +++ b/datafiles/static/browse.js @@ -135,6 +135,7 @@ const replaceRows = (response) => { tr.appendChild(createLastUpload(row.lastUpload)); tr.appendChild(createSimpleText(row.referenceVersion)); tr.appendChild(createMaintainers(row.maintainers)); + tr.appendChild(createSimpleText(row.packageRank)); l.appendChild(tr); } }; diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e8bc74fd..a6cf628a 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -89,8 +89,8 @@ freshness (x : xs) lastUpd app = versionLatest = versionNumbers x daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = int2Float - (min (versionStabilityInterval versionLatest) $ length (x : xs)) + expectedUpdateInterval = + int2Float (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 | patches v > 0 = 300 @@ -177,8 +177,7 @@ authorScore :: Int -> PackageDescription -> Scorer authorScore maintainers desc = boolScor 1 (not $ S.null $ author desc) <> maintScore where - maintScore = - boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) + maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) codeScore :: IO Float -> IO Float -> IO Scorer codeScore documentL haskellL = do @@ -218,7 +217,7 @@ versionScore versionList versions lastUploads desc = do <> scorer 15 (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) - intUse + intUse ) <> scorer 20 From f90c797d53ed84d75a70b06f0597272caaf19864 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 9 Aug 2022 18:04:18 +0200 Subject: [PATCH 42/62] fixed some basic bugs --- src/Distribution/Server/Features/PackageRank.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index a6cf628a..8462f600 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -42,13 +42,13 @@ import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximum :: Float + { maximumS :: Float , score :: Float } deriving Show instance Semigroup Scorer where - (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) + (Scorer a b) <> (Scorer c d) = Scorer (a + c) (b + d) scorer :: Float -> Float -> Scorer scorer maxim scr = @@ -62,7 +62,7 @@ boolScor k True = Scorer k k boolScor k False = Scorer k 0 total :: Scorer -> Float -total (Scorer a b) = a / b +total (Scorer a b) = b / a major :: Num a => [a] -> a major (x : _) = x @@ -257,7 +257,7 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) - desc = Scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) + desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) From d878f42da05904da041bf172f2c25cee32197e5a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:22:57 +0200 Subject: [PATCH 43/62] removed Browse/parser changes --- src/Distribution/Server/Features/Browse/Parsers.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Distribution/Server/Features/Browse/Parsers.hs b/src/Distribution/Server/Features/Browse/Parsers.hs index 2775cd79..6445bbc1 100644 --- a/src/Distribution/Server/Features/Browse/Parsers.hs +++ b/src/Distribution/Server/Features/Browse/Parsers.hs @@ -85,7 +85,7 @@ allowedAfterOpeningBrace AllowNot = "not " <|> allowedAfterOpeningBrace Disallow allowedAfterOpeningBrace _ = asum [ "downloads", "rating", "lastUpload" , "ageOfLastUpload" - , "tag:", "maintainer:", "deprecated:", "distro:", "packageRank" + , "tag:", "maintainer:", "deprecated:", "distro:" ] -- Whether the 'not' operator can be used. @@ -113,7 +113,6 @@ filterWith allowNot = do "maintainer:" -> MaintainerFilter <$> wordWoSpaceOrParens "deprecated:" -> DeprecatedFilter <$> deprecatedOption "distro:" -> DistroFilter <$> wordWoSpaceOrParens - "packageRank" -> DownloadsFilter <$> opAndSndParam decimal _ -> fail "Impossible since fieldName possibilities are known at compile time" pure filt From a7bcef6f2c583371ab4b33c9276cc951933aa6ff Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:39:50 +0200 Subject: [PATCH 44/62] Fixed missing titile and changed fixed description --- datafiles/templates/Html/browse.html.st | 4 ++++ datafiles/templates/Html/noscript-search-form.html.st | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/datafiles/templates/Html/browse.html.st b/datafiles/templates/Html/browse.html.st index ddc240e7..aa919775 100644 --- a/datafiles/templates/Html/browse.html.st +++ b/datafiles/templates/Html/browse.html.st @@ -93,6 +93,9 @@ #arrow-maintainers { width: 100px; } + #arrow-packageRank { + width: 150px; + } .lastUpload, #sliderAndOutput { white-space: nowrap; } @@ -214,6 +217,7 @@ Last U/L Reference Version Maintainers + Package Rank diff --git a/datafiles/templates/Html/noscript-search-form.html.st b/datafiles/templates/Html/noscript-search-form.html.st index 0193d6e9..55c242af 100644 --- a/datafiles/templates/Html/noscript-search-form.html.st +++ b/datafiles/templates/Html/noscript-search-form.html.st @@ -23,7 +23,7 @@ - + From 6a887b5fb1ad48efc4061a590aed345559b3f936 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:51:04 +0200 Subject: [PATCH 45/62] Strict Scorer --- src/Distribution/Server/Features/PackageRank.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8462f600..a3e9ded0 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections, BangPatterns #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -42,8 +42,8 @@ import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximumS :: Float - , score :: Float + { maximumS :: !Float + , score :: !Float } deriving Show From e881d70352f6ea36e70b5e2fcc9aee6131564256 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 20:58:51 +0200 Subject: [PATCH 46/62] fixed some partial functions --- .../Server/Features/PackageRank.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index a3e9ded0..9b4c8356 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -114,6 +114,7 @@ rankIO -> [PkgInfo] -> IO Scorer +rankIO _ _ _ _ _ _ [] = return (Scorer (118 + 16 + 4 + 1) 0) rankIO vers recentDownloads maintainers docs env tarCache pkgs = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg @@ -131,10 +132,10 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- packageTarball tarCache . head $ pkgs + tarB <- mapM (packageTarball tarCache) (safeHead pkgs) return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB + =<< (join $ rightToMaybe <$> tarB) rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -243,17 +244,18 @@ temporalScore p lastUploads versionList recentDownloads = do / (if isApp then 5 else 6) ) 5 - packageFreshness = case lastUploads of - [] -> return 0 - _ -> freshness versionList (head lastUploads) isApp + packageFreshness = case safeHead lastUploads of + Nothing -> return 0 + (Just l) -> freshness versionList l isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) -rankPackagePage :: PackageDescription -> Scorer -rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats +rankPackagePage :: Maybe PackageDescription -> Scorer +rankPackagePage Nothing = Scorer 233 0 +rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -278,4 +280,4 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs - where pkgD = packageDescription $ pkgDesc $ last pkgs + where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From b2a80ce7f5fdf1c947d52f3ab0044c3ec97202c8 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 21:09:17 +0200 Subject: [PATCH 47/62] fixed some bugs --- src/Distribution/Server/Features/PackageRank.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9b4c8356..855e3887 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -112,17 +112,17 @@ rankIO -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] + -> PkgInfo -> IO Scorer -rankIO _ _ _ _ _ _ [] = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs = do +rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) +rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines return (temp <> versS <> codeS <> authorScore maintainers pkg) where - pkg = packageDescription <$> pkgDesc $ last pkgs pkgId = package pkg lastUploads = sortBy (flip compare) @@ -132,10 +132,10 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- mapM (packageTarball tarCache) (safeHead pkgs) + tarB <- packageTarball tarCache $ pkg return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< (join $ rightToMaybe <$> tarB) + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -279,5 +279,5 @@ rankPackage rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) - <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs + <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs (safeLast pkgs) where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From 3089b6dba79fa4700ad5b24f00f9e3fb1f1cae46 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 21:22:45 +0200 Subject: [PATCH 48/62] fixed a bug --- .../Server/Features/PackageRank.hs | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 855e3887..8c0e7802 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections, BangPatterns #-} +{-# LANGUAGE TupleSections #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -43,7 +43,7 @@ import qualified System.IO as SIO data Scorer = Scorer { maximumS :: !Float - , score :: !Float + , score :: !Float } deriving Show @@ -112,17 +112,18 @@ rankIO -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] - -> PkgInfo + -> Maybe PkgInfo -> IO Scorer rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do +rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines return (temp <> versS <> codeS <> authorScore maintainers pkg) where + pkg = packageDescription $ pkgDesc pkgI pkgId = package pkg lastUploads = sortBy (flip compare) @@ -132,7 +133,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- packageTarball tarCache $ pkg + tarB <- packageTarball tarCache pkgI return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) =<< rightToMaybe tarB @@ -245,8 +246,8 @@ temporalScore p lastUploads versionList recentDownloads = do ) 5 packageFreshness = case safeHead lastUploads of - Nothing -> return 0 - (Just l) -> freshness versionList l isApp + Nothing -> return 0 + (Just l) -> freshness versionList l isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do @@ -254,7 +255,7 @@ temporalScore p lastUploads versionList recentDownloads = do return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) rankPackagePage :: Maybe PackageDescription -> Scorer -rankPackagePage Nothing = Scorer 233 0 +rankPackagePage Nothing = Scorer 233 0 rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) @@ -277,7 +278,12 @@ rankPackage -> [PkgInfo] -> IO Float rankPackage versions recentDownloads maintainers docs tarCache env pkgs = - total - . (<>) (rankPackagePage pkgD) - <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs (safeLast pkgs) + total . (<>) (rankPackagePage pkgD) <$> rankIO versions + recentDownloads + maintainers + docs + env + tarCache + pkgs + (safeLast pkgs) where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From b888ccb5efab4108f5780874e19a6ffb7d374684 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 18 Aug 2022 23:08:04 +0200 Subject: [PATCH 49/62] retrieves src correctly --- .../Server/Features/PackageRank.hs | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8c0e7802..085f7da2 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE BangPatterns #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -120,7 +120,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines - return (temp <> versS <> codeS <> authorScore maintainers pkg) + return $ temp <> versS <> codeS <> authorScore maintainers pkg where pkg = packageDescription $ pkgDesc pkgI @@ -132,13 +132,17 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do versionList :: [Version] versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) - packageEntr = do - tarB <- packageTarball tarCache pkgI - return - $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB - rightToMaybe (Right a) = Just a - rightToMaybe (Left _) = Nothing + srcLines = do + Right (path, _, _) <- packageTarball tarCache pkgI + filterLines (isExtensionOf ".hs") . Tar.read <$> BSL.readFile path + + filterLines f = Tar.foldEntries (countLines f) 0 (const 0) + countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float + countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns + where + !lns = case Tar.entryContent entry of + (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) + _ -> l documentBlob :: IO (Maybe BlobStorage.BlobId) documentBlob = queryDocumentation docs pkgId @@ -149,8 +153,6 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do return $ liftM2 (,) path (join $ liftM2 T.lookup index path) documentLines :: IO Float documentLines = documentationEntr >>= filterLinesTar (const True) - srcLines :: IO Float - srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float From 4748abd1bd52a0f1682bfdcfd043ecac09f0d648 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 19 Aug 2022 22:18:17 +0200 Subject: [PATCH 50/62] fixed documentation retrieval --- .../Server/Features/PackageRank.hs | 59 +++++++------------ 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 085f7da2..9a93e3dd 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -25,21 +25,17 @@ import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar -import qualified Codec.Archive.Tar.Entry as Tar -import Control.Monad ( join - , liftM2 - ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) -import qualified Data.TarIndex as T import qualified Data.Time.Clock as CL import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO + +-- import Debug.Trace (trace) data Scorer = Scorer { maximumS :: !Float @@ -119,7 +115,7 @@ rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documentLines srcLines + codeS <- codeScore documSize srcLines return $ temp <> versS <> codeS <> authorScore maintainers pkg where @@ -134,45 +130,34 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI - filterLines (isExtensionOf ".hs") . Tar.read <$> BSL.readFile path + filterLines (isExtensionOf ".hs") countLines + . Tar.read + <$> BSL.readFile path + documSize = do + path <- documentPath + case path of + Nothing -> return 0 + Just pth -> + filterLines (isExtensionOf ".html") countSize + . Tar.read + <$> BSL.readFile pth - filterLines f = Tar.foldEntries (countLines f) 0 (const 0) + filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns where !lns = case Tar.entryContent entry of (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) _ -> l + countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float + countSize f entry l = if not . f . Tar.entryPath $ entry then l else s + where + !s = case Tar.entryContent entry of + (Tar.NormalFile _ siz) -> l + fromInteger (toInteger siz) + _ -> l documentBlob :: IO (Maybe BlobStorage.BlobId) - documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache) - documentationEntr = do - index <- documentIndex - path <- documentPath - return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines :: IO Float - documentLines = documentationEntr >>= filterLinesTar (const True) - - filterLinesTar - :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float - filterLinesTar f (Just (path, T.TarFileEntry offset)) = - if f path then getLines path offset else return 0 - filterLinesTar f (Just (_, T.TarDir dir)) = - sum <$> mapM (filterLinesTar f . Just) dir - filterLinesTar _ _ = return 0 - - -- TODO if size is too big give it a good score and do not read the file - getLines path offset = do - handle <- SIO.openFile path SIO.ReadMode - SIO.hSeek handle SIO.AbsoluteSeek (fromIntegral $ offset * 512) - header <- BSL.hGet handle 512 - case Tar.read header of - (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do - body <- BSL.hGet handle (fromIntegral siz) - return $ int2Float . length . BSL.split 10 $ body - _ -> return 0 - + documentBlob = queryDocumentation docs pkgId documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob From 7609a8a3386565b659d600639dafee05da4a6f49 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 23 Aug 2022 23:20:34 +0200 Subject: [PATCH 51/62] changed the algorithm to match cargo --- .../Server/Features/PackageList.hs | 3 +- .../Server/Features/PackageRank.hs | 122 +++++++++++------- 2 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index a8ab9c8b..c80dbfcc 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -35,6 +35,7 @@ import Distribution.PackageDescription.Configuration import Distribution.Pretty (prettyShow) import Distribution.Types.Version (Version) import Distribution.Utils.ShortText (fromShortText) +import Distribution.Simple.Utils (safeLast) import Control.Concurrent import qualified Data.List.NonEmpty as NE @@ -306,7 +307,7 @@ listFeature CoreFeature{..} maintainers <- queryUserGroup (maintainersGroup pkgname) prefsinfo <- queryGetPreferredInfo pkgname packageR <- rankPackage versions (cmFind pkgname downs) - (UserIdSet.size maintainers) documentation tar env pkgs + (UserIdSet.size maintainers) documentation tar env pkgs (safeLast pkgs) return $ (,) pkgname . updateReferenceVersion prefsinfo [pkgVersion (pkgInfoId pkg)] $ (updateDescriptionItem desc $ emptyPackageItem pkgname) { itemTags = tags diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9a93e3dd..ac5c49ba 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -32,6 +32,7 @@ import Data.List ( maximumBy import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL +import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) @@ -60,6 +61,9 @@ boolScor k False = Scorer k 0 total :: Scorer -> Float total (Scorer a b) = b / a +scale :: Float -> Scorer -> Scorer +scale mx sc = fracScor mx (total sc) + major :: Num a => [a] -> a major (x : _) = x major _ = 0 @@ -96,38 +100,46 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) --- lookupPackageId --- queryHasDocumentation +cabalScore :: PackageDescription -> IO Bool -> IO Scorer +cabalScore p docum = + (<>) (tests <> benchs <> desc <> homeP <> sourceRp <> cats) + <$> (boolScor 30 <$> docum) + where + tests = boolScor 50 (hasTests p) + benchs = boolScor 10 (hasBenchmarks p) + desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) + -- documentation = boolScor 30 () + homeP = boolScor 30 (not $ S.null $ homepage p) + sourceRp = boolScor 8 (not $ null $ sourceRepos p) + cats = boolScor 5 (not $ S.null $ category p) + +readmeScore _ = Scorer 0 0 --- TODO CoreFeature can be substituted by CoreResource -rankIO +-- queryHasDocumentation +baseScore :: VersionsFeature -> Int - -> Int -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> [PkgInfo] - -> Maybe PkgInfo + -> [Version] + -> [CL.UTCTime] + -> PkgInfo -> IO Scorer -rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do - temp <- temporalScore pkg lastUploads versionList recentDownloads - versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documSize srcLines - return $ temp <> versS <> codeS <> authorScore maintainers pkg - +baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do + versS <- versionScore versionList vers lastUploads pkg + codeS <- codeScore documSize srcLines + cabalS <- cabalScore pkg documHas + return + $ scale 5 versS + <> scale 2 codeS + <> scale 3 (authorScore maintainers pkg) + <> scale 2 cabalS + <> scale 5 (readmeScore readme) where - pkg = packageDescription $ pkgDesc pkgI - pkgId = package pkg - lastUploads = - sortBy (flip compare) - $ (fst . pkgOriginalUploadInfo <$> pkgs) - ++ (fst . pkgLatestUploadInfo <$> pkgs) - versionList :: [Version] - versionList = sortBy (flip compare) - $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + pkg = packageDescription $ pkgDesc pkgI + pkgId = package pkg srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI filterLines (isExtensionOf ".hs") countLines @@ -141,6 +153,8 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do filterLines (isExtensionOf ".html") countSize . Tar.read <$> BSL.readFile pth + readme = findToplevelFile tarCache pkgI isReadmeFile + >>= either (\_ -> return Nothing) (return . Just) filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float @@ -161,6 +175,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob + documHas = queryHasDocumentation docs pkgId authorScore :: Int -> PackageDescription -> Scorer authorScore maintainers desc = @@ -169,14 +184,14 @@ authorScore maintainers desc = maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) codeScore :: IO Float -> IO Float -> IO Scorer -codeScore documentL haskellL = do - docum <- documentL +codeScore documentS haskellL = do + docum <- documentS haskell <- haskellL return $ boolScor 1 (haskell > 700) <> boolScor 1 (haskell < 80000) <> fracScor 2 (min 1 (haskell / 5000)) - <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) + <> fracScor 2 (min 1 docum / ((3000 + haskell) * 200)) versionScore :: [Version] @@ -241,20 +256,6 @@ temporalScore p lastUploads versionList recentDownloads = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) -rankPackagePage :: Maybe PackageDescription -> Scorer -rankPackagePage Nothing = Scorer 233 0 -rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats - where - tests = boolScor 50 (hasTests p) - benchs = boolScor 10 (hasBenchmarks p) - desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) - -- documentation = boolScor 30 () - homeP = boolScor 30 (not $ S.null $ homepage p) - sourceRp = boolScor 8 (not $ null $ sourceRepos p) - cats = boolScor 5 (not $ S.null $ category p) - --- TODO fix the function Signature replace PackageDescription to PackageName/Identifier - rankPackage :: VersionsFeature -> Int @@ -263,14 +264,35 @@ rankPackage -> TarIndexCacheFeature -> ServerEnv -> [PkgInfo] + -> Maybe PkgInfo -> IO Float -rankPackage versions recentDownloads maintainers docs tarCache env pkgs = - total . (<>) (rankPackagePage pkgD) <$> rankIO versions - recentDownloads - maintainers - docs - env - tarCache - pkgs - (safeLast pkgs) - where pkgD = packageDescription . pkgDesc <$> safeLast pkgs +rankPackage _ _ _ _ _ _ _ Nothing = return 0 +rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pkgUsed) + = do + t <- temporalScore pkgD uploads versionList recentDownloads + + b <- baseScore versions + maintainers + docs + env + tarCache + versionList + uploads + pkgUsed + depr <- deprP + return $ sAverage t b * case depr of + Nothing -> 1 + _ -> 0.2 + where + pkgname = pkgName . package $ pkgD + pkgD = packageDescription . pkgDesc $ pkgUsed + deprP = queryGetDeprecatedFor versions pkgname + sAverage x y = (total x + total y) * 0.5 + + versionList :: [Version] + versionList = sortBy (flip compare) + $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + uploads = + sortBy (flip compare) + $ (fst . pkgOriginalUploadInfo <$> pkgs) + ++ (fst . pkgLatestUploadInfo <$> pkgs) From f26effe1d3cd0ad924a66ff42fe9fee508478487 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 25 Aug 2022 23:11:02 +0200 Subject: [PATCH 52/62] prototype for readme parser (collects some info about markdown) --- hackage-server.cabal | 1 + src/Distribution/Server/Features/PackageRank.hs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hackage-server.cabal b/hackage-server.cabal index 9ce2c883..f7a0c2ae 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -399,6 +399,7 @@ library Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap Distribution.Server.Features.PackageRank + Distribution.Server.Features.PackageRank.Parser Distribution.Server.Util.NLP.Snowball if flag(debug) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ac5c49ba..0cef111f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,6 +6,8 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where +import Distribution.Server.Features.PackageRank.Parser + import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation From 5f38c6b6bb83ef30c17c9469a90a33ad8a0da25b Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 25 Aug 2022 23:20:46 +0200 Subject: [PATCH 53/62] forgot to add the parser --- .../Server/Features/PackageRank/Parser.hs | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 src/Distribution/Server/Features/PackageRank/Parser.hs diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageRank/Parser.hs new file mode 100644 index 00000000..5b02ed59 --- /dev/null +++ b/src/Distribution/Server/Features/PackageRank/Parser.hs @@ -0,0 +1,104 @@ +{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} +module Distribution.Server.Features.PackageRank.Parser + ( parseM + ) where + + +import Commonmark +import Commonmark.Extensions +import Control.Monad +import Control.Monad.Identity +import qualified Data.ByteString.Lazy as BS + ( ByteString + , toStrict + ) +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Encoding.Error as T + ( lenientDecode ) +import qualified Data.Text.IO as TIO +import qualified Data.Text.Lazy.IO as TLIO +import Data.Typeable ( Typeable ) +import System.FilePath + +type MarkdownRenderable a b + = (Typeable a, HasPipeTable a b, IsBlock a b, IsInline a) + +parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] +parseM md name = runIdentity + (commonmarkWith (pipeTableSpec <> defaultSyntaxSpec) name txt) + where txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md + +data MStats = MStats Int Int --number of pictures, number of chars + deriving Show + +instance Monoid MStats where + mempty = MStats 0 0 + +instance Rangeable MStats where + ranged = const id + +instance HasAttributes MStats where + addAttributes = const id + +instance Semigroup MStats where + (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) + +data MarkdownStats = NotImportant | + HCode MStats | + Code MStats | + Section | -- Int? + Table Int | + PText MStats | + List Int + deriving (Show) + +sumMStat [] = mempty +sumMStat (x : xs) = case x of + NotImportant -> sumMStat xs + Section -> sumMStat xs + (List a) -> sumMStat xs + (Table a) -> sumMStat xs + (HCode a) -> a <> sumMStat xs + (Code a) -> a <> sumMStat xs + (PText a) -> a <> sumMStat xs + +instance Rangeable [MarkdownStats] where + ranged = const id + +instance HasAttributes [MarkdownStats] where + addAttributes = const id + +instance HasPipeTable MStats [MarkdownStats] where + pipeTable _ _ rows = [Table $ length rows] + +instance IsInline MStats where + lineBreak = MStats 0 1 + softBreak = MStats 0 1 + str t = MStats 0 (T.length t) + entity t = MStats 0 (T.length t) + escapedChar _ = MStats 0 1 + emph = id + strong = id + link _ _ a = a + image _ _ (MStats a b) = MStats (a + 1) b + code t = MStats 0 (T.length t) + rawInline _ t = MStats 0 (T.length t) + +instance IsBlock MStats [MarkdownStats] where + paragraph a = [PText a] + plain a = [PText a] + thematicBreak = [NotImportant] + blockQuote = id + codeBlock language codeT | language == T.pack "haskell" = [HCode (code codeT)] + | otherwise = [Code (code codeT)] + heading _ _ = [Section] + rawBlock _ r = [NotImportant] + referenceLinkDefinition _ _ = [NotImportant] + list _ _ l = [List (length l + depSum l)] + +depSum [] = 0 +depSum ([] : xs) = depSum xs +depSum ((List a : ys) : xs) = a + depSum (ys : xs) +depSum ((_ : ys) : xs) = depSum (ys : xs) + From 2ba5071fa8a87b6aca0f23f2e49ae12a020f03ad Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 26 Aug 2022 23:38:50 +0200 Subject: [PATCH 54/62] finished readmeScore --- .../Server/Features/PackageRank.hs | 85 +++++++++++++------ .../Server/Features/PackageRank/Parser.hs | 82 +++++++++++------- 2 files changed, 112 insertions(+), 55 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0cef111f..dca027e0 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,8 +6,9 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageRank.Parser +import Distribution.Server.Features.PackageRank.Parser +import Data.TarIndex ( TarEntryOffset ) import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation @@ -17,9 +18,14 @@ import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import qualified Distribution.Server.Framework.BlobStorage as BlobStorage +import Distribution.Server.Framework.CacheControl import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types +import Distribution.Server.Util.Markdown + ( supposedToBeMarkdown ) +import Distribution.Server.Util.ServeTarball + ( loadTarEntry ) import Distribution.Simple.Utils ( safeHead , safeLast ) @@ -38,8 +44,6 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) --- import Debug.Trace (trace) - data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -54,7 +58,7 @@ scorer maxim scr = if maxim >= scr then Scorer maxim scr else Scorer maxim maxim fracScor :: Float -> Float -> Scorer -fracScor maxim frac = scorer maxim (maxim * frac) +fracScor maxim frac = scorer maxim (min (maxim * frac) maxim) boolScor :: Float -> Bool -> Scorer boolScor k True = Scorer k k @@ -102,10 +106,9 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) -cabalScore :: PackageDescription -> IO Bool -> IO Scorer +cabalScore :: PackageDescription -> Bool -> Scorer cabalScore p docum = - (<>) (tests <> benchs <> desc <> homeP <> sourceRp <> cats) - <$> (boolScor 30 <$> docum) + tests <> benchs <> desc <> homeP <> sourceRp <> cats <> boolScor 30 docum where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -115,9 +118,38 @@ cabalScore p docum = sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) -readmeScore _ = Scorer 0 0 +readmeScore + :: Maybe (FilePath, ETag, Data.TarIndex.TarEntryOffset, FilePath) + -> Bool + -> IO Scorer +readmeScore Nothing _ = return $ Scorer 1 0 -- readmeScore is scaled so it does not need correct max +readmeScore (Just (tarfile, _, offset, name)) app = do + entr <- loadTarEntry tarfile offset + case entr of + (Right (size, str)) -> return $ calcScore str size name + _ -> return $ Scorer 1 0 + where + calcScore str size filename = + scorer 75 (min 1 (fromInteger (toInteger size) / 3000)) + <> if supposedToBeMarkdown filename + then case parseM str filename of + Left _ -> Scorer 0 0 + Right mdStats -> format mdStats + else Scorer 0 0 + format stats = + fracScor (if app then 25 else 100) (min 1 $ int2Float hlength / 2000) + <> scorer (if app then 15 else 27) (int2Float blocks * 3) + <> boolScor (if app then 10 else 30) (clength > 150) + <> scorer 35 (int2Float images * 10) + <> scorer 30 (int2Float sections * 4) + <> scorer 25 (int2Float rows * 2) + where + (blocks, clength) = getCode stats + (_ , hlength) = getHCode stats + MStats _ images = sumMStat stats + rows = getListsTables stats + sections = getSections stats --- queryHasDocumentation baseScore :: VersionsFeature -> Int @@ -130,18 +162,25 @@ baseScore -> IO Scorer baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do - versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documSize srcLines - cabalS <- cabalScore pkg documHas + + readM <- readme + hasDocum <- documHas + documS <- documSize + srcL <- srcLines + + versS <- versionScore versionList vers lastUploads pkg + readmeS <- readmeScore readM isApp + return $ scale 5 versS - <> scale 2 codeS + <> scale 2 (codeScore documS srcL) <> scale 3 (authorScore maintainers pkg) - <> scale 2 cabalS - <> scale 5 (readmeScore readme) + <> scale 2 (cabalScore pkg hasDocum) + <> scale 5 readmeS where pkg = packageDescription $ pkgDesc pkgI pkgId = package pkg + isApp = (isNothing . library) pkg && (not . null . executables) pkg srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI filterLines (isExtensionOf ".hs") countLines @@ -165,6 +204,7 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do !lns = case Tar.entryContent entry of (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) _ -> l + -- TODO might need to decode/add the other separator countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countSize f entry l = if not . f . Tar.entryPath $ entry then l else s where @@ -185,15 +225,12 @@ authorScore maintainers desc = where maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) -codeScore :: IO Float -> IO Float -> IO Scorer -codeScore documentS haskellL = do - docum <- documentS - haskell <- haskellL - return - $ boolScor 1 (haskell > 700) - <> boolScor 1 (haskell < 80000) - <> fracScor 2 (min 1 (haskell / 5000)) - <> fracScor 2 (min 1 docum / ((3000 + haskell) * 200)) +codeScore :: Float -> Float -> Scorer +codeScore documentS haskellL = + boolScor 1 (haskellL > 700) + <> boolScor 1 (haskellL < 80000) + <> fracScor 2 (min 1 (haskellL / 5000)) + <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 200)) versionScore :: [Version] diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageRank/Parser.hs index 5b02ed59..431228d8 100644 --- a/src/Distribution/Server/Features/PackageRank/Parser.hs +++ b/src/Distribution/Server/Features/PackageRank/Parser.hs @@ -1,12 +1,17 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} module Distribution.Server.Features.PackageRank.Parser ( parseM + , sumMStat + , getListsTables + , getCode + , getHCode + , getSections + , MStats(..) ) where import Commonmark import Commonmark.Extensions -import Control.Monad import Control.Monad.Identity import qualified Data.ByteString.Lazy as BS ( ByteString @@ -16,13 +21,6 @@ import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding.Error as T ( lenientDecode ) -import qualified Data.Text.IO as TIO -import qualified Data.Text.Lazy.IO as TLIO -import Data.Typeable ( Typeable ) -import System.FilePath - -type MarkdownRenderable a b - = (Typeable a, HasPipeTable a b, IsBlock a b, IsInline a) parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] parseM md name = runIdentity @@ -44,24 +42,51 @@ instance HasAttributes MStats where instance Semigroup MStats where (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) -data MarkdownStats = NotImportant | +data MarkdownStats = NotImportant MStats | HCode MStats | Code MStats | - Section | -- Int? - Table Int | + Section MStats | + Table Int MStats | -- Int of rows PText MStats | - List Int + List Int MStats -- Int of elements deriving (Show) +getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code +getCode [] = (0, 0) +getCode (Code (MStats code _) : xs) = (1, code) >< getCode xs +getCode (HCode (MStats code _) : xs) = (1, code) >< getCode xs +getCode (_ : xs) = getCode xs + +getHCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code +getHCode [] = (0, 0) +getHCode (HCode (MStats code _) : xs) = (1, code) >< getHCode xs +getHCode (_ : xs) = getHCode xs + +getSections :: [MarkdownStats] -> Int -- number of code blocks, size of code +getSections [] = 0 +getSections (Section _ : xs) = 1 + getSections xs +getSections (_ : xs) = getSections xs + +(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) +(><) (a, b) (c, d) = (a + c, b + d) + + +sumMStat :: [MarkdownStats] -> MStats sumMStat [] = mempty sumMStat (x : xs) = case x of - NotImportant -> sumMStat xs - Section -> sumMStat xs - (List a) -> sumMStat xs - (Table a) -> sumMStat xs - (HCode a) -> a <> sumMStat xs - (Code a) -> a <> sumMStat xs - (PText a) -> a <> sumMStat xs + (NotImportant a) -> a <> sumMStat xs + (Section a) -> a <> sumMStat xs + (List _ a ) -> a <> sumMStat xs + (Table _ a ) -> a <> sumMStat xs + (HCode a ) -> a <> sumMStat xs + (Code a ) -> a <> sumMStat xs + (PText a ) -> a <> sumMStat xs + +getListsTables :: [MarkdownStats] -> Int +getListsTables [] = 0 +getListsTables ((List a _) : ys) = a + getListsTables ys +getListsTables ((Table a _) : ys) = a + getListsTables ys +getListsTables (_ : ys) = getListsTables ys instance Rangeable [MarkdownStats] where ranged = const id @@ -70,7 +95,7 @@ instance HasAttributes [MarkdownStats] where addAttributes = const id instance HasPipeTable MStats [MarkdownStats] where - pipeTable _ _ rows = [Table $ length rows] + pipeTable _ _ rows = [Table (length rows) (mconcat $ mconcat <$> rows)] instance IsInline MStats where lineBreak = MStats 0 1 @@ -88,17 +113,12 @@ instance IsInline MStats where instance IsBlock MStats [MarkdownStats] where paragraph a = [PText a] plain a = [PText a] - thematicBreak = [NotImportant] + thematicBreak = [NotImportant mempty] blockQuote = id codeBlock language codeT | language == T.pack "haskell" = [HCode (code codeT)] | otherwise = [Code (code codeT)] - heading _ _ = [Section] - rawBlock _ r = [NotImportant] - referenceLinkDefinition _ _ = [NotImportant] - list _ _ l = [List (length l + depSum l)] - -depSum [] = 0 -depSum ([] : xs) = depSum xs -depSum ((List a : ys) : xs) = a + depSum (ys : xs) -depSum ((_ : ys) : xs) = depSum (ys : xs) - + heading _ a = [Section a] + rawBlock _ _ = [NotImportant mempty] + referenceLinkDefinition _ _ = [NotImportant mempty] + list _ _ l = [List (length l + sumLT l) (mconcat $ sumMStat <$> l)] + where sumLT a = sum (getListsTables <$> a) From a3c81fa69595631ebc12e5016a200566d8e0572f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 27 Aug 2022 22:34:11 +0200 Subject: [PATCH 55/62] changed documentation parameter to get reasonable output --- src/Distribution/Server/Features/PackageRank.hs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index dca027e0..334427f8 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -113,7 +113,6 @@ cabalScore p docum = tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) - -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) @@ -230,7 +229,7 @@ codeScore documentS haskellL = boolScor 1 (haskellL > 700) <> boolScor 1 (haskellL < 80000) <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 200)) + <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 1600)) versionScore :: [Version] @@ -281,11 +280,10 @@ temporalScore p lastUploads versionList recentDownloads = do where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = Scorer 5 $ min + calcDownScore i = scorer 5 $ min ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) - 5 packageFreshness = case safeHead lastUploads of Nothing -> return 0 (Just l) -> freshness versionList l isApp From ead8f6b12fa2294e65e4cf6d43d7eb16abf78743 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 27 Aug 2022 23:35:03 +0200 Subject: [PATCH 56/62] changed some parameters to reflect hackage --- .../Server/Features/PackageRank.hs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 334427f8..341dacba 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -110,7 +110,7 @@ cabalScore :: PackageDescription -> Bool -> Scorer cabalScore p docum = tests <> benchs <> desc <> homeP <> sourceRp <> cats <> boolScor 30 docum where - tests = boolScor 50 (hasTests p) + tests = boolScor 30 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) homeP = boolScor 30 (not $ S.null $ homepage p) @@ -169,7 +169,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do versS <- versionScore versionList vers lastUploads pkg readmeS <- readmeScore readM isApp - return $ scale 5 versS <> scale 2 (codeScore documS srcL) @@ -229,7 +228,7 @@ codeScore documentS haskellL = boolScor 1 (haskellL > 700) <> boolScor 1 (haskellL < 80000) <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 1600)) + <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) versionScore :: [Version] @@ -252,10 +251,9 @@ versionScore versionList versions lastUploads desc = do (_, deprecN, _) <- partVers return deprecN calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer - calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) - <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) <> scorer 15 (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) @@ -276,13 +274,16 @@ temporalScore temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore tract <- tractionScore + -- Reverse dependencies are to be done + + f <- packageFreshness return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = scorer 5 $ min - ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) - / (if isApp then 5 else 6) + calcDownScore i = fracScor 5 + ( (logBase 2 (int2Float $ max 0 (i - 32) + 32) - 5) + / (if isApp then 6 else 8) ) packageFreshness = case safeHead lastUploads of Nothing -> return 0 @@ -291,7 +292,7 @@ temporalScore p lastUploads versionList recentDownloads = do -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) + return $ boolScor 1 (fresh * int2Float recentDownloads > 200) rankPackage :: VersionsFeature From 9d4d811402325c74d3ce4a83b385057f2fcbcfe4 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 28 Aug 2022 21:23:06 +0200 Subject: [PATCH 57/62] moved PackageRank into PackageList Feature and changed UI so packageRank will display as Int between 1000 and 0 --- hackage-server.cabal | 4 ++-- src/Distribution/Server/Features/Browse.hs | 3 ++- src/Distribution/Server/Features/PackageList.hs | 2 +- .../{PackageRank/Parser.hs => PackageList/MStats.hs} | 8 ++++---- .../Server/Features/{ => PackageList}/PackageRank.hs | 8 +++----- 5 files changed, 12 insertions(+), 13 deletions(-) rename src/Distribution/Server/Features/{PackageRank/Parser.hs => PackageList/MStats.hs} (94%) rename src/Distribution/Server/Features/{ => PackageList}/PackageRank.hs (98%) diff --git a/hackage-server.cabal b/hackage-server.cabal index f7a0c2ae..49093162 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -348,6 +348,8 @@ library Distribution.Server.Features.PackageCandidates.Backup Distribution.Server.Features.PackageFeed Distribution.Server.Features.PackageList + Distribution.Server.Features.PackageList.PackageRank + Distribution.Server.Features.PackageList.MStats Distribution.Server.Features.Distro Distribution.Server.Features.Distro.Distributions Distribution.Server.Features.Distro.Backup @@ -398,8 +400,6 @@ library Distribution.Server.Features.StaticFiles Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap - Distribution.Server.Features.PackageRank - Distribution.Server.Features.PackageRank.Parser Distribution.Server.Util.NLP.Snowball if flag(debug) diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index fa53d746..33c41f5e 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -10,6 +10,7 @@ import qualified Data.Set as S import Data.Time (getCurrentTime) import Data.Time.Format.ISO8601 (iso8601Show) import System.FilePath (()) +import GHC.Float.RealFracMethods (roundFloatInteger) import Data.Aeson (Value(Array), object, toJSON, (.=)) import qualified Data.Aeson.Key as Key @@ -150,7 +151,7 @@ packageIndexInfoToValue , Key.fromString "lastUpload" .= iso8601Show itemLastUpload , Key.fromString "referenceVersion" .= itemReferenceVersion , Key.fromString "maintainers" .= map renderUser itemMaintainer - , Key.fromString "packageRank" .= itemPackageRank + , Key.fromString "packageRank" .= (roundFloatInteger (1000 * itemPackageRank)) ] where renderTag :: Tag -> Value diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index c80dbfcc..8d3706dc 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -17,7 +17,7 @@ import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) import Distribution.Server.Features.Documentation (DocumentationFeature(..)) import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) -import Distribution.Server.Features.PackageRank +import Distribution.Server.Features.PackageList.PackageRank import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageList/MStats.hs similarity index 94% rename from src/Distribution/Server/Features/PackageRank/Parser.hs rename to src/Distribution/Server/Features/PackageList/MStats.hs index 431228d8..33934ebb 100644 --- a/src/Distribution/Server/Features/PackageRank/Parser.hs +++ b/src/Distribution/Server/Features/PackageList/MStats.hs @@ -1,5 +1,5 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} -module Distribution.Server.Features.PackageRank.Parser +module Distribution.Server.Features.PackageList.MStats ( parseM , sumMStat , getListsTables @@ -53,13 +53,13 @@ data MarkdownStats = NotImportant MStats | getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getCode [] = (0, 0) -getCode (Code (MStats code _) : xs) = (1, code) >< getCode xs -getCode (HCode (MStats code _) : xs) = (1, code) >< getCode xs +getCode (Code (MStats codeT _) : xs) = (1, codeT) >< getCode xs +getCode (HCode (MStats codeT _) : xs) = (1, codeT) >< getCode xs getCode (_ : xs) = getCode xs getHCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getHCode [] = (0, 0) -getHCode (HCode (MStats code _) : xs) = (1, code) >< getHCode xs +getHCode (HCode (MStats codeT _) : xs) = (1, codeT) >< getHCode xs getHCode (_ : xs) = getHCode xs getSections :: [MarkdownStats] -> Int -- number of code blocks, size of code diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs similarity index 98% rename from src/Distribution/Server/Features/PackageRank.hs rename to src/Distribution/Server/Features/PackageList/PackageRank.hs index 341dacba..367731be 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -2,11 +2,11 @@ -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank -module Distribution.Server.Features.PackageRank +module Distribution.Server.Features.PackageList.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageRank.Parser +import Distribution.Server.Features.PackageList.MStats import Data.TarIndex ( TarEntryOffset ) import Distribution.Package @@ -274,9 +274,7 @@ temporalScore temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore tract <- tractionScore - -- Reverse dependencies are to be done - - f <- packageFreshness + -- Reverse dependencies are added return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p From 2cd996b072931d222bb3edbab29bafcf4061338d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 30 Aug 2022 22:44:42 +0200 Subject: [PATCH 58/62] added some Exception handling --- .../Features/PackageList/PackageRank.hs | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index 367731be..c58f8288 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -1,24 +1,18 @@ -{-# LANGUAGE BangPatterns #-} - --- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank - +{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} module Distribution.Server.Features.PackageList.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageList.MStats - -import Data.TarIndex ( TarEntryOffset ) import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) +import Distribution.Server.Features.PackageList.MStats import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import qualified Distribution.Server.Framework.BlobStorage as BlobStorage -import Distribution.Server.Framework.CacheControl import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types @@ -33,6 +27,9 @@ import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar +import Control.Exception ( SomeException(..) + , handle + ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy @@ -44,6 +41,9 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) +handleConst :: a -> IO a -> IO a +handleConst c = handle (\(_ :: SomeException) -> return c) + data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -117,17 +117,16 @@ cabalScore p docum = sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) -readmeScore - :: Maybe (FilePath, ETag, Data.TarIndex.TarEntryOffset, FilePath) - -> Bool - -> IO Scorer -readmeScore Nothing _ = return $ Scorer 1 0 -- readmeScore is scaled so it does not need correct max -readmeScore (Just (tarfile, _, offset, name)) app = do - entr <- loadTarEntry tarfile offset +readmeScore :: TarIndexCacheFeature -> PkgInfo -> Bool -> IO Scorer +readmeScore tarCache pkgI app = do + Just (tarfile, _, offset, name) <- readme + entr <- loadTarEntry tarfile offset case entr of (Right (size, str)) -> return $ calcScore str size name _ -> return $ Scorer 1 0 where + readme = findToplevelFile tarCache pkgI isReadmeFile + >>= either (\_ -> return Nothing) (return . Just) calcScore str size filename = scorer 75 (min 1 (fromInteger (toInteger size) / 3000)) <> if supposedToBeMarkdown filename @@ -162,13 +161,13 @@ baseScore baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do - readM <- readme - hasDocum <- documHas - documS <- documSize - srcL <- srcLines + hasDocum <- handleConst False documHas -- Probably redundant + documS <- handleConst 0 documSize + srcL <- handleConst 0 srcLines - versS <- versionScore versionList vers lastUploads pkg - readmeS <- readmeScore readM isApp + versS <- handleConst (Scorer 1 0) + (versionScore versionList vers lastUploads pkg) + readmeS <- handleConst (Scorer 1 0) (readmeScore tarCache pkgI isApp) return $ scale 5 versS <> scale 2 (codeScore documS srcL) @@ -192,9 +191,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do filterLines (isExtensionOf ".html") countSize . Tar.read <$> BSL.readFile pth - readme = findToplevelFile tarCache pkgI isReadmeFile - >>= either (\_ -> return Nothing) (return . Just) - filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns @@ -279,15 +275,16 @@ temporalScore p lastUploads versionList recentDownloads = do where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = fracScor 5 + calcDownScore i = fracScor + 5 ( (logBase 2 (int2Float $ max 0 (i - 32) + 32) - 5) / (if isApp then 6 else 8) ) packageFreshness = case safeHead lastUploads of Nothing -> return 0 - (Just l) -> freshness versionList l isApp + (Just l) -> freshness versionList l isApp -- Getting time hopefully does not throw Exc. freshnessScore = fracScor 10 <$> packageFreshness --- Missing dependencyFreshnessScore for reasonable effectivity needs caching + -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 200) @@ -315,7 +312,7 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pk versionList uploads pkgUsed - depr <- deprP + depr <- handleConst Nothing deprP return $ sAverage t b * case depr of Nothing -> 1 _ -> 0.2 From 32995c87c887ab2a832e2dac6bcbb44249cdbbc2 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 4 Sep 2022 13:32:29 +0200 Subject: [PATCH 59/62] some comments and refactoring --- .../Server/Features/PackageList/MStats.hs | 32 ++--- .../Features/PackageList/PackageRank.hs | 119 +++++++++--------- 2 files changed, 78 insertions(+), 73 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList/MStats.hs b/src/Distribution/Server/Features/PackageList/MStats.hs index 33934ebb..b9dc0493 100644 --- a/src/Distribution/Server/Features/PackageList/MStats.hs +++ b/src/Distribution/Server/Features/PackageList/MStats.hs @@ -9,24 +9,32 @@ module Distribution.Server.Features.PackageList.MStats , MStats(..) ) where - import Commonmark import Commonmark.Extensions import Control.Monad.Identity import qualified Data.ByteString.Lazy as BS ( ByteString - , toStrict - ) + , toStrict ) import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding.Error as T ( lenientDecode ) +-- parses markdown into statistics needed for readmeScore parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] parseM md name = runIdentity (commonmarkWith (pipeTableSpec <> defaultSyntaxSpec) name txt) where txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md +data MarkdownStats = NotImportant MStats | + HCode MStats | + Code MStats | + Section MStats | + Table Int MStats | -- Int of rows + PText MStats | + List Int MStats -- Int of elements + deriving (Show) + data MStats = MStats Int Int --number of pictures, number of chars deriving Show @@ -42,14 +50,7 @@ instance HasAttributes MStats where instance Semigroup MStats where (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) -data MarkdownStats = NotImportant MStats | - HCode MStats | - Code MStats | - Section MStats | - Table Int MStats | -- Int of rows - PText MStats | - List Int MStats -- Int of elements - deriving (Show) +-- Getter functions getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getCode [] = (0, 0) @@ -67,10 +68,6 @@ getSections [] = 0 getSections (Section _ : xs) = 1 + getSections xs getSections (_ : xs) = getSections xs -(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) -(><) (a, b) (c, d) = (a + c, b + d) - - sumMStat :: [MarkdownStats] -> MStats sumMStat [] = mempty sumMStat (x : xs) = case x of @@ -88,6 +85,11 @@ getListsTables ((List a _) : ys) = a + getListsTables ys getListsTables ((Table a _) : ys) = a + getListsTables ys getListsTables (_ : ys) = getListsTables ys +-- helper +(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) +(><) (a, b) (c, d) = (a + c, b + d) + +-- INSTANCES instance Rangeable [MarkdownStats] where ranged = const id diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index c58f8288..d259cdc2 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -21,19 +21,16 @@ import Distribution.Server.Util.Markdown import Distribution.Server.Util.ServeTarball ( loadTarEntry ) import Distribution.Simple.Utils ( safeHead - , safeLast - ) + , safeLast ) import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import Control.Exception ( SomeException(..) - , handle - ) + , handle ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy - ) + , sortBy ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL @@ -41,9 +38,12 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) +-- HELPER FUNCTIONS + handleConst :: a -> IO a -> IO a handleConst c = handle (\(_ :: SomeException) -> return c) +-- Scorer stores rank information data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -70,6 +70,7 @@ total (Scorer a b) = b / a scale :: Float -> Scorer -> Scorer scale mx sc = fracScor mx (total sc) +-- calculates number of versions from version list major :: Num a => [a] -> a major (x : _) = x major _ = 0 @@ -86,6 +87,8 @@ numDays (Just first) (Just end) = (toRational CL.nominalDay) numDays _ _ = 0 +-- Score Calculations + freshness :: [Version] -> CL.UTCTime -> Bool -> IO Float freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = @@ -148,6 +151,58 @@ readmeScore tarCache pkgI app = do rows = getListsTables stats sections = getSections stats +authorScore :: Int -> PackageDescription -> Scorer +authorScore maintainers desc = + boolScor 1 (not $ S.null $ author desc) <> maintScore + where + maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) + +codeScore :: Float -> Float -> Scorer +codeScore documentS haskellL = + boolScor 1 (haskellL > 700) + <> boolScor 1 (haskellL < 80000) + <> fracScor 2 (min 1 (haskellL / 5000)) + <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) + +versionScore + :: [Version] + -> VersionsFeature + -> [CL.UTCTime] + -> PackageDescription + -> IO Scorer +versionScore versionList versions lastUploads desc = do + use <- intUsable + depre <- deprec + return $ calculateScore depre lastUploads use + where + pkgNm = pkgName $ package desc + partVers = + flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm + intUsable = do + (norm, _, unpref) <- partVers + return $ versionNumbers <$> norm ++ unpref + deprec = do + (_, deprecN, _) <- partVers + return deprecN + calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer + calculateScore depre lUps intUse = + boolScor 20 (length intUse > 1) + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) + <> scorer + 15 + (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) + intUse + ) + <> scorer + 20 + (int2Float $ 4 * length + (filter (\x -> major x > 0 && patches x > 0) intUse) + ) + <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) + <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + <> boolScor 5 (not $ null depre) + baseScore :: VersionsFeature -> Int @@ -213,58 +268,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do return $ BlobStorage.filepath (serverBlobStore env) <$> blob documHas = queryHasDocumentation docs pkgId -authorScore :: Int -> PackageDescription -> Scorer -authorScore maintainers desc = - boolScor 1 (not $ S.null $ author desc) <> maintScore - where - maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) - -codeScore :: Float -> Float -> Scorer -codeScore documentS haskellL = - boolScor 1 (haskellL > 700) - <> boolScor 1 (haskellL < 80000) - <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) - -versionScore - :: [Version] - -> VersionsFeature - -> [CL.UTCTime] - -> PackageDescription - -> IO Scorer -versionScore versionList versions lastUploads desc = do - use <- intUsable - depre <- deprec - return $ calculateScore depre lastUploads use - where - pkgNm = pkgName $ package desc - partVers = - flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm - intUsable = do - (norm, _, unpref) <- partVers - return $ versionNumbers <$> norm ++ unpref - deprec = do - (_, deprecN, _) <- partVers - return deprecN - calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer - calculateScore depre lUps intUse = - boolScor 20 (length intUse > 1) - <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) - <> scorer - 15 - (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) - intUse - ) - <> scorer - 20 - (int2Float $ 4 * length - (filter (\x -> major x > 0 && patches x > 0) intUse) - ) - <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) - <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) - <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - <> boolScor 5 (not $ null depre) - temporalScore :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> IO Scorer temporalScore p lastUploads versionList recentDownloads = do From 39b28de4f746baa32774fc9804fca0c837390dc0 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 2 Oct 2022 21:46:33 +0200 Subject: [PATCH 60/62] test commit --- src/Distribution/Server/Features/PackageList/PackageRank.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index d259cdc2..655ef4cd 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -71,6 +71,7 @@ scale :: Float -> Scorer -> Scorer scale mx sc = fracScor mx (total sc) -- calculates number of versions from version list + major :: Num a => [a] -> a major (x : _) = x major _ = 0 From 5d354dadc8650ac1d72a1d58a69bf07caa6c198a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Kub=C3=A1nek?= Date: Sun, 21 Sep 2025 11:47:48 +0200 Subject: [PATCH 61/62] fixed dumb errors after rebase, changed memsize of packageitem to 12 --- src/Distribution/Server/Features/PackageList.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 8d3706dc..c3351aa1 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -95,13 +95,13 @@ data PackageItem = PackageItem { -- Hotness = recent downloads + stars + 2 * no rev deps itemHotness :: !Float, -- Reference version (non-deprecated highest numbered version) - itemReferenceVersion :: !String + itemReferenceVersion :: !String, -- heuristic way to sort packages itemPackageRank :: !Float } instance MemSize PackageItem where - memSize (PackageItem a b c d e f g h i j k l _m n o) = memSize11 a b c d e f g h i j (k, l, n, o) + memSize (PackageItem a b c d e f g h i j k l _m n o r) = memSize12 a b c d e f g h i j (k, l, n, o) r emptyPackageItem :: PackageName -> PackageItem @@ -121,7 +121,8 @@ emptyPackageItem pkg = itemNumBenchmarks = 0, itemLastUpload = UTCTime (toEnum 0) 0, itemHotness = 0, - itemReferenceVersion = "" + itemReferenceVersion = "", + itemPackageRank = 0 } initListFeature :: ServerEnv From fc4c52742eee5edc80cdb79647e0f20755ffc28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Kub=C3=A1nek?= Date: Mon, 22 Sep 2025 16:33:03 +0200 Subject: [PATCH 62/62] added reverse Dependencies to PackageRank and tried to scale it for Hackage --- .../Server/Features/PackageList.hs | 8 +++---- .../Features/PackageList/PackageRank.hs | 22 +++++++++++-------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index c3351aa1..d2ede35f 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -230,7 +230,7 @@ listFeature :: CoreFeature PackageName -> IO ()) listFeature CoreFeature{..} - ReverseFeature{revDirectCount} + ReverseFeature{revDirectCount, revPackageStats} DownloadFeature{..} VotesFeature{..} TagsFeature{..} @@ -299,7 +299,7 @@ listFeature CoreFeature{..} desc = pkgDesc pkg pkg = last pkgs -- [reverse index disabled] revCount <- query . GetReverseCount $ pkgname - intRevDirectCount <- revDirectCount pkgname + revCount@(ReverseCount intRevDirectCount _) <- revPackageStats pkgname users <- queryGetUserDb tags <- queryTagsForPackage pkgname downs <- recentPackageDownloads @@ -307,8 +307,8 @@ listFeature CoreFeature{..} deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) prefsinfo <- queryGetPreferredInfo pkgname - packageR <- rankPackage versions (cmFind pkgname downs) - (UserIdSet.size maintainers) documentation tar env pkgs (safeLast pkgs) + packageR <- rankPackage versions (cmFind pkgname downs) (UserIdSet.size maintainers) + documentation tar env pkgs (safeLast pkgs) revCount return $ (,) pkgname . updateReferenceVersion prefsinfo [pkgVersion (pkgInfoId pkg)] $ (updateDescriptionItem desc $ emptyPackageItem pkgname) { itemTags = tags diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index 655ef4cd..b105e247 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -8,6 +8,7 @@ import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) import Distribution.Server.Features.PackageList.MStats +import Distribution.Server.Features.ReverseDependencies (ReverseCount(..)) import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache @@ -270,19 +271,21 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do documHas = queryHasDocumentation docs pkgId temporalScore - :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> IO Scorer -temporalScore p lastUploads versionList recentDownloads = do + :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> ReverseCount -> IO Scorer +temporalScore p lastUploads versionList recentDownloads (ReverseCount dir tot) = do fresh <- freshnessScore tract <- tractionScore - -- Reverse dependencies are added - return $ tract <> fresh <> downloadScore + return $ tract <> fresh <> downloadScore <> (if isApp then scorer 0 2 else dirScore <> indirScore) where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads + dirScore = fracScor 5 (logBase 2 (int2Float $ max 0 (dir - 32) + 32) - 5) + indirScore = fracScor 2 (logBase 2 (int2Float $ max 0 (tot - dir - 32) + 32) + - 5 / 3) calcDownScore i = fracScor 5 - ( (logBase 2 (int2Float $ max 0 (i - 32) + 32) - 5) - / (if isApp then 6 else 8) + ( logBase 2 (int2Float $ max 0 (i - 16) + 16) + - (if isApp then 3 else 4) ) packageFreshness = case safeHead lastUploads of Nothing -> return 0 @@ -302,11 +305,12 @@ rankPackage -> ServerEnv -> [PkgInfo] -> Maybe PkgInfo + -> ReverseCount -> IO Float -rankPackage _ _ _ _ _ _ _ Nothing = return 0 -rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pkgUsed) +rankPackage _ _ _ _ _ _ _ Nothing _ = return 0 +rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pkgUsed) revCount = do - t <- temporalScore pkgD uploads versionList recentDownloads + t <- temporalScore pkgD uploads versionList recentDownloads revCount b <- baseScore versions maintainers