diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..f3f4b41 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,38 @@ +name: tests + +on: + push: + pull_request: + +jobs: + run: + runs-on: ${{ matrix.operating-system }} + strategy: + matrix: + operating-system: [ubuntu-latest] + php-versions: ['7.4'] + name: PHP ${{ matrix.php-versions }} Test on ${{ matrix.operating-system }} + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + coverage: xdebug + + - name: Validate composer files + run: composer validate + + - name: Install dependencies + run: composer install --prefer-dist --no-progress + + - name: Run test suite + run: composer test + + - name: Publish Analysis + uses: codecov/codecov-action@v1.0.13 + with: + name: Code Analysis + file: ./var/code-coverage/clover/coverage.xml diff --git a/.gitignore b/.gitignore index ab27d1e..fcab08f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ /.idea -/vendor \ No newline at end of file +/.tmp +/tests/.phpunit.result.cache +/composer.lock +/vendor +/var diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0895ccb..0000000 --- a/.travis.yml +++ /dev/null @@ -1,11 +0,0 @@ -language: php -php: - - '7.1' - - '7.2' - -dist: trusty -sudo: required -group: edge - -before_script: - - composer install \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..29afed1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 PHP-Science + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/composer.json b/composer.json index 4d07d2e..3dc3015 100644 --- a/composer.json +++ b/composer.json @@ -10,19 +10,24 @@ } ], "require": { - "php": "7.*", + "php": "7.4.*", "ext-ctype": "*", - "ext-mbstring": "*" + "ext-mbstring": "*", + "php-science/pagerank": "1.*" }, "require-dev": { - "phpunit/phpunit": "^5.4" + "phpunit/phpunit": "^9" }, "autoload": { "psr-4": { - "PhpScience\\TextRank\\": ["src/", "tests/"] + "PhpScience\\TextRank\\": [ + "src/", + "tests/unit/", + "tests/functional/" + ] } }, "scripts": { - "test": "phpunit --colors='always' $(pwd)/tests" + "test": "vendor/bin/phpunit -c tests/phpunit.xml --colors=always --do-not-cache-result" } } diff --git a/composer.lock b/composer.lock deleted file mode 100644 index 65ae386..0000000 --- a/composer.lock +++ /dev/null @@ -1,1409 +0,0 @@ -{ - "_readme": [ - "This file locks the dependencies of your project to a known state", - "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", - "This file is @generated automatically" - ], - "content-hash": "97ff0c108502ab602ba79bf3a251e208", - "packages": [], - "packages-dev": [ - { - "name": "doctrine/instantiator", - "version": "1.1.0", - "source": { - 
"type": "git", - "url": "https://github.com/doctrine/instantiator.git", - "reference": "185b8868aa9bf7159f5f953ed5afb2d7fcdc3bda" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/doctrine/instantiator/zipball/185b8868aa9bf7159f5f953ed5afb2d7fcdc3bda", - "reference": "185b8868aa9bf7159f5f953ed5afb2d7fcdc3bda", - "shasum": "" - }, - "require": { - "php": "^7.1" - }, - "require-dev": { - "athletic/athletic": "~0.1.8", - "ext-pdo": "*", - "ext-phar": "*", - "phpunit/phpunit": "^6.2.3", - "squizlabs/php_codesniffer": "^3.0.2" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.2.x-dev" - } - }, - "autoload": { - "psr-4": { - "Doctrine\\Instantiator\\": "src/Doctrine/Instantiator/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Marco Pivetta", - "email": "ocramius@gmail.com", - "homepage": "http://ocramius.github.com/" - } - ], - "description": "A small, lightweight utility to instantiate objects in PHP without invoking their constructors", - "homepage": "https://github.com/doctrine/instantiator", - "keywords": [ - "constructor", - "instantiate" - ], - "time": "2017-07-22T11:58:36+00:00" - }, - { - "name": "myclabs/deep-copy", - "version": "1.8.1", - "source": { - "type": "git", - "url": "https://github.com/myclabs/DeepCopy.git", - "reference": "3e01bdad3e18354c3dce54466b7fbe33a9f9f7f8" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/3e01bdad3e18354c3dce54466b7fbe33a9f9f7f8", - "reference": "3e01bdad3e18354c3dce54466b7fbe33a9f9f7f8", - "shasum": "" - }, - "require": { - "php": "^7.1" - }, - "replace": { - "myclabs/deep-copy": "self.version" - }, - "require-dev": { - "doctrine/collections": "^1.0", - "doctrine/common": "^2.6", - "phpunit/phpunit": "^7.1" - }, - "type": "library", - "autoload": { - "psr-4": { - "DeepCopy\\": "src/DeepCopy/" - }, - "files": [ - "src/DeepCopy/deep_copy.php" - ] - }, 
- "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "description": "Create deep copies (clones) of your objects", - "keywords": [ - "clone", - "copy", - "duplicate", - "object", - "object graph" - ], - "time": "2018-06-11T23:09:50+00:00" - }, - { - "name": "phpdocumentor/reflection-common", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/ReflectionCommon.git", - "reference": "21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionCommon/zipball/21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6", - "reference": "21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6", - "shasum": "" - }, - "require": { - "php": ">=5.5" - }, - "require-dev": { - "phpunit/phpunit": "^4.6" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": [ - "src" - ] - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Jaap van Otterdijk", - "email": "opensource@ijaap.nl" - } - ], - "description": "Common reflection classes used by phpdocumentor to reflect the code structure", - "homepage": "http://www.phpdoc.org", - "keywords": [ - "FQSEN", - "phpDocumentor", - "phpdoc", - "reflection", - "static analysis" - ], - "time": "2017-09-11T18:02:19+00:00" - }, - { - "name": "phpdocumentor/reflection-docblock", - "version": "4.3.0", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/ReflectionDocBlock.git", - "reference": "94fd0001232e47129dd3504189fa1c7225010d08" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/94fd0001232e47129dd3504189fa1c7225010d08", - "reference": "94fd0001232e47129dd3504189fa1c7225010d08", - "shasum": "" - }, - "require": { - "php": "^7.0", - "phpdocumentor/reflection-common": 
"^1.0.0", - "phpdocumentor/type-resolver": "^0.4.0", - "webmozart/assert": "^1.0" - }, - "require-dev": { - "doctrine/instantiator": "~1.0.5", - "mockery/mockery": "^1.0", - "phpunit/phpunit": "^6.4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "4.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": [ - "src/" - ] - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Mike van Riel", - "email": "me@mikevanriel.com" - } - ], - "description": "With this component, a library can provide support for annotations via DocBlocks or otherwise retrieve information that is embedded in a DocBlock.", - "time": "2017-11-30T07:14:17+00:00" - }, - { - "name": "phpdocumentor/type-resolver", - "version": "0.4.0", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/TypeResolver.git", - "reference": "9c977708995954784726e25d0cd1dddf4e65b0f7" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/TypeResolver/zipball/9c977708995954784726e25d0cd1dddf4e65b0f7", - "reference": "9c977708995954784726e25d0cd1dddf4e65b0f7", - "shasum": "" - }, - "require": { - "php": "^5.5 || ^7.0", - "phpdocumentor/reflection-common": "^1.0" - }, - "require-dev": { - "mockery/mockery": "^0.9.4", - "phpunit/phpunit": "^5.2||^4.8.24" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": [ - "src/" - ] - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Mike van Riel", - "email": "me@mikevanriel.com" - } - ], - "time": "2017-07-14T14:27:02+00:00" - }, - { - "name": "phpspec/prophecy", - "version": "1.8.0", - "source": { - "type": "git", - "url": "https://github.com/phpspec/prophecy.git", - "reference": "4ba436b55987b4bf311cb7c6ba82aa528aac0a06" - }, - "dist": { 
- "type": "zip", - "url": "https://api.github.com/repos/phpspec/prophecy/zipball/4ba436b55987b4bf311cb7c6ba82aa528aac0a06", - "reference": "4ba436b55987b4bf311cb7c6ba82aa528aac0a06", - "shasum": "" - }, - "require": { - "doctrine/instantiator": "^1.0.2", - "php": "^5.3|^7.0", - "phpdocumentor/reflection-docblock": "^2.0|^3.0.2|^4.0", - "sebastian/comparator": "^1.1|^2.0|^3.0", - "sebastian/recursion-context": "^1.0|^2.0|^3.0" - }, - "require-dev": { - "phpspec/phpspec": "^2.5|^3.2", - "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.5 || ^7.1" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.8.x-dev" - } - }, - "autoload": { - "psr-0": { - "Prophecy\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Konstantin Kudryashov", - "email": "ever.zet@gmail.com", - "homepage": "http://everzet.com" - }, - { - "name": "Marcello Duarte", - "email": "marcello.duarte@gmail.com" - } - ], - "description": "Highly opinionated mocking framework for PHP 5.3+", - "homepage": "https://github.com/phpspec/prophecy", - "keywords": [ - "Double", - "Dummy", - "fake", - "mock", - "spy", - "stub" - ], - "time": "2018-08-05T17:53:17+00:00" - }, - { - "name": "phpunit/php-code-coverage", - "version": "4.0.8", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-code-coverage.git", - "reference": "ef7b2f56815df854e66ceaee8ebe9393ae36a40d" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/ef7b2f56815df854e66ceaee8ebe9393ae36a40d", - "reference": "ef7b2f56815df854e66ceaee8ebe9393ae36a40d", - "shasum": "" - }, - "require": { - "ext-dom": "*", - "ext-xmlwriter": "*", - "php": "^5.6 || ^7.0", - "phpunit/php-file-iterator": "^1.3", - "phpunit/php-text-template": "^1.2", - "phpunit/php-token-stream": "^1.4.2 || ^2.0", - "sebastian/code-unit-reverse-lookup": "^1.0", - "sebastian/environment": "^1.3.2 || 
^2.0", - "sebastian/version": "^1.0 || ^2.0" - }, - "require-dev": { - "ext-xdebug": "^2.1.4", - "phpunit/phpunit": "^5.7" - }, - "suggest": { - "ext-xdebug": "^2.5.1" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "4.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", - "role": "lead" - } - ], - "description": "Library that provides collection, processing, and rendering functionality for PHP code coverage information.", - "homepage": "https://github.com/sebastianbergmann/php-code-coverage", - "keywords": [ - "coverage", - "testing", - "xunit" - ], - "time": "2017-04-02T07:44:40+00:00" - }, - { - "name": "phpunit/php-file-iterator", - "version": "1.4.5", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-file-iterator.git", - "reference": "730b01bc3e867237eaac355e06a36b85dd93a8b4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-file-iterator/zipball/730b01bc3e867237eaac355e06a36b85dd93a8b4", - "reference": "730b01bc3e867237eaac355e06a36b85dd93a8b4", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.4.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", - "role": "lead" - } - ], - "description": "FilterIterator implementation that filters files based on a list of suffixes.", - "homepage": "https://github.com/sebastianbergmann/php-file-iterator/", - "keywords": [ - "filesystem", - "iterator" - ], - "time": "2017-11-27T13:52:08+00:00" - }, - { - "name": "phpunit/php-text-template", - 
"version": "1.2.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-text-template.git", - "reference": "31f8b717e51d9a2afca6c9f046f5d69fc27c8686" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-text-template/zipball/31f8b717e51d9a2afca6c9f046f5d69fc27c8686", - "reference": "31f8b717e51d9a2afca6c9f046f5d69fc27c8686", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "type": "library", - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "Simple template engine.", - "homepage": "https://github.com/sebastianbergmann/php-text-template/", - "keywords": [ - "template" - ], - "time": "2015-06-21T13:50:34+00:00" - }, - { - "name": "phpunit/php-timer", - "version": "1.0.9", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-timer.git", - "reference": "3dcf38ca72b158baf0bc245e9184d3fdffa9c46f" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-timer/zipball/3dcf38ca72b158baf0bc245e9184d3fdffa9c46f", - "reference": "3dcf38ca72b158baf0bc245e9184d3fdffa9c46f", - "shasum": "" - }, - "require": { - "php": "^5.3.3 || ^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", - "role": "lead" - } - ], - "description": "Utility class for timing", - "homepage": "https://github.com/sebastianbergmann/php-timer/", - "keywords": [ - "timer" - ], - "time": 
"2017-02-26T11:10:40+00:00" - }, - { - "name": "phpunit/php-token-stream", - "version": "2.0.2", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-token-stream.git", - "reference": "791198a2c6254db10131eecfe8c06670700904db" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-token-stream/zipball/791198a2c6254db10131eecfe8c06670700904db", - "reference": "791198a2c6254db10131eecfe8c06670700904db", - "shasum": "" - }, - "require": { - "ext-tokenizer": "*", - "php": "^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^6.2.4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Wrapper around PHP's tokenizer extension.", - "homepage": "https://github.com/sebastianbergmann/php-token-stream/", - "keywords": [ - "tokenizer" - ], - "time": "2017-11-27T05:48:46+00:00" - }, - { - "name": "phpunit/phpunit", - "version": "5.7.27", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "b7803aeca3ccb99ad0a506fa80b64cd6a56bbc0c" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/b7803aeca3ccb99ad0a506fa80b64cd6a56bbc0c", - "reference": "b7803aeca3ccb99ad0a506fa80b64cd6a56bbc0c", - "shasum": "" - }, - "require": { - "ext-dom": "*", - "ext-json": "*", - "ext-libxml": "*", - "ext-mbstring": "*", - "ext-xml": "*", - "myclabs/deep-copy": "~1.3", - "php": "^5.6 || ^7.0", - "phpspec/prophecy": "^1.6.2", - "phpunit/php-code-coverage": "^4.0.4", - "phpunit/php-file-iterator": "~1.4", - "phpunit/php-text-template": "~1.2", - "phpunit/php-timer": "^1.0.6", - "phpunit/phpunit-mock-objects": "^3.2", - 
"sebastian/comparator": "^1.2.4", - "sebastian/diff": "^1.4.3", - "sebastian/environment": "^1.3.4 || ^2.0", - "sebastian/exporter": "~2.0", - "sebastian/global-state": "^1.1", - "sebastian/object-enumerator": "~2.0", - "sebastian/resource-operations": "~1.0", - "sebastian/version": "^1.0.6|^2.0.1", - "symfony/yaml": "~2.1|~3.0|~4.0" - }, - "conflict": { - "phpdocumentor/reflection-docblock": "3.0.2" - }, - "require-dev": { - "ext-pdo": "*" - }, - "suggest": { - "ext-xdebug": "*", - "phpunit/php-invoker": "~1.1" - }, - "bin": [ - "phpunit" - ], - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "5.7.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "The PHP Unit Testing framework.", - "homepage": "https://phpunit.de/", - "keywords": [ - "phpunit", - "testing", - "xunit" - ], - "time": "2018-02-01T05:50:59+00:00" - }, - { - "name": "phpunit/phpunit-mock-objects", - "version": "3.4.4", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git", - "reference": "a23b761686d50a560cc56233b9ecf49597cc9118" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/a23b761686d50a560cc56233b9ecf49597cc9118", - "reference": "a23b761686d50a560cc56233b9ecf49597cc9118", - "shasum": "" - }, - "require": { - "doctrine/instantiator": "^1.0.2", - "php": "^5.6 || ^7.0", - "phpunit/php-text-template": "^1.2", - "sebastian/exporter": "^1.2 || ^2.0" - }, - "conflict": { - "phpunit/phpunit": "<5.4.0" - }, - "require-dev": { - "phpunit/phpunit": "^5.4" - }, - "suggest": { - "ext-soap": "*" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.2.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - 
}, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", - "role": "lead" - } - ], - "description": "Mock Object library for PHPUnit", - "homepage": "https://github.com/sebastianbergmann/phpunit-mock-objects/", - "keywords": [ - "mock", - "xunit" - ], - "time": "2017-06-30T09:13:00+00:00" - }, - { - "name": "sebastian/code-unit-reverse-lookup", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git", - "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/4419fcdb5eabb9caa61a27c7a1db532a6b55dd18", - "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18", - "shasum": "" - }, - "require": { - "php": "^5.6 || ^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^5.7 || ^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Looks up which function or method a line of code belongs to", - "homepage": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/", - "time": "2017-03-04T06:30:41+00:00" - }, - { - "name": "sebastian/comparator", - "version": "1.2.4", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/comparator.git", - "reference": "2b7424b55f5047b47ac6e5ccb20b2aea4011d9be" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/2b7424b55f5047b47ac6e5ccb20b2aea4011d9be", - "reference": "2b7424b55f5047b47ac6e5ccb20b2aea4011d9be", - "shasum": "" - }, - 
"require": { - "php": ">=5.3.3", - "sebastian/diff": "~1.2", - "sebastian/exporter": "~1.2 || ~2.0" - }, - "require-dev": { - "phpunit/phpunit": "~4.4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.2.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Jeff Welch", - "email": "whatthejeff@gmail.com" - }, - { - "name": "Volker Dusch", - "email": "github@wallbash.com" - }, - { - "name": "Bernhard Schussek", - "email": "bschussek@2bepublished.at" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Provides the functionality to compare PHP values for equality", - "homepage": "http://www.github.com/sebastianbergmann/comparator", - "keywords": [ - "comparator", - "compare", - "equality" - ], - "time": "2017-01-29T09:50:25+00:00" - }, - { - "name": "sebastian/diff", - "version": "1.4.3", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/diff.git", - "reference": "7f066a26a962dbe58ddea9f72a4e82874a3975a4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/7f066a26a962dbe58ddea9f72a4e82874a3975a4", - "reference": "7f066a26a962dbe58ddea9f72a4e82874a3975a4", - "shasum": "" - }, - "require": { - "php": "^5.3.3 || ^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.4-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Kore Nordmann", - "email": "mail@kore-nordmann.de" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Diff implementation", - "homepage": "https://github.com/sebastianbergmann/diff", 
- "keywords": [ - "diff" - ], - "time": "2017-05-22T07:24:03+00:00" - }, - { - "name": "sebastian/environment", - "version": "2.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/environment.git", - "reference": "5795ffe5dc5b02460c3e34222fee8cbe245d8fac" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/5795ffe5dc5b02460c3e34222fee8cbe245d8fac", - "reference": "5795ffe5dc5b02460c3e34222fee8cbe245d8fac", - "shasum": "" - }, - "require": { - "php": "^5.6 || ^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^5.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Provides functionality to handle HHVM/PHP environments", - "homepage": "http://www.github.com/sebastianbergmann/environment", - "keywords": [ - "Xdebug", - "environment", - "hhvm" - ], - "time": "2016-11-26T07:53:53+00:00" - }, - { - "name": "sebastian/exporter", - "version": "2.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/exporter.git", - "reference": "ce474bdd1a34744d7ac5d6aad3a46d48d9bac4c4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/ce474bdd1a34744d7ac5d6aad3a46d48d9bac4c4", - "reference": "ce474bdd1a34744d7ac5d6aad3a46d48d9bac4c4", - "shasum": "" - }, - "require": { - "php": ">=5.3.3", - "sebastian/recursion-context": "~2.0" - }, - "require-dev": { - "ext-mbstring": "*", - "phpunit/phpunit": "~4.4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": 
[ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Jeff Welch", - "email": "whatthejeff@gmail.com" - }, - { - "name": "Volker Dusch", - "email": "github@wallbash.com" - }, - { - "name": "Bernhard Schussek", - "email": "bschussek@2bepublished.at" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - }, - { - "name": "Adam Harvey", - "email": "aharvey@php.net" - } - ], - "description": "Provides the functionality to export PHP variables for visualization", - "homepage": "http://www.github.com/sebastianbergmann/exporter", - "keywords": [ - "export", - "exporter" - ], - "time": "2016-11-19T08:54:04+00:00" - }, - { - "name": "sebastian/global-state", - "version": "1.1.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/global-state.git", - "reference": "bc37d50fea7d017d3d340f230811c9f1d7280af4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/global-state/zipball/bc37d50fea7d017d3d340f230811c9f1d7280af4", - "reference": "bc37d50fea7d017d3d340f230811c9f1d7280af4", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "require-dev": { - "phpunit/phpunit": "~4.2" - }, - "suggest": { - "ext-uopz": "*" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Snapshotting of global state", - "homepage": "http://www.github.com/sebastianbergmann/global-state", - "keywords": [ - "global state" - ], - "time": "2015-10-12T03:26:01+00:00" - }, - { - "name": "sebastian/object-enumerator", - "version": "2.0.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/object-enumerator.git", - "reference": "1311872ac850040a79c3c058bea3e22d0f09cbb7" - }, - "dist": { - 
"type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/1311872ac850040a79c3c058bea3e22d0f09cbb7", - "reference": "1311872ac850040a79c3c058bea3e22d0f09cbb7", - "shasum": "" - }, - "require": { - "php": ">=5.6", - "sebastian/recursion-context": "~2.0" - }, - "require-dev": { - "phpunit/phpunit": "~5" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Traverses array structures and object graphs to enumerate all referenced objects", - "homepage": "https://github.com/sebastianbergmann/object-enumerator/", - "time": "2017-02-18T15:18:39+00:00" - }, - { - "name": "sebastian/recursion-context", - "version": "2.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/recursion-context.git", - "reference": "2c3ba150cbec723aa057506e73a8d33bdb286c9a" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/2c3ba150cbec723aa057506e73a8d33bdb286c9a", - "reference": "2c3ba150cbec723aa057506e73a8d33bdb286c9a", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "require-dev": { - "phpunit/phpunit": "~4.4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Jeff Welch", - "email": "whatthejeff@gmail.com" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - }, - { - "name": "Adam Harvey", - "email": "aharvey@php.net" - } - ], - "description": "Provides functionality to recursively process PHP variables", - 
"homepage": "http://www.github.com/sebastianbergmann/recursion-context", - "time": "2016-11-19T07:33:16+00:00" - }, - { - "name": "sebastian/resource-operations", - "version": "1.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/resource-operations.git", - "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/resource-operations/zipball/ce990bb21759f94aeafd30209e8cfcdfa8bc3f52", - "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52", - "shasum": "" - }, - "require": { - "php": ">=5.6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Provides a list of PHP built-in functions that operate on resources", - "homepage": "https://www.github.com/sebastianbergmann/resource-operations", - "time": "2015-07-28T20:34:47+00:00" - }, - { - "name": "sebastian/version", - "version": "2.0.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/version.git", - "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/version/zipball/99732be0ddb3361e16ad77b68ba41efc8e979019", - "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019", - "shasum": "" - }, - "require": { - "php": ">=5.6" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - 
"description": "Library that helps with managing the version number of Git-hosted PHP projects", - "homepage": "https://github.com/sebastianbergmann/version", - "time": "2016-10-03T07:35:21+00:00" - }, - { - "name": "symfony/polyfill-ctype", - "version": "v1.9.0", - "source": { - "type": "git", - "url": "https://github.com/symfony/polyfill-ctype.git", - "reference": "e3d826245268269cd66f8326bd8bc066687b4a19" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/e3d826245268269cd66f8326bd8bc066687b4a19", - "reference": "e3d826245268269cd66f8326bd8bc066687b4a19", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "suggest": { - "ext-ctype": "For best performance" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.9-dev" - } - }, - "autoload": { - "psr-4": { - "Symfony\\Polyfill\\Ctype\\": "" - }, - "files": [ - "bootstrap.php" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Symfony Community", - "homepage": "https://symfony.com/contributors" - }, - { - "name": "Gert de Pagter", - "email": "BackEndTea@gmail.com" - } - ], - "description": "Symfony polyfill for ctype functions", - "homepage": "https://symfony.com", - "keywords": [ - "compatibility", - "ctype", - "polyfill", - "portable" - ], - "time": "2018-08-06T14:22:27+00:00" - }, - { - "name": "symfony/yaml", - "version": "v4.1.3", - "source": { - "type": "git", - "url": "https://github.com/symfony/yaml.git", - "reference": "46bc69aa91fc4ab78a96ce67873a6b0c148fd48c" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/symfony/yaml/zipball/46bc69aa91fc4ab78a96ce67873a6b0c148fd48c", - "reference": "46bc69aa91fc4ab78a96ce67873a6b0c148fd48c", - "shasum": "" - }, - "require": { - "php": "^7.1.3", - "symfony/polyfill-ctype": "~1.8" - }, - "conflict": { - "symfony/console": "<3.4" - }, - "require-dev": { - "symfony/console": "~3.4|~4.0" - }, 
- "suggest": { - "symfony/console": "For validating YAML files using the lint command" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "4.1-dev" - } - }, - "autoload": { - "psr-4": { - "Symfony\\Component\\Yaml\\": "" - }, - "exclude-from-classmap": [ - "/Tests/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Fabien Potencier", - "email": "fabien@symfony.com" - }, - { - "name": "Symfony Community", - "homepage": "https://symfony.com/contributors" - } - ], - "description": "Symfony Yaml Component", - "homepage": "https://symfony.com", - "time": "2018-07-26T11:24:31+00:00" - }, - { - "name": "webmozart/assert", - "version": "1.3.0", - "source": { - "type": "git", - "url": "https://github.com/webmozart/assert.git", - "reference": "0df1908962e7a3071564e857d86874dad1ef204a" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/webmozart/assert/zipball/0df1908962e7a3071564e857d86874dad1ef204a", - "reference": "0df1908962e7a3071564e857d86874dad1ef204a", - "shasum": "" - }, - "require": { - "php": "^5.3.3 || ^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^4.6", - "sebastian/version": "^1.0.1" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.3-dev" - } - }, - "autoload": { - "psr-4": { - "Webmozart\\Assert\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Bernhard Schussek", - "email": "bschussek@gmail.com" - } - ], - "description": "Assertions to validate method input/output with nice error messages.", - "keywords": [ - "assert", - "check", - "validate" - ], - "time": "2018-01-29T19:49:41+00:00" - } - ], - "aliases": [], - "minimum-stability": "stable", - "stability-flags": [], - "prefer-stable": false, - "prefer-lowest": false, - "platform": { - "php": "7.*", - "ext-ctype": "*", - "ext-mbstring": "*" - }, - "platform-dev": [] -} 
diff --git a/phpunit.xml b/phpunit.xml deleted file mode 100644 index 8c26e40..0000000 --- a/phpunit.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - ./tests/ - - - - - - src - - - \ No newline at end of file diff --git a/readme.md b/readme.md index fa8fa15..2206659 100644 --- a/readme.md +++ b/readme.md @@ -1,73 +1,32 @@ -# PHP.Science - TextRank - -[![Build Status](https://travis-ci.org/DavidBelicza/PHP-Science-TextRank.svg?branch=master)](https://travis-ci.org/DavidBelicza/PHP-Science-TextRank) -[![Latest Stable Version](https://poser.pugx.org/php-science/textrank/v/stable.svg)](https://packagist.org/packages/php-science/textrank) -[![License](https://img.shields.io/badge/license-MIT-33CCFF.svg)](https://opensource.org/licenses/MIT) -[![composer.lock](https://poser.pugx.org/php-science/textrank/composerlock)](https://packagist.org/packages/php-science/textrank) - -This source code is an implementation of the TextRank algorithm (Automatic summarization) on PHP7 strict mode. It can summarize a text, article for example to a short paragraph. Before it would start the summarizing it removes the junk words what are defined in the Stopwords namespace. It is possible to extend it with another languages. - - - -## Authors, Contributors - -Name | GitHub user ---- | --- -David Belicza | @DavidBelicza -Riccardo Marton | @riccardomarton -Syndesi | @Syndesi -vincentsch | @vincentsch -Andrew Welch | @khalwat -Andrey Astashov | @mvcaaa -Leo Toneff | @bragle - -## TextRank or Automatic summarization -> Automatic summarization is the process of reducing a text document with a computer program in order to create a summary that retains the most important points of the original document. Technologies that can make a coherent summary take into account variables such as length, writing style and syntax. Automatic data summarization is part of machine learning and data mining. 
The main idea of summarization is to find a representative subset of the data, which contains the information of the entire set. Summarization technologies are used in a large number of sectors in industry today. - Wikipedia - -The algorithm of this implementation is: -* Find sentences, -* Remove stopwords, -* Create integer values by find and count the matching words, -* Change the integer values by the related words' integer values, -* Normalize values to create scores, -* Order by scores - -## Install -``` -composer require php-science/textrank -``` - -## Test -``` -cd project-folder -composer test -``` -or -``` -cd project-folder -phpunit --colors='always' $(pwd)/tests -``` - -## Examples -```php - -use PhpScience\TextRank\Tool\StopWords\English; - -// String contains a long text, see the /res/sample1.txt file. -$text = "Lorem ipsum..."; - -$api = new TextRankFacade(); -// English implementation for stopwords/junk words: -$stopWords = new English(); -$api->setStopWords($stopWords); - -// Array of the most important keywords: -$result = $api->getOnlyKeyWords($text); - -// Array of the sentences from the most important part of the text: -$result = $api->getHighlights($text); - -// Array of the most important sentences from the text: -$result = $api->summarizeTextBasic($text); -``` -More examples: https://github.com/DoveID/PHP-Science-TextRank/blob/master/tests/TextRankFacadeTest.php +

+PHP.Science TextRank +

+ +

+ + + + + + + + + + + + + + + +

+ +

+This source code is an OOP implementation of the TextRank algorithm. +
The minimum required PHP version is 7.4. +
+
+

+ +## About + +v 2.0.0 WIP diff --git a/resource/stop-word/english.csv b/resource/stop-word/english.csv new file mode 100644 index 0000000..a338b15 --- /dev/null +++ b/resource/stop-word/english.csv @@ -0,0 +1,318 @@ +a +about +above +above +across +after +afterwards +again +against +all +almost +alone +along +already +also +although +always +am +among +amongst +amoungst +amount +an +and +another +any +anyhow +anyone +anything +anyway +anywhere +are +around +as +at +back +be +became +because +become +becomes +becoming +been +before +beforehand +behind +being +below +beside +besides +between +beyond +bill +both +bottom +but +by +call +can +cannot +cant +co +con +could +couldnt +cry +de +describe +detail +do +done +down +due +during +each +eg +eight +either +eleven +else +elsewhere +empty +enough +etc +even +ever +every +everyone +everything +everywhere +except +few +fifteen +fify +fill +find +fire +first +five +for +former +formerly +forty +found +four +from +front +full +further +get +give +go +had +has +hasnt +have +he +hence +her +here +hereafter +hereby +herein +hereupon +hers +herself +him +himself +his +how +however +hundred +ie +if +in +inc +indeed +interest +into +is +it +its +itself +keep +last +latter +latterly +least +less +ltd +made +many +may +me +meanwhile +might +mill +mine +more +moreover +most +mostly +move +much +must +my +myself +name +namely +neither +never +nevertheless +next +nine +no +nobody +none +noone +nor +not +nothing +now +nowhere +of +off +often +on +once +one +only +onto +or +other +others +otherwise +our +ours +ourselves +out +over +own +part +per +perhaps +please +put +rather +re +same +see +seem +seemed +seeming +seems +serious +several +she +should +show +side +since +sincere +six +sixty +so +some +somehow +someone +something +sometime +sometimes +somewhere +still +such +system +take +ten +than +that +the +their +them +themselves +then +thence +there +thereafter +thereby +therefore +therein +thereupon +these +they +thickv +thin 
+third +this +those +though +three +through +throughout +thru +thus +to +together +too +top +toward +towards +twelve +twenty +two +un +under +until +up +upon +us +very +via +was +we +well +were +what +whatever +when +whence +whenever +where +whereafter +whereas +whereby +wherein +whereupon +wherever +whether +which +while +whither +who +whoever +whole +whom +whose +why +will +with +within +without +would +yet +you +your +yours +yourself +yourselves diff --git a/resource/stop-word/french.csv b/resource/stop-word/french.csv new file mode 100644 index 0000000..3909d03 --- /dev/null +++ b/resource/stop-word/french.csv @@ -0,0 +1,689 @@ +a +abord +absolument +afin +ah +ai +aie +aient +aies +ailleurs +ainsi +ait +allaient +allo +allons +allô +alors +anterieur +anterieure +anterieures +apres +après +as +assez +attendu +au +aucun +aucune +aucuns +aujourd +aujourd\'hui +aupres +auquel +aura +aurai +auraient +aurais +aurait +auras +aurez +auriez +aurions +aurons +auront +aussi +autre +autrefois +autrement +autres +autrui +aux +auxquelles +auxquels +avaient +avais +avait +avant +avec +avez +aviez +avions +avoir +avons +ayant +ayez +ayons +b +bah +bas +basee +bat +beau +beaucoup +bien +bigre +bon +boum +bravo +brrr +c +car +ce +ceci +cela +celle +celle-ci +celle-là +celles +celles-ci +celles-là +celui +celui-ci +celui-là +celà +cent +cependant +certain +certaine +certaines +certains +certes +ces +cet +cette +ceux +ceux-ci +ceux-là +chacun +chacune +chaque +cher +chers +chez +chiche +chut +chère +chères +ci +cinq +cinquantaine +cinquante +cinquantième +cinquième +clac +clic +combien +comme +comment +comparable +comparables +compris +concernant +contre +couic +crac +d +da +dans +de +debout +dedans +dehors +deja +delà +depuis +dernier +derniere +derriere +derrière +des +desormais +desquelles +desquels +dessous +dessus +deux +deuxième +deuxièmement +devant +devers +devra +devrait +different +differentes +differents +différent +différente +différentes +différents +dire +directe 
+directement +dit +dite +dits +divers +diverse +diverses +dix +dix-huit +dix-neuf +dix-sept +dixième +doit +doivent +donc +dont +dos +douze +douzième +dring +droite +du +duquel +durant +dès +début +désormais +e +effet +egale +egalement +egales +eh +elle +elle-même +elles +elles-mêmes +en +encore +enfin +entre +envers +environ +es +essai +est +et +etant +etc +etre +eu +eue +eues +euh +eurent +eus +eusse +eussent +eusses +eussiez +eussions +eut +eux +eux-mêmes +exactement +excepté +extenso +exterieur +eûmes +eût +eûtes +f +fais +faisaient +faisant +fait +faites +façon +feront +fi +flac +floc +fois +font +force +furent +fus +fusse +fussent +fusses +fussiez +fussions +fut +fûmes +fût +fûtes +g +gens +h +ha +haut +hein +hem +hep +hi +ho +holà +hop +hormis +hors +hou +houp +hue +hui +huit +huitième +hum +hurrah +hé +hélas +i +ici +il +ils +importe +j +je +jusqu +jusque +juste +k +l +la +laisser +laquelle +las +le +lequel +les +lesquelles +lesquels +leur +leurs +longtemps +lors +lorsque +lui +lui-meme +lui-même +là +lès +m +ma +maint +maintenant +mais +malgre +malgré +maximale +me +meme +memes +merci +mes +mien +mienne +miennes +miens +mille +mince +mine +minimale +moi +moi-meme +moi-même +moindres +moins +mon +mot +moyennant +multiple +multiples +même +mêmes +n +na +naturel +naturelle +naturelles +ne +neanmoins +necessaire +necessairement +neuf +neuvième +ni +nombreuses +nombreux +nommés +non +nos +notamment +notre +nous +nous-mêmes +nouveau +nouveaux +nul +néanmoins +nôtre +nôtres +o +oh +ohé +ollé +olé +on +ont +onze +onzième +ore +ou +ouf +ouias +oust +ouste +outre +ouvert +ouverte +ouverts +o| +où +p +paf +pan +par +parce +parfois +parle +parlent +parler +parmi +parole +parseme +partant +particulier +particulière +particulièrement +pas +passé +pendant +pense +permet +personne +personnes +peu +peut +peuvent +peux +pff +pfft +pfut +pif +pire +pièce +plein +plouf +plupart +plus +plusieurs +plutôt +possessif +possessifs +possible +possibles +pouah +pour +pourquoi 
+pourrais +pourrait +pouvait +prealable +precisement +premier +première +premièrement +pres +probable +probante +procedant +proche +près +psitt +pu +puis +puisque +pur +pure +q +qu +quand +quant +quant-à-soi +quanta +quarante +quatorze +quatre +quatre-vingt +quatrième +quatrièmement +que +quel +quelconque +quelle +quelles +quelqu\'un +quelque +quelques +quels +qui +quiconque +quinze +quoi +quoique +r +rare +rarement +rares +relative +relativement +remarquable +rend +rendre +restant +reste +restent +restrictif +retour +revoici +revoilà +rien +s +sa +sacrebleu +sait +sans +sapristi +sauf +se +sein +seize +selon +semblable +semblaient +semble +semblent +sent +sept +septième +sera +serai +seraient +serais +serait +seras +serez +seriez +serions +serons +seront +ses +seul +seule +seulement +si +sien +sienne +siennes +siens +sinon +six +sixième +soi +soi-même +soient +sois +soit +soixante +sommes +son +sont +sous +souvent +soyez +soyons +specifique +specifiques +speculatif +stop +strictement +subtiles +suffisant +suffisante +suffit +suis +suit +suivant +suivante +suivantes +suivants +suivre +sujet +superpose +sur +surtout +t +ta +tac +tandis +tant +tardive +te +tel +telle +tellement +telles +tels +tenant +tend +tenir +tente +tes +tic +tien +tienne +tiennes +tiens +toc +toi +toi-même +ton +touchant +toujours +tous +tout +toute +toutefois +toutes +treize +trente +tres +trois +troisième +troisièmement +trop +très +tsoin +tsouin +tu +té +u +un +une +unes +uniformement +unique +uniques +uns +v +va +vais +valeur +vas +vers +via +vif +vifs +vingt +vivat +vive +vives +vlan +voici +voie +voient +voilà +vont +vos +votre +vous +vous-mêmes +vu +vé +vôtre +vôtres +w +x +y +z +zut +à +â +ça +ès +étaient +étais +était +étant +état +étiez +étions +été +étée +étées +étés +êtes +être +ô diff --git a/resource/stop-word/german.csv b/resource/stop-word/german.csv new file mode 100644 index 0000000..fce38c4 --- /dev/null +++ b/resource/stop-word/german.csv @@ -0,0 +1,598 @@ +ab +aber +alle 
+allein +allem +allen +aller +allerdings +allerlei +alles +allmählich +allzu +als +alsbald +also +am +an +and +ander +andere +anderem +anderen +anderer +andererseits +anderes +anderm +andern +andernfalls +anders +anstatt +auch +auf +aus +ausgenommen +ausser +ausserdem +außer +außerdem +außerhalb +bald +bei +beide +beiden +beiderlei +beides +beim +beinahe +bereits +besonders +besser +beträchtlich +bevor +bezüglich +bin +bis +bisher +bislang +bist +bloß +bsp. +bzw +ca +ca. +content +da +dabei +dadurch +dafür +dagegen +daher +dahin +damals +damit +danach +daneben +dann +daran +darauf +daraus +darin +darum +darunter +darüber +darüberhinaus +das +dass +dasselbe +davon +davor +dazu +daß +dein +deine +deinem +deinen +deiner +deines +dem +demnach +demselben +den +denen +denn +dennoch +denselben +der +derart +derartig +derem +deren +derer +derjenige +derjenigen +derselbe +derselben +derzeit +des +deshalb +desselben +dessen +desto +deswegen +dich +die +diejenige +dies +diese +dieselbe +dieselben +diesem +diesen +dieser +dieses +diesseits +dir +direkt +direkte +direkten +direkter +doch +dort +dorther +dorthin +drauf +drin +drunter +drüber +du +dunklen +durch +durchaus +eben +ebenfalls +ebenso +eher +eigenen +eigenes +eigentlich +ein +eine +einem +einen +einer +einerseits +eines +einfach +einführen +einführte +einführten +eingesetzt +einig +einige +einigem +einigen +einiger +einigermaßen +einiges +einmal +eins +einseitig +einseitige +einseitigen +einseitiger +einst +einstmals +einzig +entsprechend +entweder +er +erst +es +etc +etliche +etwa +etwas +euch +euer +eure +eurem +euren +eurer +eures +falls +fast +ferner +folgende +folgenden +folgender +folgendes +folglich +fuer +für +gab +ganze +ganzem +ganzen +ganzer +ganzes +gar +gegen +gemäss +ggf +gleich +gleichwohl +gleichzeitig +glücklicherweise +gänzlich +hab +habe +haben +haette +hast +hat +hatte +hatten +hattest +hattet +heraus +herein +hier +hier +hinter +hiermit +hiesige +hin +hinein +hinten +hinter +hinterher +http +hätt 
+hätte +hätten +höchstens +ich +igitt +ihm +ihn +ihnen +ihr +ihre +ihrem +ihren +ihrer +ihres +im +immer +immerhin +in +indem +indessen +infolge +innen +innerhalb +ins +insofern +inzwischen +irgend +irgendeine +irgendwas +irgendwen +irgendwer +irgendwie +irgendwo +ist +ja +je +jed +jede +jedem +jeden +jedenfalls +jeder +jederlei +jedes +jedoch +jemand +jene +jenem +jenen +jener +jenes +jenseits +jetzt +jährig +jährige +jährigen +jähriges +kam +kann +kannst +kaum +kein +keine +keinem +keinen +keiner +keinerlei +keines +keineswegs +klar +klare +klaren +klares +klein +kleinen +kleiner +kleines +koennen +koennt +koennte +koennten +komme +kommen +kommt +konkret +konkrete +konkreten +konkreter +konkretes +können +könnt +künftig +leider +machen +man +manche +manchem +manchen +mancher +mancherorts +manches +manchmal +mehr +mehrere +mein +meine +meinem +meinen +meiner +meines +mich +mir +mit +mithin +muessen +muesst +muesste +muss +musst +musste +mussten +muß +mußt +müssen +müsste +müssten +müßt +müßte +nach +nachdem +nachher +nachhinein +nahm +natürlich +neben +nebenan +nehmen +nein +nicht +nichts +nie +niemals +niemand +nirgends +nirgendwo +noch +nun +nur +nächste +nämlich +nötigenfalls +ob +oben +oberhalb +obgleich +obschon +obwohl +oder +oft +per +plötzlich +schließlich +schon +sehr +sehrwohl +seid +sein +seine +seinem +seinen +seiner +seines +seit +seitdem +seither +selber +selbst +sich +sicher +sicherlich +sie +sind +so +sobald +sodass +sodaß +soeben +sofern +sofort +sogar +solange +solch +solche +solchem +solchen +solcher +solches +soll +sollen +sollst +sollt +sollte +sollten +solltest +somit +sondern +sonst +sonstwo +sooft +soviel +soweit +sowie +sowohl +tatsächlich +tatsächlichen +tatsächlicher +tatsächliches +trotzdem +ueber +um +umso +unbedingt +und +unmöglich +unmögliche +unmöglichen +unmöglicher +uns +unser +unser +unsere +unsere +unserem +unseren +unserer +unseres +unter +usw +viel +viele +vielen +vieler +vieles +vielleicht +vielmals +vom +von +vor +voran 
+vorher +vorüber +völlig +wann +war +waren +warst +warum +was +weder +weil +weiter +weitere +weiterem +weiteren +weiterer +weiteres +weiterhin +weiß +welche +welchem +welchen +welcher +welches +wem +wen +wenig +wenige +weniger +wenigstens +wenn +wenngleich +wer +werde +werden +werdet +weshalb +wessen +wichtig +wie +wieder +wieso +wieviel +wiewohl +will +willst +wir +wird +wirklich +wirst +wo +wodurch +wogegen +woher +wohin +wohingegen +wohl +wohlweislich +womit +woraufhin +woraus +worin +wurde +wurden +während +währenddessen +wär +wäre +wären +würde +würden +z.B. +zB +zahlreich +zeitweise +zu +zudem +zuerst +zufolge +zugleich +zuletzt +zum +zumal +zur +zurück +zusammen +zuviel +zwar +zwischen +ähnlich +übel +über +überall +überallhin +überdies +übermorgen +übrig +übrigens diff --git a/resource/stop-word/italian.csv b/resource/stop-word/italian.csv new file mode 100644 index 0000000..bdedf2e --- /dev/null +++ b/resource/stop-word/italian.csv @@ -0,0 +1,660 @@ +a +abbastanza +abbia +abbiamo +abbiano +abbiate +accidenti +ad +adesso +affinche +agl +agli +ahime +ahim㨠+ahimè +ai +al +alcuna +alcuni +alcuno +all +alla +alle +allo +allora +altre +altri +altrimenti +altro +altrove +altrui +anche +ancora +anni +anno +ansa +anticipo +assai +attesa +attraverso +avanti +avemmo +avendo +avente +aver +avere +averlo +avesse +avessero +avessi +avessimo +aveste +avesti +avete +aveva +avevamo +avevano +avevate +avevi +avevo +avrai +avranno +avrebbe +avrebbero +avrei +avremmo +avremo +avreste +avresti +avrete +avrà +avrò +avuta +avute +avuti +avuto +basta +ben +bene +benissimo +berlusconi +brava +bravo +buono +c +casa +caso +cento +certa +certe +certi +certo +che +chi +chicchessia +chiunque +ci +ciascuna +ciascuno +cima +cinque +cio +cioe +cio㨠+cioè +circa +citta +città +cittã +ciã² +ciò +co +codesta +codesti +codesto +cogli +coi +col +colei +coll +coloro +colui +come +cominci +comprare +comunque +con +concernente +conciliarsi +conclusione +consecutivi +consecutivo +consiglio +contro 
+cortesia +cos +cosa +cosi +cos㬠+così +cui +d +da +dagl +dagli +dai +dal +dall +dalla +dalle +dallo +dappertutto +davanti +degl +degli +dei +del +dell +della +delle +dello +dentro +detto +deve +devo +di +dice +dietro +dire +dirimpetto +diventa +diventare +diventato +dopo +doppio +dov +dove +dovra +dovrà +dovrã +dovunque +due +dunque +durante +e +ebbe +ebbero +ebbi +ecc +ecco +ed +effettivamente +egli +ella +entrambi +eppure +era +erano +eravamo +eravate +eri +ero +esempio +esse +essendo +esser +essere +essi +ex +fa +faccia +facciamo +facciano +facciate +faccio +facemmo +facendo +facesse +facessero +facessi +facessimo +faceste +facesti +faceva +facevamo +facevano +facevate +facevi +facevo +fai +fanno +farai +faranno +fare +farebbe +farebbero +farei +faremmo +faremo +fareste +faresti +farete +farà +farò +fatto +favore +fece +fecero +feci +fin +finalmente +finche +fine +fino +forse +forza +fosse +fossero +fossi +fossimo +foste +fosti +fra +frattempo +fu +fui +fummo +fuori +furono +futuro +generale +gente +gia +giacche +giorni +giorno +giu +già +giã +gli +gliela +gliele +glieli +glielo +gliene +governo +grande +grazie +gruppo +ha +haha +hai +hanno +ho +i +ie +ieri +il +improvviso +in +inc +indietro +infatti +inoltre +insieme +intanto +intorno +invece +io +l +la +lasciato +lato +lavoro +le +lei +li +lo +lontano +loro +lui +lungo +luogo +là +lã +ma +macche +magari +maggior +mai +male +malgrado +malissimo +mancanza +marche +me +medesimo +mediante +meglio +meno +mentre +mesi +mezzo +mi +mia +mie +miei +mila +miliardi +milioni +minimi +ministro +mio +modo +molta +molti +moltissimo +molto +momento +mondo +mosto +nazionale +ne +negl +negli +nei +nel +nell +nella +nelle +nello +nemmeno +neppure +nessun +nessuna +nessuno +niente +no +noi +nome +non +nondimeno +nonostante +nonsia +nostra +nostre +nostri +nostro +novanta +nove +nulla +nuovi +nuovo +o +od +oggi +ogni +ognuna +ognuno +oltre +oppure +ora +ore +osi +ossia +ottanta +otto +paese +parecchi +parecchie +parecchio +parte 
+partendo +peccato +peggio +per +perche +perch㨠+perchè +perché +percio +perciã² +perciò +perfino +pero +persino +persone +perã² +però +piedi +pieno +piglia +piu +piuttosto +piã¹ +più +po +pochissimo +poco +poi +poiche +possa +possedere +posteriore +posto +potrebbe +preferibilmente +presa +press +prima +primo +principalmente +probabilmente +promesso +proprio +puo +pure +purtroppo +puã² +può +qua +qualche +qualcosa +qualcuna +qualcuno +quale +quali +qualunque +quando +quanta +quante +quanti +quanto +quantunque +quarto +quasi +quattro +quel +quella +quelle +quelli +quello +quest +questa +queste +questi +questo +qui +quindi +quinto +realmente +recente +recentemente +registrazione +relativo +riecco +rispetto +salvo +sara +sarai +saranno +sarebbe +sarebbero +sarei +saremmo +saremo +sareste +saresti +sarete +sarà +sarã +sarò +scola +scopo +scorso +se +secondo +seguente +seguito +sei +sembra +sembrare +sembrato +sembrava +sembri +sempre +senza +sette +si +sia +siamo +siano +siate +siete +sig +solito +solo +soltanto +sono +sopra +soprattutto +sotto +spesso +srl +sta +stai +stando +stanno +starai +staranno +starebbe +starebbero +starei +staremmo +staremo +stareste +staresti +starete +starà +starò +stata +state +stati +stato +stava +stavamo +stavano +stavate +stavi +stavo +stemmo +stessa +stesse +stessero +stessi +stessimo +stesso +steste +stesti +stette +stettero +stetti +stia +stiamo +stiano +stiate +sto +su +sua +subito +successivamente +successivo +sue +sugl +sugli +sui +sul +sull +sulla +sulle +sullo +suo +suoi +tale +tali +talvolta +tanto +te +tempo +terzo +th +ti +titolo +torino +tra +tranne +tre +trenta +triplo +troppo +trovato +tu +tua +tue +tuo +tuoi +tutta +tuttavia +tutte +tutti +tutto +uguali +ulteriore +ultimo +un +una +uno +uomo +va +vai +vale +vari +varia +varie +vario +verso +vi +via +vicino +visto +vita +voi +volta +volte +vostra +vostre +vostri +vostro +㨠+è diff --git a/resource/stop-word/norwegian.csv b/resource/stop-word/norwegian.csv new file mode 100644 
index 0000000..e93528f --- /dev/null +++ b/resource/stop-word/norwegian.csv @@ -0,0 +1,221 @@ +alle +andre +arbeid +at +av +bare +begge +ble +blei +bli +blir +blitt +bort +bra +bruke +både +båe +da +de +deg +dei +deim +deira +deires +dem +den +denne +der +dere +deres +det +dette +di +din +disse +ditt +du +dykk +dykkar +då +eg +ein +eit +eitt +eller +elles +en +ene +eneste +enhver +enn +er +et +ett +etter +folk +for +fordi +forsûke +fra +få +før +fûr +fûrst +gjorde +gjûre +god +gå +ha +hadde +han +hans +har +hennar +henne +hennes +her +hjå +ho +hoe +honom +hoss +hossen +hun +hva +hvem +hver +hvilke +hvilken +hvis +hvor +hvordan +hvorfor +i +ikke +ikkje +ingen +ingi +inkje +inn +innen +inni +ja +jeg +kan +kom +korleis +korso +kun +kunne +kva +kvar +kvarhelst +kven +kvi +kvifor +lage +lang +lik +like +makt +man +mange +me +med +medan +meg +meget +mellom +men +mens +mer +mest +mi +min +mine +mitt +mot +mye +mykje +må +måte +navn +ned +nei +no +noe +noen +noka +noko +nokon +nokor +nokre +ny +nå +når +og +også +om +opp +oss +over +part +punkt +på +rett +riktig +samme +sant +seg +selv +si +sia +sidan +siden +sin +sine +sist +sitt +sjøl +skal +skulle +slik +slutt +so +som +somme +somt +start +stille +så +sånn +tid +til +tilbake +tilstand +um +under +upp +ut +uten +var +vart +varte +ved +verdi +vere +verte +vi +vil +ville +vite +vore +vors +vort +vår +være +vært +vöre +vört +å diff --git a/resource/stop-word/russian.csv b/resource/stop-word/russian.csv new file mode 100644 index 0000000..38c4f2a --- /dev/null +++ b/resource/stop-word/russian.csv @@ -0,0 +1,559 @@ +c +а +алло +без +белый +близко +более +больше +большой +будем +будет +будете +будешь +будто +буду +будут +будь +бы +бывает +бывь +был +была +были +было +быть +в +важная +важное +важные +важный +вам +вами +вас +ваш +ваша +ваше +ваши +вверх +вдали +вдруг +ведь +везде +вернуться +весь +вечер +взгляд +взять +вид +видел +видеть +вместе +вне +вниз +внизу +во +вода +война +вокруг +вон +вообще +вопрос +восемнадцатый 
+восемнадцать +восемь +восьмой +вот +впрочем +времени +время +все +все еще +всегда +всего +всем +всеми +всему +всех +всею +всю +всюду +вся +всё +второй +вы +выйти +г +где +главный +глаз +говорил +говорит +говорить +год +года +году +голова +голос +город +да +давать +давно +даже +далекий +далеко +дальше +даром +дать +два +двадцатый +двадцать +две +двенадцатый +двенадцать +дверь +двух +девятнадцатый +девятнадцать +девятый +девять +действительно +дел +делал +делать +делаю +дело +день +деньги +десятый +десять +для +до +довольно +долго +должен +должно +должный +дом +дорога +друг +другая +другие +других +друго +другое +другой +думать +душа +е +его +ее +ей +ему +если +есть +еще +ещё +ею +её +ж +ждать +же +жена +женщина +жизнь +жить +за +занят +занята +занято +заняты +затем +зато +зачем +здесь +земля +знать +значит +значить +и +иди +идти +из +или +им +имеет +имел +именно +иметь +ими +имя +иногда +их +к +каждая +каждое +каждые +каждый +кажется +казаться +как +какая +какой +кем +книга +когда +кого +ком +комната +кому +конец +конечно +которая +которого +которой +которые +который +которых +кроме +кругом +кто +куда +лежать +лет +ли +лицо +лишь +лучше +любить +люди +м +маленький +мало +мать +машина +между +меля +менее +меньше +меня +место +миллионов +мимо +минута +мир +мира +мне +много +многочисленная +многочисленное +многочисленные +многочисленный +мной +мною +мог +могу +могут +мож +может +может быть +можно +можхо +мои +мой +мор +москва +мочь +моя +моё +мы +на +наверху +над +надо +назад +наиболее +найти +наконец +нам +нами +народ +нас +начала +начать +наш +наша +наше +наши +не +него +недавно +недалеко +нее +ней +некоторый +нельзя +нем +немного +нему +непрерывно +нередко +несколько +нет +нею +неё +ни +нибудь +ниже +низко +никакой +никогда +никто +никуда +ним +ними +них +ничего +ничто +но +новый +нога +ночь +ну +нужно +нужный +нх +о +об +оба +обычно +один +одиннадцатый +одиннадцать +однажды +однако +одного +одной +оказаться +окно +около +он +она +они +оно +опять +особенно 
+остаться +от +ответить +отец +откуда +отовсюду +отсюда +очень +первый +перед +писать +плечо +по +под +подойди +подумать +пожалуйста +позже +пойти +пока +пол +получить +помнить +понимать +понять +пор +пора +после +последний +посмотреть +посреди +потом +потому +почему +почти +правда +прекрасно +при +про +просто +против +процентов +путь +пятнадцатый +пятнадцать +пятый +пять +работа +работать +раз +разве +рано +раньше +ребенок +решить +россия +рука +русский +ряд +рядом +с +с кем +сам +сама +сами +самим +самими +самих +само +самого +самой +самом +самому +саму +самый +свет +свое +своего +своей +свои +своих +свой +свою +сделать +сеаой +себе +себя +сегодня +седьмой +сейчас +семнадцатый +семнадцать +семь +сидеть +сила +сих +сказал +сказала +сказать +сколько +слишком +слово +случай +смотреть +сначала +снова +со +собой +собою +советский +совсем +спасибо +спросить +сразу +стал +старый +стать +стол +сторона +стоять +страна +суть +считать +т +та +так +такая +также +таки +такие +такое +такой +там +твои +твой +твоя +твоё +те +тебе +тебя +тем +теми +теперь +тех +то +тобой +тобою +товарищ +тогда +того +тоже +только +том +тому +тот +тою +третий +три +тринадцатый +тринадцать +ту +туда +тут +ты +тысяч +у +увидеть +уж +уже +улица +уметь +утро +хороший +хорошо +хотел бы +хотеть +хоть +хотя +хочешь +час +часто +часть +чаще +чего +человек +чем +чему +через +четвертый +четыре +четырнадцатый +четырнадцать +что +чтоб +чтобы +чуть +шестнадцатый +шестнадцать +шестой +шесть +эта +эти +этим +этими +этих +это +этого +этой +этом +этому +этот +эту +я +являюсь diff --git a/resource/stop-word/spanish.csv b/resource/stop-word/spanish.csv new file mode 100644 index 0000000..3b44c2c --- /dev/null +++ b/resource/stop-word/spanish.csv @@ -0,0 +1,721 @@ +a +actualmente +acuerdo +adelante +ademas +además +adrede +afirmó +agregó +ahi +ahora +ahí +al +algo +alguna +algunas +alguno +algunos +algún +alli +allí +alrededor +ambos +ampleamos +antano +antaño +ante +anterior +antes +apenas +aproximadamente +aquel 
+aquella +aquellas +aquello +aquellos +aqui +aquél +aquélla +aquéllas +aquéllos +aquí +arriba +arribaabajo +aseguró +asi +así +atras +aun +aunque +ayer +añadió +aún +b +bajo +bastante +bien +breve +buen +buena +buenas +bueno +buenos +c +cada +casi +cerca +cierta +ciertas +cierto +ciertos +cinco +claro +comentó +como +con +conmigo +conocer +conseguimos +conseguir +considera +consideró +consigo +consigue +consiguen +consigues +contigo +contra +cosas +creo +cual +cuales +cualquier +cuando +cuanta +cuantas +cuanto +cuantos +cuatro +cuenta +cuál +cuáles +cuándo +cuánta +cuántas +cuánto +cuántos +cómo +d +da +dado +dan +dar +de +debajo +debe +deben +debido +decir +dejó +del +delante +demasiado +demás +dentro +deprisa +desde +despacio +despues +después +detras +detrás +dia +dias +dice +dicen +dicho +dieron +diferente +diferentes +dijeron +dijo +dio +donde +dos +durante +día +días +dónde +e +ejemplo +el +ella +ellas +ello +ellos +embargo +empleais +emplean +emplear +empleas +empleo +en +encima +encuentra +enfrente +enseguida +entonces +entre +era +erais +eramos +eran +eras +eres +es +esa +esas +ese +eso +esos +esta +estaba +estabais +estaban +estabas +estad +estada +estadas +estado +estados +estais +estamos +estan +estando +estar +estaremos +estará +estarán +estarás +estaré +estaréis +estaría +estaríais +estaríamos +estarían +estarías +estas +este +estemos +esto +estos +estoy +estuve +estuviera +estuvierais +estuvieran +estuvieras +estuvieron +estuviese +estuvieseis +estuviesen +estuvieses +estuvimos +estuviste +estuvisteis +estuviéramos +estuviésemos +estuvo +está +estábamos +estáis +están +estás +esté +estéis +estén +estés +ex +excepto +existe +existen +explicó +expresó +f +fin +final +fue +fuera +fuerais +fueran +fueras +fueron +fuese +fueseis +fuesen +fueses +fui +fuimos +fuiste +fuisteis +fuéramos +fuésemos +g +general +gran +grandes +gueno +h +ha +haber +habia +habida +habidas +habido +habidos +habiendo +habla +hablan +habremos +habrá +habrán +habrás +habré +habréis 
+habría +habríais +habríamos +habrían +habrías +habéis +había +habíais +habíamos +habían +habías +hace +haceis +hacemos +hacen +hacer +hacerlo +haces +hacia +haciendo +hago +han +has +hasta +hay +haya +hayamos +hayan +hayas +hayáis +he +hecho +hemos +hicieron +hizo +horas +hoy +hube +hubiera +hubierais +hubieran +hubieras +hubieron +hubiese +hubieseis +hubiesen +hubieses +hubimos +hubiste +hubisteis +hubiéramos +hubiésemos +hubo +i +igual +incluso +indicó +informo +informó +intenta +intentais +intentamos +intentan +intentar +intentas +intento +ir +j +junto +k +l +la +lado +largo +las +le +lejos +les +llegó +lleva +llevar +lo +los +luego +lugar +m +mal +manera +manifestó +mas +mayor +me +mediante +medio +mejor +mencionó +menos +menudo +mi +mia +mias +mientras +mio +mios +mis +misma +mismas +mismo +mismos +modo +momento +mucha +muchas +mucho +muchos +muy +más +mí +mía +mías +mío +míos +n +nada +nadie +ni +ninguna +ningunas +ninguno +ningunos +ningún +no +nos +nosotras +nosotros +nuestra +nuestras +nuestro +nuestros +nueva +nuevas +nuevo +nuevos +nunca +o +ocho +os +otra +otras +otro +otros +p +pais +para +parece +parte +partir +pasada +pasado +paìs +peor +pero +pesar +poca +pocas +poco +pocos +podeis +podemos +poder +podria +podriais +podriamos +podrian +podrias +podrá +podrán +podría +podrían +poner +por +por qué +porque +posible +primer +primera +primero +primeros +principalmente +pronto +propia +propias +propio +propios +proximo +próximo +próximos +pudo +pueda +puede +pueden +puedo +pues +q +qeu +que +quedó +queremos +quien +quienes +quiere +quiza +quizas +quizá +quizás +quién +quiénes +qué +r +raras +realizado +realizar +realizó +repente +respecto +s +sabe +sabeis +sabemos +saben +saber +sabes +sal +salvo +se +sea +seamos +sean +seas +segun +segunda +segundo +según +seis +ser +sera +seremos +será +serán +serás +seré +seréis +sería +seríais +seríamos +serían +serías +seáis +señaló +si +sido +siempre +siendo +siete +sigue +siguiente +sin +sino +sobre +sois +sola 
+solamente +solas +solo +solos +somos +son +soy +soyos +su +supuesto +sus +suya +suyas +suyo +suyos +sé +sí +sólo +t +tal +tambien +también +tampoco +tan +tanto +tarde +te +temprano +tendremos +tendrá +tendrán +tendrás +tendré +tendréis +tendría +tendríais +tendríamos +tendrían +tendrías +tened +teneis +tenemos +tener +tenga +tengamos +tengan +tengas +tengo +tengáis +tenida +tenidas +tenido +tenidos +teniendo +tenéis +tenía +teníais +teníamos +tenían +tenías +tercera +ti +tiempo +tiene +tienen +tienes +toda +todas +todavia +todavía +todo +todos +total +trabaja +trabajais +trabajamos +trabajan +trabajar +trabajas +trabajo +tras +trata +través +tres +tu +tus +tuve +tuviera +tuvierais +tuvieran +tuvieras +tuvieron +tuviese +tuvieseis +tuviesen +tuvieses +tuvimos +tuviste +tuvisteis +tuviéramos +tuviésemos +tuvo +tuya +tuyas +tuyo +tuyos +tú +u +ultimo +un +una +unas +uno +unos +usa +usais +usamos +usan +usar +usas +uso +usted +ustedes +v +va +vais +valor +vamos +van +varias +varios +vaya +veces +ver +verdad +verdadera +verdadero +vez +vosotras +vosotros +voy +vuestra +vuestras +vuestro +vuestros +w +x +y +ya +yo +z +él +éramos +ésa +ésas +ése +ésos +ésta +éstas +éste +éstos +última +últimas +último +últimos diff --git a/src/Builder/AlgorithmOutputBuilder.php b/src/Builder/AlgorithmOutputBuilder.php new file mode 100644 index 0000000..a4f2b25 --- /dev/null +++ b/src/Builder/AlgorithmOutputBuilder.php @@ -0,0 +1,68 @@ +sortRankDataList = $getTopNodes; + } + + public function build( + TextInterface $text, + NodeCollectionInterface $nodeCollection, + array $sentences, + int $maxKeywords, + int $maxSentences + ): AlgorithmOutputInterface { + $words = $this->createWordList($text, $nodeCollection, $maxKeywords); + $sentences = array_slice( + $this->sortRankDataList->sort($sentences), + 0, + $maxSentences + ); + + $textRankOutput = new AlgorithmOutput(); + $textRankOutput->setKeyWords($words); + $textRankOutput->setSentences($sentences); + + return $textRankOutput; + } + + 
/** + * Builds the keyword list from the first $maxKeywords nodes of the sorted node list. + * + * Each selected node id is resolved to its token through the text's token map + * and copied, together with the node rank, into a RankDataObject. + * + * @param TextInterface $text Text whose token map resolves node ids. + * @param NodeCollectionInterface $nodeCollection Nodes exposing getId() and getRank(). + * @param int $maxKeywords Upper bound on the number of keywords. + * + * @return RankDataObject[] At most $maxKeywords items; fewer when fewer nodes exist. + */ + private function createWordList( + TextInterface $text, + NodeCollectionInterface $nodeCollection, + int $maxKeywords + ): array { + $nodes = $this + ->sortRankDataList + ->sort(array_values($nodeCollection->getNodes())); + $words = []; + + /* Clamp to the available nodes: reading $nodes[$i] past the end would call getId() on null. */ + $limit = min($maxKeywords, count($nodes)); + + for ($i = 0; $i < $limit; $i++) { + $nodeId = $nodes[$i]->getId(); + $token = $text->getTokenMap()->getToken($nodeId); + $word = new RankDataObject(); + $word->setId($nodeId); + $word->setValue($token); + $word->setRank($nodes[$i]->getRank()); + + $words[] = $word; + } + + return $words; + } +} diff --git a/src/Builder/AlgorithmOutputBuilderInterface.php b/src/Builder/AlgorithmOutputBuilderInterface.php new file mode 100644 index 0000000..b556aee --- /dev/null +++ b/src/Builder/AlgorithmOutputBuilderInterface.php @@ -0,0 +1,20 @@ +getSentences() as $sentence) { + foreach ($sentence->getVector() as $index => $tokenId) { + if (!isset($dataSource[$tokenId])) { + $dataSource[$tokenId] = [ + self::ID => $tokenId, + self::LEFT => [], + self::RIGHT => [] + ]; + } + + if ($sentence->isIndexExists($index - 1)) { + $previousTokenId = $sentence->getTokenId($index - 1); + if ($text->getTokenMap()->isExists($previousTokenId)) { + $dataSource[$tokenId][self::LEFT][] = $previousTokenId; + } + } + + if ($sentence->isIndexExists($index + 1)) { + $nextTokenId = $sentence->getTokenId($index + 1); + if ($text->getTokenMap()->isExists($nextTokenId)) { + $dataSource[$tokenId][self::RIGHT][] = $nextTokenId; + } + } + } + } + + return $dataSource; + } +} diff --git a/src/Builder/StopWordCollectionBuilder.php b/src/Builder/StopWordCollectionBuilder.php new file mode 100644 index 0000000..dd78e46 --- /dev/null +++ b/src/Builder/StopWordCollectionBuilder.php @@ -0,0 +1,31 @@ +reader = $reader; + } + + public function build(string $path): StopWordCollectionInterface + { + $words = []; + + foreach ($this->reader->read($path) as $row) { + $words[] = current($row); + } + + return new StopWordCollection($words); + } +} diff --git
a/src/Builder/StopWordCollectionBuilderInterface.php b/src/Builder/StopWordCollectionBuilderInterface.php new file mode 100644 index 0000000..0bbb820 --- /dev/null +++ b/src/Builder/StopWordCollectionBuilderInterface.php @@ -0,0 +1,12 @@ + $sentenceTokenList) { + $sentenceVector = []; + foreach ($sentenceTokenList as $token) { + $token = (string)$token; + if (!isset($tokens[$token])) { + $tokens[$token] = $i; + $tokenId = $i; + $i++; + } else { + $tokenId = $tokens[$token]; + } + + $sentenceVector[] = $tokenId; + } + + $sentence = new Sentence(); + $sentence->setId($sentenceIndex); + $sentence->setVector($sentenceVector); + $sentence->setOriginalValue($originalSentences[$sentenceIndex]); + $sentences[] = $sentence; + } + + $tokenMap = new TokenMap(); + $tokenMap->setTokenMap( + array_map( + 'strval', + array_flip($tokens) + ) + ); + + return new Text( + $tokenMap, + $sentences + ); + } +} diff --git a/src/Builder/TextBuilderInterface.php b/src/Builder/TextBuilderInterface.php new file mode 100644 index 0000000..3bf18b3 --- /dev/null +++ b/src/Builder/TextBuilderInterface.php @@ -0,0 +1,12 @@ +keyWords = $keywords; + } + + public function getKeyWords(): ?array + { + return $this->keyWords; + } + + public function setSentences(array $sentences): void + { + $this->sentences = $sentences; + } + + public function getSentences(): ?array + { + return $this->sentences; + } +} diff --git a/src/Data/AlgorithmOutputInterface.php b/src/Data/AlgorithmOutputInterface.php new file mode 100644 index 0000000..0a10c9e --- /dev/null +++ b/src/Data/AlgorithmOutputInterface.php @@ -0,0 +1,29 @@ +stopWordCsvPath = $stopWordCsvPath; + $this->minKeywordLength = $minKeywordLength; + $this->maxKeywords = $maxKeywords; + $this->maxKeySentences = $maxKeySentences; + $this->pageRankPowerIteration = $pageRankPowerIteration; + } + + public function getStopWordCsvPath(): string + { + return $this->stopWordCsvPath; + } + + public function setStopWordCsvPath(string $stopWordCsvPath): void + { + 
$this->stopWordCsvPath = $stopWordCsvPath; + } + + public function getMinKeywordLength(): int + { + return $this->minKeywordLength; + } + + public function setMinKeywordLength(int $minKeywordLength): void + { + $this->minKeywordLength = $minKeywordLength; + } + + public function getRawText(): string + { + return $this->rawText; + } + + public function setRawText(string $rawText): void + { + $this->rawText = $rawText; + } + + public function getMaxKeywords(): int + { + return $this->maxKeywords; + } + + public function setMaxKeywords(int $maxKeywords): void + { + $this->maxKeywords = $maxKeywords; + } + + public function getMaxKeySentences(): int + { + return $this->maxKeySentences; + } + + public function setMaxKeySentences(int $maxKeySentences): void + { + $this->maxKeySentences = $maxKeySentences; + } + + public function setPageRankPowerIteration(int $pageRankPowerIteration): void + { + $this->pageRankPowerIteration = $pageRankPowerIteration; + } + + public function getPageRankPowerIteration(): int + { + return $this->pageRankPowerIteration; + } +} diff --git a/src/Data/AlgorithmRequestInterface.php b/src/Data/AlgorithmRequestInterface.php new file mode 100644 index 0000000..3da182a --- /dev/null +++ b/src/Data/AlgorithmRequestInterface.php @@ -0,0 +1,68 @@ +id = $id; + } + + public function getId(): int + { + return $this->id; + } + + public function setValue(string $value): void + { + $this->value = $value; + } + + public function getValue(): string + { + return $this->value; + } + + public function setRank(float $rank): void + { + $this->rank = $rank; + } + + public function getRank(): float + { + return $this->rank; + } +} diff --git a/src/Data/RankDataObjectInterface.php b/src/Data/RankDataObjectInterface.php new file mode 100644 index 0000000..88f0d5d --- /dev/null +++ b/src/Data/RankDataObjectInterface.php @@ -0,0 +1,20 @@ +words = $words; + } + + public function isExist(string $word): bool + { + return array_search($word, $this->words) !== false; + } +} 
diff --git a/src/Data/StopWordCollectionInterface.php b/src/Data/StopWordCollectionInterface.php new file mode 100644 index 0000000..595e568 --- /dev/null +++ b/src/Data/StopWordCollectionInterface.php @@ -0,0 +1,10 @@ +tokenMap = $tokenMap; + $this->sentences = $sentences; + } + + public function getTokenMap(): TokenMapInterface + { + return $this->tokenMap; + } + + public function getSentences(): array + { + return $this->sentences; + } +} diff --git a/src/Data/Text/Sentence.php b/src/Data/Text/Sentence.php new file mode 100644 index 0000000..ee947a4 --- /dev/null +++ b/src/Data/Text/Sentence.php @@ -0,0 +1,52 @@ +id = $id; + } + + public function getId(): int + { + return $this->id; + } + + public function setOriginalValue(string $originalValue): void + { + $this->originalValue = $originalValue; + } + + public function getOriginalValue(): string + { + return $this->originalValue; + } + + public function setVector(array $vector): void + { + $this->vector = $vector; + } + + public function getVector(): array + { + return $this->vector; + } + + public function isIndexExists(int $index): bool + { + return isset($this->vector[$index]); + } + + public function getTokenId(int $index): int + { + return $this->vector[$index]; + } +} diff --git a/src/Data/Text/SentenceInterface.php b/src/Data/Text/SentenceInterface.php new file mode 100644 index 0000000..c239ffe --- /dev/null +++ b/src/Data/Text/SentenceInterface.php @@ -0,0 +1,30 @@ +tokenMap = $tokenMap; + } + + public function isExists(int $tokenId): bool + { + return isset($this->tokenMap[$tokenId]); + } + + public function getToken(int $tokenId): string + { + return $this->tokenMap[$tokenId]; + } +} diff --git a/src/Data/Text/TokenMapInterface.php b/src/Data/Text/TokenMapInterface.php new file mode 100644 index 0000000..d966eb5 --- /dev/null +++ b/src/Data/Text/TokenMapInterface.php @@ -0,0 +1,17 @@ +parser = $generalFactory->createParser(); + $this->pageRankAlgorithm = $generalFactory->createAlgorithmStrategy(); + 
$this->algorithmOutputBuilder = $generalFactory->createAlgorithmBuilder(); + $this->sentenceWeighting = $generalFactory->createSentenceWeighting(); + } + + public function rank( + AlgorithmRequestInterface $algorithmRequest + ): AlgorithmOutputInterface { + + $text = $this->parser->parse( + $algorithmRequest->getRawText(), + $algorithmRequest->getStopWordCsvPath(), + $algorithmRequest->getMinKeywordLength() + ); + + $nodeCollection = $this->pageRankAlgorithm->rank( + $text, + $algorithmRequest->getPageRankPowerIteration() + ); + + $sentences = $this->sentenceWeighting->weight( + $text, + $nodeCollection + ); + + return $this->algorithmOutputBuilder->build( + $text, + $nodeCollection, + $sentences, + $algorithmRequest->getMaxKeywords(), + $algorithmRequest->getMaxKeySentences() + ); + } +} diff --git a/src/Factory/GeneralFactory.php b/src/Factory/GeneralFactory.php new file mode 100644 index 0000000..5394225 --- /dev/null +++ b/src/Factory/GeneralFactory.php @@ -0,0 +1,59 @@ +getResource($path); + + while (false !== ($row = fgetcsv($resource))) { + yield array_values($row); + } + + fclose($resource); + } + + private function getResource(string $path) + { + $resource = fopen($path, 'r'); + + if (false === $resource) { + throw new IoException(sprintf('Can\'t read file [%s]', $path)); + } + + return $resource; + } +} diff --git a/src/Service/Parser.php b/src/Service/Parser.php new file mode 100644 index 0000000..59d51b5 --- /dev/null +++ b/src/Service/Parser.php @@ -0,0 +1,78 @@ +textBuilder = $textBuilder; + $this->stopWordCollectionBuilder = $stopWordCollectionBuilder; + } + + public function parse( + string $rawText, + string $stopWordsPath, + int $minimumTokenLength + ): TextInterface { + $stopWordCollection = $this + ->stopWordCollectionBuilder + ->build($stopWordsPath); + + $sentences = preg_split( + '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/', + $rawText, + -1, + PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE + ); + + foreach ($sentences as $sentenceIndex => 
$sentence) { + if (1 === strlen(trim($sentence))) { + unset($sentences[$sentenceIndex]); + } + } + + $textMap = []; + + foreach ($sentences as $sentenceIndex => $sentence) { + $tokens = preg_split( + '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/', + $sentence, + -1, + PREG_SPLIT_NO_EMPTY + ); + + foreach ($tokens as $tokenIndex => $token) { + $token = mb_strtolower(trim($token)); + + if ( + ctype_punct($token) + || mb_strlen($token) < $minimumTokenLength + || $stopWordCollection->isExist($token) + ) { + unset($tokens[$tokenIndex]); + } else { + $tokens[$tokenIndex] = $token; + } + } + + $textMap[$sentenceIndex] = $tokens; + } + + return $this->textBuilder->build( + $sentences, + $textMap + ); + } +} diff --git a/src/Service/ParserInterface.php b/src/Service/ParserInterface.php new file mode 100644 index 0000000..b432975 --- /dev/null +++ b/src/Service/ParserInterface.php @@ -0,0 +1,16 @@ +createRankMap($nodeCollection); + $sentenceOutputList = []; + + foreach ($text->getSentences() as $sentence) { + $vector = $sentence->getVector(); + $weight = .0; + + foreach ($vector as $tokenId) { + $weight += $rankMap[$tokenId]; + } + + $weight = $weight / max(1, count($vector)); + + $sentenceOutputList[] = $this + ->createSentence( + $sentence->getId(), + $weight, + $sentence->getOriginalValue() + ); + } + + return $sentenceOutputList; + } + + private function createSentence( + int $id, + float $rank, + string $originalValue + ): RankDataObjectInterface { + + $sentence = new RankDataObject(); + $sentence->setId($id); + $sentence->setRank($rank); + $sentence->setValue($originalValue); + + return $sentence; + } + + private function createRankMap( + NodeCollectionInterface $nodeCollection + ): array { + $rankMap = []; + + foreach ($nodeCollection->getNodes() as $node) { + $rankMap[$node->getId()] = $node->getRank(); + } + + return $rankMap; + } +} diff --git a/src/Service/SentenceWeightingInterface.php b/src/Service/SentenceWeightingInterface.php new file mode 100644 index 
0000000..c41a669 --- /dev/null +++ b/src/Service/SentenceWeightingInterface.php @@ -0,0 +1,23 @@ +getIndexedRank($rankList); + /* arsort orders the ranks from high to low and keeps each index, so every index still points at its object in $rankList. */ + arsort($rankIndex); + $rankCollection = []; + + foreach ($rankIndex as $index => $rank) { + $rankCollection[] = $rankList[$index]; + } + + return $rankCollection; + } + + /** + * Maps every index of the list to the rank reported by that element's getRank(). + * + * @param array $rankList Elements exposing getRank(). + * + * @return array Ranks keyed by the original list index. + */ + private function getIndexedRank(array $rankList): array + { + $rankIndex = []; + + foreach ($rankList as $index => $rankObject) { + $rankIndex[$index] = $rankObject->getRank(); + } + + return $rankIndex; + } +} diff --git a/src/Service/SortRankDataListInterface.php b/src/Service/SortRankDataListInterface.php new file mode 100644 index 0000000..929c056 --- /dev/null +++ b/src/Service/SortRankDataListInterface.php @@ -0,0 +1,18 @@ +pageRankDataSourceBuilder = $pageRankDataSourceBuilder; + } + + /** + * Builds the data source for the text, wires the ranking collaborators and runs the algorithm. + * + * @param TextInterface $text Text handed to the data source builder. + * @param int $iteration Passed unchanged to the algorithm's run(). + * + * @return NodeCollectionInterface Whatever run() returns. + */ + public function rank( + TextInterface $text, + int $iteration + ): NodeCollectionInterface { + $dataSource = $this->pageRankDataSourceBuilder->build($text); + $strategy = $this->createPageRankStrategy($dataSource); + $ranking = $this->createRanking($strategy); + $pageRankAlgorithm = $this->createPageRankAlgorithm($ranking, $strategy); + + return $pageRankAlgorithm->run($iteration); + } + + /** + * Creates the PageRankAlgorithm from the ranking, the node source strategy and a new Normalizer. + * + * @param RankingInterface $ranking Ranking passed to the algorithm. + * @param NodeDataSourceStrategyInterface $strategy Strategy passed to the algorithm. + * + * @return PageRankAlgorithmInterface + */ + private function createPageRankAlgorithm( + RankingInterface $ranking, + NodeDataSourceStrategyInterface $strategy + ): PageRankAlgorithmInterface { + + $normalizer = new Normalizer(); + + return new PageRankAlgorithm( + $ranking, + $strategy, + $normalizer + ); + } + + /** + * Wraps the data source array in a MemorySourceStrategy with new node and node collection builders. + * + * @param array $dataSource Array produced by the data source builder in rank(). + * + * @return NodeDataSourceStrategyInterface + */ + private function createPageRankStrategy( + array $dataSource + ): NodeDataSourceStrategyInterface { + + $nodeBuilder = new NodeBuilder(); + $nodeCollectionBuilder = new NodeCollectionBuilder(); + + return new MemorySourceStrategy( + $nodeBuilder, + $nodeCollectionBuilder, + $dataSource + ); + } + + /** + * Creates a Ranking from a new RankComparator and the given strategy. + * + * @param NodeDataSourceStrategyInterface $strategy Strategy passed to the Ranking. + * + * @return RankingInterface + */ + private function createRanking( + NodeDataSourceStrategyInterface $strategy + ): RankingInterface { + + $rankComparator = new RankComparator(); + + return new Ranking( + $rankComparator, + $strategy + ); + } +} diff --git
a/src/Strategy/RankingAlgorithmStrategyInterface.php b/src/Strategy/RankingAlgorithmStrategyInterface.php new file mode 100644 index 0000000..e771d72 --- /dev/null +++ b/src/Strategy/RankingAlgorithmStrategyInterface.php @@ -0,0 +1,16 @@ + - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank; - -use PhpScience\TextRank\Tool\Graph; -use PhpScience\TextRank\Tool\Parser; -use PhpScience\TextRank\Tool\Score; -use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract; -use PhpScience\TextRank\Tool\Summarize; - -/** - * Class TextRankFacade - * - * This Facade class is capable to find the keywords in a raw text, weigh them - * and retrieve the most important sentences from the whole text. It is an - * implementation of the TextRank algorithm. - * - * - * $stopWords = new English(); - * - * $textRank = new TextRankFacade(); - * $textRank->setStopWords($stopWords); - * - * $sentences = $textRank->summarizeTextFreely( - * $rawText, - * 5, - * 2, - * Summarize::GET_ALL_IMPORTANT - * ); - * - * - * @package PhpScience\TextRank - */ -class TextRankFacade -{ - /** - * Stop Words - * - * Stop Words to ignore because of dummy words. These words will not be Key - * Words. A, like, no yes, one, two, I, you for example. - * - * @see \PhpScience\TextRank\Tool\StopWords\English - * - * @var StopWordsAbstract - */ - protected $stopWords; - - /** - * Set Stop Words. - * - * @param StopWordsAbstract $stopWords Stop Words to ignore because of - * dummy words. - */ - public function setStopWords(StopWordsAbstract $stopWords) - { - $this->stopWords = $stopWords; - } - - /** - * Only Keywords - * - * It retrieves the possible keywords with their scores from a text. - * - * @param string $rawText A single raw text. - * - * @return array Array from Keywords. Key is the parsed word, value is the - * word score. 
- */ - public function getOnlyKeyWords(string $rawText): array - { - $parser = new Parser(); - $parser->setMinimumWordLength(3); - $parser->setRawText($rawText); - - if ($this->stopWords) { - $parser->setStopWords($this->stopWords); - } - - $text = $parser->parse(); - - $graph = new Graph(); - $graph->createGraph($text); - - $score = new Score(); - - return $score->calculate( - $graph, $text - ); - } - - /** - * Highlighted Texts - * - * It finds the most important sentences from a text by the most important - * keywords and these keywords also found by automatically. It retrieves - * the most important sentences what are 20 percent of the full text. - * - * @param string $rawText A single raw text. - * - * @return array An array from sentences. - */ - public function getHighlights(string $rawText): array - { - $parser = new Parser(); - $parser->setMinimumWordLength(3); - $parser->setRawText($rawText); - - if ($this->stopWords) { - $parser->setStopWords($this->stopWords); - } - - $text = $parser->parse(); - $maximumSentences = (int) (count($text->getSentences()) * 0.2); - - $graph = new Graph(); - $graph->createGraph($text); - - $score = new Score(); - $scores = $score->calculate($graph, $text); - - $summarize = new Summarize(); - - return $summarize->getSummarize( - $scores, - $graph, - $text, - 12, - $maximumSentences, - Summarize::GET_ALL_IMPORTANT - ); - } - - /** - * Compounds a Summarized Text - * - * It finds the three most important sentences from a text by the most - * important keywords and these keywords also found by automatically. It - * retrieves these important sentences. - * - * @param string $rawText A single raw text. - * - * @return array An array from sentences. 
- */ - public function summarizeTextCompound(string $rawText): array - { - $parser = new Parser(); - $parser->setMinimumWordLength(3); - $parser->setRawText($rawText); - - if ($this->stopWords) { - $parser->setStopWords($this->stopWords); - } - - $text = $parser->parse(); - - $graph = new Graph(); - $graph->createGraph($text); - - $score = new Score(); - $scores = $score->calculate($graph, $text); - - $summarize = new Summarize(); - - return $summarize->getSummarize( - $scores, - $graph, - $text, - 10, - 3, - Summarize::GET_ALL_IMPORTANT - ); - } - - /** - * Summarized Text - * - * It finds the most important sentence from a text by the most important - * keywords and these keywords also found by automatically. It retrieves - * the most important sentence and its following sentences. - * - * @param string $rawText A single raw text. - * - * @return array An array from sentences. - */ - public function summarizeTextBasic(string $rawText): array - { - $parser = new Parser(); - $parser->setMinimumWordLength(3); - $parser->setRawText($rawText); - - if ($this->stopWords) { - $parser->setStopWords($this->stopWords); - } - - $text = $parser->parse(); - - $graph = new Graph(); - $graph->createGraph($text); - - $score = new Score(); - $scores = $score->calculate($graph, $text); - - $summarize = new Summarize(); - - return $summarize->getSummarize( - $scores, - $graph, - $text, - 10, - 3, - Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS - ); - } - - /** - * Freely Summarized Text. - * - * It retrieves the most important sentences from a text by the most important - * keywords and these keywords also found by automatically. - * - * @param string $rawText A single raw text. - * @param int $analyzedKeyWords Maximum number of the most important - * Key Words to analyze the text. - * @param int $expectedSentences How many sentence should be retrieved. - * @param int $summarizeType Highlights from the text or a part of - * the text. - * - * @return array An array from sentences. 
- */ - public function summarizeTextFreely( - string $rawText, - int $analyzedKeyWords, - int $expectedSentences, - int $summarizeType - ): array { - $parser = new Parser(); - $parser->setMinimumWordLength(3); - $parser->setRawText($rawText); - - if ($this->stopWords) { - $parser->setStopWords($this->stopWords); - } - - $text = $parser->parse(); - - $graph = new Graph(); - $graph->createGraph($text); - - $score = new Score(); - $scores = $score->calculate($graph, $text); - - $summarize = new Summarize(); - - return $summarize->getSummarize( - $scores, - $graph, - $text, - $analyzedKeyWords, - $expectedSentences, - $summarizeType - ); - } -} diff --git a/src/Tool/Graph.php b/src/Tool/Graph.php deleted file mode 100644 index 738cde0..0000000 --- a/src/Tool/Graph.php +++ /dev/null @@ -1,98 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool; - -/** - * Class Graph - * - * This graph store the sentences and their words with the indexes. This graph - * is the full map of the whole text. - * - * @package PhpScience\TextRank\Tool - */ -class Graph -{ - /** - * Key is the word, value is an array with the sentence IDs. - * - * @var array - */ - protected $graph = []; - - /** - * Create Graph. - * - * It creates a graph and save it into the graph property. - * - * @param Text $text Text object contains the parsed and prepared text - * data. - */ - public function createGraph(Text &$text) - { - $wordMatrix = $text->getWordMatrix(); - - foreach ($wordMatrix as $sentenceIdx => $words) { - $idxArray = array_keys($words); - - foreach ($idxArray as $idxKey => $idxValue) { - $connections = []; - - if (isset($idxArray[$idxKey - 1])) { - $connections[] = $idxArray[$idxKey - 1]; - } - - if (isset($idxArray[$idxKey + 1])) { - $connections[] = $idxArray[$idxKey + 1]; - } - - $this->graph[$words[$idxValue]][$sentenceIdx][$idxValue] = $connections; - } - } - } - - /** - * Graph. - * - * It retrieves the graph. 
Key is the word, value is an array with the - * sentence IDs. - * - * - * array( - * 'apple' => array( // word - * 2 => array( // ID of the sentence - * 52 => array( // ID of the word in the sentence - * 51, 53 // IDs of the closest words to the apple word - * ), - * 10 => array( // IDs of the closest words to the apple word - * 9, 11 // IDs of the closest words to the apple word - * ), - * 5 => array(6) - * ), - * 6 => array( - * 9 => array(8, 10) - * ), - * ), - * 'orange' => array( - * 1 => array( - * 30 => array(29, 31) - * ) - * ) - * ); - * - * - * @return array - */ - public function getGraph(): array - { - return $this->graph; - } -} diff --git a/src/Tool/Parser.php b/src/Tool/Parser.php deleted file mode 100644 index fdd1112..0000000 --- a/src/Tool/Parser.php +++ /dev/null @@ -1,227 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool; - -use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract; - -/** - * Class Parser - * - * This class purpose to parse a real text to sentences and array. - * - * @package PhpScience\TextRank\Tool - */ -class Parser -{ - /** - * The number of length of the smallest word. Words bellow it will be - * ignored. - * - * @var int - */ - protected $minimumWordLength = 0; - - /** - * A single text, article, book for example. - * - * @var string - */ - protected $rawText = ''; - - /** - * The array of the punctuations. The punctuation is the value. The key - * refers to the key of its sentence. - * - * @var array - */ - protected $marks = []; - - /** - * Stop Words to ignore. These words will not be keywords. - * - * @var StopWordsAbstract - */ - protected $stopWords; - - /** - * It sets the minimum word length. Words bellow it will be ignored. - * - * @param int $wordLength - */ - public function setMinimumWordLength(int $wordLength) - { - $this->minimumWordLength = $wordLength; - } - - /** - * It sets the raw text. 
- * - * @param string $rawText - */ - public function setRawText(string $rawText) - { - $this->rawText = $rawText; - } - - /** - * Set Stop Words. - * - * It sets the stop words to remove them from the found keywords. - * - * @param StopWordsAbstract $words Stop Words to ignore. These words will - * not be keywords. - */ - public function setStopWords(StopWordsAbstract $words) - { - $this->stopWords = $words; - } - - /** - * It retrieves the punctuations. - * - * @return array Array from punctuations where key is the index to link to - * the sentence and value is the punctuation. - */ - public function getMarks(): array - { - return $this->marks; - } - - /** - * Parse. - * - * It parses the text from the property and retrieves in Text object - * prepared to scoring and to searching. - * - * @return Text Parsed text prepared to scoring. - */ - public function parse(): Text - { - $matrix = []; - $sentences = $this->getSentences(); - - foreach ($sentences as $sentenceIdx => $sentence) { - $matrix[$sentenceIdx] = $this->getWords($sentence); - } - - $text = new Text(); - $text->setSentences($sentences); - $text->setWordMatrix($matrix); - $text->setMarks($this->marks); - - return $text; - } - - /** - * Sentences. - * - * It retrieves the sentences in array without junk data. - * - * @return array Array from sentences. - */ - protected function getSentences(): array - { - $sentences = $sentences = preg_split( - '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/', - $this->rawText, - -1, - PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE - ); - - return array_values( - array_filter( - array_map( - [$this, 'cleanSentence'], - $sentences - ) - ) - ); - } - - /** - * Possible Keywords. - * - * It retrieves an array of possible keywords without junk characters, - * spaces and stop words. - * - * @param string $subText It should be a sentence. - * - * @return array The array of the possible keywords. 
- */ - protected function getWords(string $subText): array - { - $words = preg_split( - '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/', - $subText, - -1, - PREG_SPLIT_NO_EMPTY - ); - - $words = array_values( - array_filter( - array_map( - [$this, 'cleanWord'], - $words - ) - ) - ); - - if ($this->stopWords) { - return array_filter($words, function($word) { - return !ctype_punct($word) - && strlen($word) > $this->minimumWordLength - && !$this->stopWords->exist($word); - }); - } else { - return array_filter($words, function($word) { - return !ctype_punct($word) - && strlen($word) > $this->minimumWordLength; - }); - } - } - - /** - * Clean Sentence. - * - * It clean the sentence. If it is a punctuation it will be stored in the - * property $marks. - * - * @param string $sentence A sentence as a string. - * - * @return string It is empty string when it's punctuation. Otherwise it's - * the trimmed sentence itself. - */ - protected function cleanSentence(string $sentence): string - { - if (strlen(trim($sentence)) == 1) { - $this->marks[] = trim($sentence); - return ''; - - } else { - return trim($sentence); - } - } - - /** - * Clean Word. - * - * It removes the junk spaces from the word and retrieves it. - * - * @param string $word - * - * @return string Cleaned word. - */ - protected function cleanWord(string $word): string - { - return mb_strtolower(trim($word)); - } -} diff --git a/src/Tool/Score.php b/src/Tool/Score.php deleted file mode 100644 index 26c790a..0000000 --- a/src/Tool/Score.php +++ /dev/null @@ -1,190 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool; - -/** - * Class Score - * - * It handles words and assigns weighted numbers to them. - * - * @package PhpScience\TextRank\Tool - */ -class Score -{ - /** - * The maximum connections by a word in the current text. - * - * @var int - */ - protected $maximumValue = 0; - - /** - * The minimum connection by a word in the current text. 
- * - * @var int - */ - protected $minimumValue = 0; - - /** - * Calculate Scores. - * - * It calculates the scores from word's connections and the connections' - * scores. It retrieves the scores in a form of a matrix where the key is - * the word and value is the score. The score is between 0 and 1. - * - * @param Graph $graph The graph of the text. - * @param Text $text Text object what stores all text data. - * - * @return array Key is the word and value is the float or int type score - * between 1 and 0. - */ - public function calculate(Graph $graph, Text &$text): array - { - $graphData = $graph->getGraph(); - $wordMatrix = $text->getWordMatrix(); - $wordConnections = $this->calculateConnectionNumbers($graphData); - $scores = $this->calculateScores( - $graphData, - $wordMatrix, - $wordConnections - ); - - return $this->normalizeAndSortScores($scores); - } - - /** - * Connection Numbers. - * - * It calculates the number of connections for each word and retrieves it - * in array where key is the word and value is the number of connections. - * - * @param array $graphData Graph data from a Graph type object. - * - * @return array Key is the word and value is the number of the connected - * words. - */ - protected function calculateConnectionNumbers(array &$graphData): array - { - $wordConnections = []; - - foreach ($graphData as $wordKey => $sentences) { - $connectionCount = 0; - - foreach ($sentences as $sentenceIdx => $wordInstances) { - foreach ($wordInstances as $connections) { - $connectionCount += count($connections); - } - } - - $wordConnections[$wordKey] = $connectionCount; - } - - return $wordConnections; - } - - /** - * Calculate Scores. - * - * It calculates the score of the words and retrieves it in array where key - * is the word and value is the score. The score depends on the number of - * the connections and the closest word's connection numbers. - * - * @param array $graphData Graph data from a Graph type object. 
- * @param array $wordMatrix Multidimensional array from integer keys - * and string values. - * @param array $wordConnections Key is the word and value is the number of - * the connected words. - * - * @return array Scores where key is the word and value is the score. - */ - protected function calculateScores( - array &$graphData, - array &$wordMatrix, - array &$wordConnections - ): array { - $scores = []; - - foreach ($graphData as $wordKey => $sentences) { - $value = 0; - - foreach ($sentences as $sentenceIdx => $wordInstances) { - foreach ($wordInstances as $connections) { - foreach ($connections as $wordIdx) { - $word = $wordMatrix[$sentenceIdx][$wordIdx]; - $value += $wordConnections[$word]; - } - } - } - - $scores[$wordKey] = $value; - - if ($value > $this->maximumValue) { - $this->maximumValue = $value; - } - - if ($value < $this->minimumValue || $this->minimumValue == 0) { - $this->minimumValue = $value; - } - } - - return $scores; - } - - /** - * Normalize and Sort Scores. - * - * It recalculates the scores by normalize the score numbers to between 0 - * and 1. - * - * @param array $scores Keywords with scores. Score is the key. - * - * @return array Keywords with normalized and ordered scores. - */ - protected function normalizeAndSortScores(array &$scores): array - { - foreach ($scores as $key => $value) { - $v = $this->normalize( - $value, - $this->minimumValue, - $this->maximumValue - ); - - $scores[$key] = $v; - } - - arsort($scores); - - return $scores; - } - - /** - * It normalizes a number. - * - * @param int $value Current weight. - * @param int $min Minimum weight. - * @param int $max Maximum weight. - * - * @return float|int Normalized weight aka score. 
- */ - protected function normalize(int $value, int $min, int $max): float - { - $divisor = $max - $min; - - if ($divisor == 0) { - return 0.0; - } - - $normalized = ($value - $min) / $divisor; - - return $normalized; - } -} diff --git a/src/Tool/StopWords/English.php b/src/Tool/StopWords/English.php deleted file mode 100644 index 09a0828..0000000 --- a/src/Tool/StopWords/English.php +++ /dev/null @@ -1,346 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool\StopWords; - -/** - * Class English - * - * @package PhpScience\TextRank\Tool\StopWords - */ -class English extends StopWordsAbstract -{ - /** - * Stop words for avoid dummy keywords for Language English. - * - * @var array - */ - protected $words = [ - 'a', - 'about', - 'above', - 'above', - 'across', - 'after', - 'afterwards', - 'again', - 'against', - 'all', - 'almost', - 'alone', - 'along', - 'already', - 'also', - 'although', - 'always', - 'am', - 'among', - 'amongst', - 'amoungst', - 'amount', - 'an', - 'and', - 'another', - 'any', - 'anyhow', - 'anyone', - 'anything', - 'anyway', - 'anywhere', - 'are', - 'around', - 'as', - 'at', - 'back', - 'be', - 'became', - 'because', - 'become', - 'becomes', - 'becoming', - 'been', - 'before', - 'beforehand', - 'behind', - 'being', - 'below', - 'beside', - 'besides', - 'between', - 'beyond', - 'bill', - 'both', - 'bottom', - 'but', - 'by', - 'call', - 'can', - 'cannot', - 'cant', - 'co', - 'con', - 'could', - 'couldnt', - 'cry', - 'de', - 'describe', - 'detail', - 'do', - 'done', - 'down', - 'due', - 'during', - 'each', - 'eg', - 'eight', - 'either', - 'eleven', - 'else', - 'elsewhere', - 'empty', - 'enough', - 'etc', - 'even', - 'ever', - 'every', - 'everyone', - 'everything', - 'everywhere', - 'except', - 'few', - 'fifteen', - 'fify', - 'fill', - 'find', - 'fire', - 'first', - 'five', - 'for', - 'former', - 'formerly', - 'forty', - 'found', - 'four', - 'from', - 'front', - 'full', - 'further', - 'get', - 'give', - 'go', - 'had', - 
'has', - 'hasnt', - 'have', - 'he', - 'hence', - 'her', - 'here', - 'hereafter', - 'hereby', - 'herein', - 'hereupon', - 'hers', - 'herself', - 'him', - 'himself', - 'his', - 'how', - 'however', - 'hundred', - 'ie', - 'if', - 'in', - 'inc', - 'indeed', - 'interest', - 'into', - 'is', - 'it', - 'its', - 'itself', - 'keep', - 'last', - 'latter', - 'latterly', - 'least', - 'less', - 'ltd', - 'made', - 'many', - 'may', - 'me', - 'meanwhile', - 'might', - 'mill', - 'mine', - 'more', - 'moreover', - 'most', - 'mostly', - 'move', - 'much', - 'must', - 'my', - 'myself', - 'name', - 'namely', - 'neither', - 'never', - 'nevertheless', - 'next', - 'nine', - 'no', - 'nobody', - 'none', - 'noone', - 'nor', - 'not', - 'nothing', - 'now', - 'nowhere', - 'of', - 'off', - 'often', - 'on', - 'once', - 'one', - 'only', - 'onto', - 'or', - 'other', - 'others', - 'otherwise', - 'our', - 'ours', - 'ourselves', - 'out', - 'over', - 'own', - 'part', - 'per', - 'perhaps', - 'please', - 'put', - 'rather', - 're', - 'same', - 'see', - 'seem', - 'seemed', - 'seeming', - 'seems', - 'serious', - 'several', - 'she', - 'should', - 'show', - 'side', - 'since', - 'sincere', - 'six', - 'sixty', - 'so', - 'some', - 'somehow', - 'someone', - 'something', - 'sometime', - 'sometimes', - 'somewhere', - 'still', - 'such', - 'system', - 'take', - 'ten', - 'than', - 'that', - 'the', - 'their', - 'them', - 'themselves', - 'then', - 'thence', - 'there', - 'thereafter', - 'thereby', - 'therefore', - 'therein', - 'thereupon', - 'these', - 'they', - 'thickv', - 'thin', - 'third', - 'this', - 'those', - 'though', - 'three', - 'through', - 'throughout', - 'thru', - 'thus', - 'to', - 'together', - 'too', - 'top', - 'toward', - 'towards', - 'twelve', - 'twenty', - 'two', - 'un', - 'under', - 'until', - 'up', - 'upon', - 'us', - 'very', - 'via', - 'was', - 'we', - 'well', - 'were', - 'what', - 'whatever', - 'when', - 'whence', - 'whenever', - 'where', - 'whereafter', - 'whereas', - 'whereby', - 'wherein', - 
'whereupon', - 'wherever', - 'whether', - 'which', - 'while', - 'whither', - 'who', - 'whoever', - 'whole', - 'whom', - 'whose', - 'why', - 'will', - 'with', - 'within', - 'without', - 'would', - 'yet', - 'you', - 'your', - 'yours', - 'yourself', - 'yourselves' - ]; -} diff --git a/src/Tool/StopWords/French.php b/src/Tool/StopWords/French.php deleted file mode 100644 index 0e67ad7..0000000 --- a/src/Tool/StopWords/French.php +++ /dev/null @@ -1,718 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool\StopWords; - -/** - * Class French - * - * @package PhpScience\TextRank\Tool\StopWords - */ -class French extends StopWordsAbstract -{ - /** - * Stop words for avoid dummy keywords for Language French. - * Source: https://github.com/stopwords-iso/stopwords-fr - * - * @var array - */ - protected $words = [ - 'a', - 'abord', - 'absolument', - 'afin', - 'ah', - 'ai', - 'aie', - 'aient', - 'aies', - 'ailleurs', - 'ainsi', - 'ait', - 'allaient', - 'allo', - 'allons', - 'allô', - 'alors', - 'anterieur', - 'anterieure', - 'anterieures', - 'apres', - 'après', - 'as', - 'assez', - 'attendu', - 'au', - 'aucun', - 'aucune', - 'aucuns', - 'aujourd', - 'aujourd\'hui', - 'aupres', - 'auquel', - 'aura', - 'aurai', - 'auraient', - 'aurais', - 'aurait', - 'auras', - 'aurez', - 'auriez', - 'aurions', - 'aurons', - 'auront', - 'aussi', - 'autre', - 'autrefois', - 'autrement', - 'autres', - 'autrui', - 'aux', - 'auxquelles', - 'auxquels', - 'avaient', - 'avais', - 'avait', - 'avant', - 'avec', - 'avez', - 'aviez', - 'avions', - 'avoir', - 'avons', - 'ayant', - 'ayez', - 'ayons', - 'b', - 'bah', - 'bas', - 'basee', - 'bat', - 'beau', - 'beaucoup', - 'bien', - 'bigre', - 'bon', - 'boum', - 'bravo', - 'brrr', - 'c', - 'car', - 'ce', - 'ceci', - 'cela', - 'celle', - 'celle-ci', - 'celle-là', - 'celles', - 'celles-ci', - 'celles-là', - 'celui', - 'celui-ci', - 'celui-là', - 'celà', - 'cent', - 'cependant', - 'certain', - 'certaine', - 'certaines', - 'certains', - 
'certes', - 'ces', - 'cet', - 'cette', - 'ceux', - 'ceux-ci', - 'ceux-là', - 'chacun', - 'chacune', - 'chaque', - 'cher', - 'chers', - 'chez', - 'chiche', - 'chut', - 'chère', - 'chères', - 'ci', - 'cinq', - 'cinquantaine', - 'cinquante', - 'cinquantième', - 'cinquième', - 'clac', - 'clic', - 'combien', - 'comme', - 'comment', - 'comparable', - 'comparables', - 'compris', - 'concernant', - 'contre', - 'couic', - 'crac', - 'd', - 'da', - 'dans', - 'de', - 'debout', - 'dedans', - 'dehors', - 'deja', - 'delà', - 'depuis', - 'dernier', - 'derniere', - 'derriere', - 'derrière', - 'des', - 'desormais', - 'desquelles', - 'desquels', - 'dessous', - 'dessus', - 'deux', - 'deuxième', - 'deuxièmement', - 'devant', - 'devers', - 'devra', - 'devrait', - 'different', - 'differentes', - 'differents', - 'différent', - 'différente', - 'différentes', - 'différents', - 'dire', - 'directe', - 'directement', - 'dit', - 'dite', - 'dits', - 'divers', - 'diverse', - 'diverses', - 'dix', - 'dix-huit', - 'dix-neuf', - 'dix-sept', - 'dixième', - 'doit', - 'doivent', - 'donc', - 'dont', - 'dos', - 'douze', - 'douzième', - 'dring', - 'droite', - 'du', - 'duquel', - 'durant', - 'dès', - 'début', - 'désormais', - 'e', - 'effet', - 'egale', - 'egalement', - 'egales', - 'eh', - 'elle', - 'elle-même', - 'elles', - 'elles-mêmes', - 'en', - 'encore', - 'enfin', - 'entre', - 'envers', - 'environ', - 'es', - 'essai', - 'est', - 'et', - 'etant', - 'etc', - 'etre', - 'eu', - 'eue', - 'eues', - 'euh', - 'eurent', - 'eus', - 'eusse', - 'eussent', - 'eusses', - 'eussiez', - 'eussions', - 'eut', - 'eux', - 'eux-mêmes', - 'exactement', - 'excepté', - 'extenso', - 'exterieur', - 'eûmes', - 'eût', - 'eûtes', - 'f', - 'fais', - 'faisaient', - 'faisant', - 'fait', - 'faites', - 'façon', - 'feront', - 'fi', - 'flac', - 'floc', - 'fois', - 'font', - 'force', - 'furent', - 'fus', - 'fusse', - 'fussent', - 'fusses', - 'fussiez', - 'fussions', - 'fut', - 'fûmes', - 'fût', - 'fûtes', - 'g', - 'gens', - 'h', - 'ha', - 
'haut', - 'hein', - 'hem', - 'hep', - 'hi', - 'ho', - 'holà', - 'hop', - 'hormis', - 'hors', - 'hou', - 'houp', - 'hue', - 'hui', - 'huit', - 'huitième', - 'hum', - 'hurrah', - 'hé', - 'hélas', - 'i', - 'ici', - 'il', - 'ils', - 'importe', - 'j', - 'je', - 'jusqu', - 'jusque', - 'juste', - 'k', - 'l', - 'la', - 'laisser', - 'laquelle', - 'las', - 'le', - 'lequel', - 'les', - 'lesquelles', - 'lesquels', - 'leur', - 'leurs', - 'longtemps', - 'lors', - 'lorsque', - 'lui', - 'lui-meme', - 'lui-même', - 'là', - 'lès', - 'm', - 'ma', - 'maint', - 'maintenant', - 'mais', - 'malgre', - 'malgré', - 'maximale', - 'me', - 'meme', - 'memes', - 'merci', - 'mes', - 'mien', - 'mienne', - 'miennes', - 'miens', - 'mille', - 'mince', - 'mine', - 'minimale', - 'moi', - 'moi-meme', - 'moi-même', - 'moindres', - 'moins', - 'mon', - 'mot', - 'moyennant', - 'multiple', - 'multiples', - 'même', - 'mêmes', - 'n', - 'na', - 'naturel', - 'naturelle', - 'naturelles', - 'ne', - 'neanmoins', - 'necessaire', - 'necessairement', - 'neuf', - 'neuvième', - 'ni', - 'nombreuses', - 'nombreux', - 'nommés', - 'non', - 'nos', - 'notamment', - 'notre', - 'nous', - 'nous-mêmes', - 'nouveau', - 'nouveaux', - 'nul', - 'néanmoins', - 'nôtre', - 'nôtres', - 'o', - 'oh', - 'ohé', - 'ollé', - 'olé', - 'on', - 'ont', - 'onze', - 'onzième', - 'ore', - 'ou', - 'ouf', - 'ouias', - 'oust', - 'ouste', - 'outre', - 'ouvert', - 'ouverte', - 'ouverts', - 'o|', - 'où', - 'p', - 'paf', - 'pan', - 'par', - 'parce', - 'parfois', - 'parle', - 'parlent', - 'parler', - 'parmi', - 'parole', - 'parseme', - 'partant', - 'particulier', - 'particulière', - 'particulièrement', - 'pas', - 'passé', - 'pendant', - 'pense', - 'permet', - 'personne', - 'personnes', - 'peu', - 'peut', - 'peuvent', - 'peux', - 'pff', - 'pfft', - 'pfut', - 'pif', - 'pire', - 'pièce', - 'plein', - 'plouf', - 'plupart', - 'plus', - 'plusieurs', - 'plutôt', - 'possessif', - 'possessifs', - 'possible', - 'possibles', - 'pouah', - 'pour', - 'pourquoi', - 
'pourrais', - 'pourrait', - 'pouvait', - 'prealable', - 'precisement', - 'premier', - 'première', - 'premièrement', - 'pres', - 'probable', - 'probante', - 'procedant', - 'proche', - 'près', - 'psitt', - 'pu', - 'puis', - 'puisque', - 'pur', - 'pure', - 'q', - 'qu', - 'quand', - 'quant', - 'quant-à-soi', - 'quanta', - 'quarante', - 'quatorze', - 'quatre', - 'quatre-vingt', - 'quatrième', - 'quatrièmement', - 'que', - 'quel', - 'quelconque', - 'quelle', - 'quelles', - 'quelqu\'un', - 'quelque', - 'quelques', - 'quels', - 'qui', - 'quiconque', - 'quinze', - 'quoi', - 'quoique', - 'r', - 'rare', - 'rarement', - 'rares', - 'relative', - 'relativement', - 'remarquable', - 'rend', - 'rendre', - 'restant', - 'reste', - 'restent', - 'restrictif', - 'retour', - 'revoici', - 'revoilà', - 'rien', - 's', - 'sa', - 'sacrebleu', - 'sait', - 'sans', - 'sapristi', - 'sauf', - 'se', - 'sein', - 'seize', - 'selon', - 'semblable', - 'semblaient', - 'semble', - 'semblent', - 'sent', - 'sept', - 'septième', - 'sera', - 'serai', - 'seraient', - 'serais', - 'serait', - 'seras', - 'serez', - 'seriez', - 'serions', - 'serons', - 'seront', - 'ses', - 'seul', - 'seule', - 'seulement', - 'si', - 'sien', - 'sienne', - 'siennes', - 'siens', - 'sinon', - 'six', - 'sixième', - 'soi', - 'soi-même', - 'soient', - 'sois', - 'soit', - 'soixante', - 'sommes', - 'son', - 'sont', - 'sous', - 'souvent', - 'soyez', - 'soyons', - 'specifique', - 'specifiques', - 'speculatif', - 'stop', - 'strictement', - 'subtiles', - 'suffisant', - 'suffisante', - 'suffit', - 'suis', - 'suit', - 'suivant', - 'suivante', - 'suivantes', - 'suivants', - 'suivre', - 'sujet', - 'superpose', - 'sur', - 'surtout', - 't', - 'ta', - 'tac', - 'tandis', - 'tant', - 'tardive', - 'te', - 'tel', - 'telle', - 'tellement', - 'telles', - 'tels', - 'tenant', - 'tend', - 'tenir', - 'tente', - 'tes', - 'tic', - 'tien', - 'tienne', - 'tiennes', - 'tiens', - 'toc', - 'toi', - 'toi-même', - 'ton', - 'touchant', - 'toujours', - 'tous', - 'tout', 
- 'toute', - 'toutefois', - 'toutes', - 'treize', - 'trente', - 'tres', - 'trois', - 'troisième', - 'troisièmement', - 'trop', - 'très', - 'tsoin', - 'tsouin', - 'tu', - 'té', - 'u', - 'un', - 'une', - 'unes', - 'uniformement', - 'unique', - 'uniques', - 'uns', - 'v', - 'va', - 'vais', - 'valeur', - 'vas', - 'vers', - 'via', - 'vif', - 'vifs', - 'vingt', - 'vivat', - 'vive', - 'vives', - 'vlan', - 'voici', - 'voie', - 'voient', - 'voilà', - 'vont', - 'vos', - 'votre', - 'vous', - 'vous-mêmes', - 'vu', - 'vé', - 'vôtre', - 'vôtres', - 'w', - 'x', - 'y', - 'z', - 'zut', - 'à', - 'â', - 'ça', - 'ès', - 'étaient', - 'étais', - 'était', - 'étant', - 'état', - 'étiez', - 'étions', - 'été', - 'étée', - 'étées', - 'étés', - 'êtes', - 'être', - 'ô' - ]; -} diff --git a/src/Tool/StopWords/German.php b/src/Tool/StopWords/German.php deleted file mode 100644 index 6faf7b0..0000000 --- a/src/Tool/StopWords/German.php +++ /dev/null @@ -1,625 +0,0 @@ - - */ -declare(strict_types=1); -namespace PhpScience\TextRank\Tool\StopWords; -/** - * Class German - * - * @package PhpScience\TextRank\Tool\StopWords - */ -class German extends StopWordsAbstract -{ - /** - * Stop words for avoid dummy keywords for Language German. - * Word list created by Marco Götze, Steffen Geyer. 
- * Source: https://solariz.de/de/downloads/6/german-enhanced-stopwords.htm - * - * @var array - */ - protected $words = [ - 'ab', - 'aber', - 'alle', - 'allein', - 'allem', - 'allen', - 'aller', - 'allerdings', - 'allerlei', - 'alles', - 'allmählich', - 'allzu', - 'als', - 'alsbald', - 'also', - 'am', - 'an', - 'and', - 'ander', - 'andere', - 'anderem', - 'anderen', - 'anderer', - 'andererseits', - 'anderes', - 'anderm', - 'andern', - 'andernfalls', - 'anders', - 'anstatt', - 'auch', - 'auf', - 'aus', - 'ausgenommen', - 'ausser', - 'ausserdem', - 'außer', - 'außerdem', - 'außerhalb', - 'bald', - 'bei', - 'beide', - 'beiden', - 'beiderlei', - 'beides', - 'beim', - 'beinahe', - 'bereits', - 'besonders', - 'besser', - 'beträchtlich', - 'bevor', - 'bezüglich', - 'bin', - 'bis', - 'bisher', - 'bislang', - 'bist', - 'bloß', - 'bsp.', - 'bzw', - 'ca', - 'ca.', - 'content', - 'da', - 'dabei', - 'dadurch', - 'dafür', - 'dagegen', - 'daher', - 'dahin', - 'damals', - 'damit', - 'danach', - 'daneben', - 'dann', - 'daran', - 'darauf', - 'daraus', - 'darin', - 'darum', - 'darunter', - 'darüber', - 'darüberhinaus', - 'das', - 'dass', - 'dasselbe', - 'davon', - 'davor', - 'dazu', - 'daß', - 'dein', - 'deine', - 'deinem', - 'deinen', - 'deiner', - 'deines', - 'dem', - 'demnach', - 'demselben', - 'den', - 'denen', - 'denn', - 'dennoch', - 'denselben', - 'der', - 'derart', - 'derartig', - 'derem', - 'deren', - 'derer', - 'derjenige', - 'derjenigen', - 'derselbe', - 'derselben', - 'derzeit', - 'des', - 'deshalb', - 'desselben', - 'dessen', - 'desto', - 'deswegen', - 'dich', - 'die', - 'diejenige', - 'dies', - 'diese', - 'dieselbe', - 'dieselben', - 'diesem', - 'diesen', - 'dieser', - 'dieses', - 'diesseits', - 'dir', - 'direkt', - 'direkte', - 'direkten', - 'direkter', - 'doch', - 'dort', - 'dorther', - 'dorthin', - 'drauf', - 'drin', - 'drunter', - 'drüber', - 'du', - 'dunklen', - 'durch', - 'durchaus', - 'eben', - 'ebenfalls', - 'ebenso', - 'eher', - 'eigenen', - 'eigenes', - 
'eigentlich', - 'ein', - 'eine', - 'einem', - 'einen', - 'einer', - 'einerseits', - 'eines', - 'einfach', - 'einführen', - 'einführte', - 'einführten', - 'eingesetzt', - 'einig', - 'einige', - 'einigem', - 'einigen', - 'einiger', - 'einigermaßen', - 'einiges', - 'einmal', - 'eins', - 'einseitig', - 'einseitige', - 'einseitigen', - 'einseitiger', - 'einst', - 'einstmals', - 'einzig', - 'entsprechend', - 'entweder', - 'er', - 'erst', - 'es', - 'etc', - 'etliche', - 'etwa', - 'etwas', - 'euch', - 'euer', - 'eure', - 'eurem', - 'euren', - 'eurer', - 'eures', - 'falls', - 'fast', - 'ferner', - 'folgende', - 'folgenden', - 'folgender', - 'folgendes', - 'folglich', - 'fuer', - 'für', - 'gab', - 'ganze', - 'ganzem', - 'ganzen', - 'ganzer', - 'ganzes', - 'gar', - 'gegen', - 'gemäss', - 'ggf', - 'gleich', - 'gleichwohl', - 'gleichzeitig', - 'glücklicherweise', - 'gänzlich', - 'hab', - 'habe', - 'haben', - 'haette', - 'hast', - 'hat', - 'hatte', - 'hatten', - 'hattest', - 'hattet', - 'heraus', - 'herein', - 'hier', - 'hier', - 'hinter', - 'hiermit', - 'hiesige', - 'hin', - 'hinein', - 'hinten', - 'hinter', - 'hinterher', - 'http', - 'hätt', - 'hätte', - 'hätten', - 'höchstens', - 'ich', - 'igitt', - 'ihm', - 'ihn', - 'ihnen', - 'ihr', - 'ihre', - 'ihrem', - 'ihren', - 'ihrer', - 'ihres', - 'im', - 'immer', - 'immerhin', - 'in', - 'indem', - 'indessen', - 'infolge', - 'innen', - 'innerhalb', - 'ins', - 'insofern', - 'inzwischen', - 'irgend', - 'irgendeine', - 'irgendwas', - 'irgendwen', - 'irgendwer', - 'irgendwie', - 'irgendwo', - 'ist', - 'ja', - 'je', - 'jed', - 'jede', - 'jedem', - 'jeden', - 'jedenfalls', - 'jeder', - 'jederlei', - 'jedes', - 'jedoch', - 'jemand', - 'jene', - 'jenem', - 'jenen', - 'jener', - 'jenes', - 'jenseits', - 'jetzt', - 'jährig', - 'jährige', - 'jährigen', - 'jähriges', - 'kam', - 'kann', - 'kannst', - 'kaum', - 'kein', - 'keine', - 'keinem', - 'keinen', - 'keiner', - 'keinerlei', - 'keines', - 'keineswegs', - 'klar', - 'klare', - 'klaren', - 
'klares', - 'klein', - 'kleinen', - 'kleiner', - 'kleines', - 'koennen', - 'koennt', - 'koennte', - 'koennten', - 'komme', - 'kommen', - 'kommt', - 'konkret', - 'konkrete', - 'konkreten', - 'konkreter', - 'konkretes', - 'können', - 'könnt', - 'künftig', - 'leider', - 'machen', - 'man', - 'manche', - 'manchem', - 'manchen', - 'mancher', - 'mancherorts', - 'manches', - 'manchmal', - 'mehr', - 'mehrere', - 'mein', - 'meine', - 'meinem', - 'meinen', - 'meiner', - 'meines', - 'mich', - 'mir', - 'mit', - 'mithin', - 'muessen', - 'muesst', - 'muesste', - 'muss', - 'musst', - 'musste', - 'mussten', - 'muß', - 'mußt', - 'müssen', - 'müsste', - 'müssten', - 'müßt', - 'müßte', - 'nach', - 'nachdem', - 'nachher', - 'nachhinein', - 'nahm', - 'natürlich', - 'neben', - 'nebenan', - 'nehmen', - 'nein', - 'nicht', - 'nichts', - 'nie', - 'niemals', - 'niemand', - 'nirgends', - 'nirgendwo', - 'noch', - 'nun', - 'nur', - 'nächste', - 'nämlich', - 'nötigenfalls', - 'ob', - 'oben', - 'oberhalb', - 'obgleich', - 'obschon', - 'obwohl', - 'oder', - 'oft', - 'per', - 'plötzlich', - 'schließlich', - 'schon', - 'sehr', - 'sehrwohl', - 'seid', - 'sein', - 'seine', - 'seinem', - 'seinen', - 'seiner', - 'seines', - 'seit', - 'seitdem', - 'seither', - 'selber', - 'selbst', - 'sich', - 'sicher', - 'sicherlich', - 'sie', - 'sind', - 'so', - 'sobald', - 'sodass', - 'sodaß', - 'soeben', - 'sofern', - 'sofort', - 'sogar', - 'solange', - 'solch', - 'solche', - 'solchem', - 'solchen', - 'solcher', - 'solches', - 'soll', - 'sollen', - 'sollst', - 'sollt', - 'sollte', - 'sollten', - 'solltest', - 'somit', - 'sondern', - 'sonst', - 'sonstwo', - 'sooft', - 'soviel', - 'soweit', - 'sowie', - 'sowohl', - 'tatsächlich', - 'tatsächlichen', - 'tatsächlicher', - 'tatsächliches', - 'trotzdem', - 'ueber', - 'um', - 'umso', - 'unbedingt', - 'und', - 'unmöglich', - 'unmögliche', - 'unmöglichen', - 'unmöglicher', - 'uns', - 'unser', - 'unser', - 'unsere', - 'unsere', - 'unserem', - 'unseren', - 'unserer', - 'unseres', 
- 'unter', - 'usw', - 'viel', - 'viele', - 'vielen', - 'vieler', - 'vieles', - 'vielleicht', - 'vielmals', - 'vom', - 'von', - 'vor', - 'voran', - 'vorher', - 'vorüber', - 'völlig', - 'wann', - 'war', - 'waren', - 'warst', - 'warum', - 'was', - 'weder', - 'weil', - 'weiter', - 'weitere', - 'weiterem', - 'weiteren', - 'weiterer', - 'weiteres', - 'weiterhin', - 'weiß', - 'welche', - 'welchem', - 'welchen', - 'welcher', - 'welches', - 'wem', - 'wen', - 'wenig', - 'wenige', - 'weniger', - 'wenigstens', - 'wenn', - 'wenngleich', - 'wer', - 'werde', - 'werden', - 'werdet', - 'weshalb', - 'wessen', - 'wichtig', - 'wie', - 'wieder', - 'wieso', - 'wieviel', - 'wiewohl', - 'will', - 'willst', - 'wir', - 'wird', - 'wirklich', - 'wirst', - 'wo', - 'wodurch', - 'wogegen', - 'woher', - 'wohin', - 'wohingegen', - 'wohl', - 'wohlweislich', - 'womit', - 'woraufhin', - 'woraus', - 'worin', - 'wurde', - 'wurden', - 'während', - 'währenddessen', - 'wär', - 'wäre', - 'wären', - 'würde', - 'würden', - 'z.B.', - 'zB', - 'zahlreich', - 'zeitweise', - 'zu', - 'zudem', - 'zuerst', - 'zufolge', - 'zugleich', - 'zuletzt', - 'zum', - 'zumal', - 'zur', - 'zurück', - 'zusammen', - 'zuviel', - 'zwar', - 'zwischen', - 'ähnlich', - 'übel', - 'über', - 'überall', - 'überallhin', - 'überdies', - 'übermorgen', - 'übrig', - 'übrigens' - ]; -} \ No newline at end of file diff --git a/src/Tool/StopWords/Italian.php b/src/Tool/StopWords/Italian.php deleted file mode 100644 index 6aa3093..0000000 --- a/src/Tool/StopWords/Italian.php +++ /dev/null @@ -1,681 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool\StopWords; - -/** - * Class Norwegian - * - * @package PhpScience\TextRank\Tool\StopWords - */ -class Norwegian extends StopWordsAbstract -{ - /** - * Stop words for avoid dummy keywords for Language Norwegian. 
- * Source: https://github.com/stopwords-iso/stopwords-no - * - * @var array - */ - protected $words = [ - 'alle', - 'andre', - 'arbeid', - 'at', - 'av', - 'bare', - 'begge', - 'ble', - 'blei', - 'bli', - 'blir', - 'blitt', - 'bort', - 'bra', - 'bruke', - 'både', - 'båe', - 'da', - 'de', - 'deg', - 'dei', - 'deim', - 'deira', - 'deires', - 'dem', - 'den', - 'denne', - 'der', - 'dere', - 'deres', - 'det', - 'dette', - 'di', - 'din', - 'disse', - 'ditt', - 'du', - 'dykk', - 'dykkar', - 'då', - 'eg', - 'ein', - 'eit', - 'eitt', - 'eller', - 'elles', - 'en', - 'ene', - 'eneste', - 'enhver', - 'enn', - 'er', - 'et', - 'ett', - 'etter', - 'folk', - 'for', - 'fordi', - 'forsûke', - 'fra', - 'få', - 'før', - 'fûr', - 'fûrst', - 'gjorde', - 'gjûre', - 'god', - 'gå', - 'ha', - 'hadde', - 'han', - 'hans', - 'har', - 'hennar', - 'henne', - 'hennes', - 'her', - 'hjå', - 'ho', - 'hoe', - 'honom', - 'hoss', - 'hossen', - 'hun', - 'hva', - 'hvem', - 'hver', - 'hvilke', - 'hvilken', - 'hvis', - 'hvor', - 'hvordan', - 'hvorfor', - 'i', - 'ikke', - 'ikkje', - 'ingen', - 'ingi', - 'inkje', - 'inn', - 'innen', - 'inni', - 'ja', - 'jeg', - 'kan', - 'kom', - 'korleis', - 'korso', - 'kun', - 'kunne', - 'kva', - 'kvar', - 'kvarhelst', - 'kven', - 'kvi', - 'kvifor', - 'lage', - 'lang', - 'lik', - 'like', - 'makt', - 'man', - 'mange', - 'me', - 'med', - 'medan', - 'meg', - 'meget', - 'mellom', - 'men', - 'mens', - 'mer', - 'mest', - 'mi', - 'min', - 'mine', - 'mitt', - 'mot', - 'mye', - 'mykje', - 'må', - 'måte', - 'navn', - 'ned', - 'nei', - 'no', - 'noe', - 'noen', - 'noka', - 'noko', - 'nokon', - 'nokor', - 'nokre', - 'ny', - 'nå', - 'når', - 'og', - 'også', - 'om', - 'opp', - 'oss', - 'over', - 'part', - 'punkt', - 'på', - 'rett', - 'riktig', - 'samme', - 'sant', - 'seg', - 'selv', - 'si', - 'sia', - 'sidan', - 'siden', - 'sin', - 'sine', - 'sist', - 'sitt', - 'sjøl', - 'skal', - 'skulle', - 'slik', - 'slutt', - 'so', - 'som', - 'somme', - 'somt', - 'start', - 'stille', - 'så', - 'sånn', 
- 'tid', - 'til', - 'tilbake', - 'tilstand', - 'um', - 'under', - 'upp', - 'ut', - 'uten', - 'var', - 'vart', - 'varte', - 'ved', - 'verdi', - 'vere', - 'verte', - 'vi', - 'vil', - 'ville', - 'vite', - 'vore', - 'vors', - 'vort', - 'vår', - 'være', - 'vært', - 'vöre', - 'vört', - 'å' - ]; -} diff --git a/src/Tool/StopWords/Russian.php b/src/Tool/StopWords/Russian.php deleted file mode 100644 index 82edf0f..0000000 --- a/src/Tool/StopWords/Russian.php +++ /dev/null @@ -1,588 +0,0 @@ - - * @author Andrey Astashov (Russian StopWords) - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool\StopWords; - -/** - * Class Russian - * - * @package PhpScience\TextRank\Tool\StopWords - */ -class Russian extends StopWordsAbstract -{ - /** - * Stop words for avoid dummy keywords for Language Russian. - * - * @var array - */ - protected $words = [ - 'c', - 'а', - 'алло', - 'без', - 'белый', - 'близко', - 'более', - 'больше', - 'большой', - 'будем', - 'будет', - 'будете', - 'будешь', - 'будто', - 'буду', - 'будут', - 'будь', - 'бы', - 'бывает', - 'бывь', - 'был', - 'была', - 'были', - 'было', - 'быть', - 'в', - 'важная', - 'важное', - 'важные', - 'важный', - 'вам', - 'вами', - 'вас', - 'ваш', - 'ваша', - 'ваше', - 'ваши', - 'вверх', - 'вдали', - 'вдруг', - 'ведь', - 'везде', - 'вернуться', - 'весь', - 'вечер', - 'взгляд', - 'взять', - 'вид', - 'видел', - 'видеть', - 'вместе', - 'вне', - 'вниз', - 'внизу', - 'во', - 'вода', - 'война', - 'вокруг', - 'вон', - 'вообще', - 'вопрос', - 'восемнадцатый', - 'восемнадцать', - 'восемь', - 'восьмой', - 'вот', - 'впрочем', - 'времени', - 'время', - 'все', - 'все еще', - 'всегда', - 'всего', - 'всем', - 'всеми', - 'всему', - 'всех', - 'всею', - 'всю', - 'всюду', - 'вся', - 'всё', - 'второй', - 'вы', - 'выйти', - 'г', - 'где', - 'главный', - 'глаз', - 'говорил', - 'говорит', - 'говорить', - 'год', - 'года', - 'году', - 'голова', - 'голос', - 'город', - 'да', - 'давать', - 'давно', - 'даже', - 'далекий', - 'далеко', - 'дальше', - 
'даром', - 'дать', - 'два', - 'двадцатый', - 'двадцать', - 'две', - 'двенадцатый', - 'двенадцать', - 'дверь', - 'двух', - 'девятнадцатый', - 'девятнадцать', - 'девятый', - 'девять', - 'действительно', - 'дел', - 'делал', - 'делать', - 'делаю', - 'дело', - 'день', - 'деньги', - 'десятый', - 'десять', - 'для', - 'до', - 'довольно', - 'долго', - 'должен', - 'должно', - 'должный', - 'дом', - 'дорога', - 'друг', - 'другая', - 'другие', - 'других', - 'друго', - 'другое', - 'другой', - 'думать', - 'душа', - 'е', - 'его', - 'ее', - 'ей', - 'ему', - 'если', - 'есть', - 'еще', - 'ещё', - 'ею', - 'её', - 'ж', - 'ждать', - 'же', - 'жена', - 'женщина', - 'жизнь', - 'жить', - 'за', - 'занят', - 'занята', - 'занято', - 'заняты', - 'затем', - 'зато', - 'зачем', - 'здесь', - 'земля', - 'знать', - 'значит', - 'значить', - 'и', - 'иди', - 'идти', - 'из', - 'или', - 'им', - 'имеет', - 'имел', - 'именно', - 'иметь', - 'ими', - 'имя', - 'иногда', - 'их', - 'к', - 'каждая', - 'каждое', - 'каждые', - 'каждый', - 'кажется', - 'казаться', - 'как', - 'какая', - 'какой', - 'кем', - 'книга', - 'когда', - 'кого', - 'ком', - 'комната', - 'кому', - 'конец', - 'конечно', - 'которая', - 'которого', - 'которой', - 'которые', - 'который', - 'которых', - 'кроме', - 'кругом', - 'кто', - 'куда', - 'лежать', - 'лет', - 'ли', - 'лицо', - 'лишь', - 'лучше', - 'любить', - 'люди', - 'м', - 'маленький', - 'мало', - 'мать', - 'машина', - 'между', - 'меля', - 'менее', - 'меньше', - 'меня', - 'место', - 'миллионов', - 'мимо', - 'минута', - 'мир', - 'мира', - 'мне', - 'много', - 'многочисленная', - 'многочисленное', - 'многочисленные', - 'многочисленный', - 'мной', - 'мною', - 'мог', - 'могу', - 'могут', - 'мож', - 'может', - 'может быть', - 'можно', - 'можхо', - 'мои', - 'мой', - 'мор', - 'москва', - 'мочь', - 'моя', - 'моё', - 'мы', - 'на', - 'наверху', - 'над', - 'надо', - 'назад', - 'наиболее', - 'найти', - 'наконец', - 'нам', - 'нами', - 'народ', - 'нас', - 'начала', - 'начать', - 'наш', - 'наша', - 'наше', 
- 'наши', - 'не', - 'него', - 'недавно', - 'недалеко', - 'нее', - 'ней', - 'некоторый', - 'нельзя', - 'нем', - 'немного', - 'нему', - 'непрерывно', - 'нередко', - 'несколько', - 'нет', - 'нею', - 'неё', - 'ни', - 'нибудь', - 'ниже', - 'низко', - 'никакой', - 'никогда', - 'никто', - 'никуда', - 'ним', - 'ними', - 'них', - 'ничего', - 'ничто', - 'но', - 'новый', - 'нога', - 'ночь', - 'ну', - 'нужно', - 'нужный', - 'нх', - 'о', - 'об', - 'оба', - 'обычно', - 'один', - 'одиннадцатый', - 'одиннадцать', - 'однажды', - 'однако', - 'одного', - 'одной', - 'оказаться', - 'окно', - 'около', - 'он', - 'она', - 'они', - 'оно', - 'опять', - 'особенно', - 'остаться', - 'от', - 'ответить', - 'отец', - 'откуда', - 'отовсюду', - 'отсюда', - 'очень', - 'первый', - 'перед', - 'писать', - 'плечо', - 'по', - 'под', - 'подойди', - 'подумать', - 'пожалуйста', - 'позже', - 'пойти', - 'пока', - 'пол', - 'получить', - 'помнить', - 'понимать', - 'понять', - 'пор', - 'пора', - 'после', - 'последний', - 'посмотреть', - 'посреди', - 'потом', - 'потому', - 'почему', - 'почти', - 'правда', - 'прекрасно', - 'при', - 'про', - 'просто', - 'против', - 'процентов', - 'путь', - 'пятнадцатый', - 'пятнадцать', - 'пятый', - 'пять', - 'работа', - 'работать', - 'раз', - 'разве', - 'рано', - 'раньше', - 'ребенок', - 'решить', - 'россия', - 'рука', - 'русский', - 'ряд', - 'рядом', - 'с', - 'с кем', - 'сам', - 'сама', - 'сами', - 'самим', - 'самими', - 'самих', - 'само', - 'самого', - 'самой', - 'самом', - 'самому', - 'саму', - 'самый', - 'свет', - 'свое', - 'своего', - 'своей', - 'свои', - 'своих', - 'свой', - 'свою', - 'сделать', - 'сеаой', - 'себе', - 'себя', - 'сегодня', - 'седьмой', - 'сейчас', - 'семнадцатый', - 'семнадцать', - 'семь', - 'сидеть', - 'сила', - 'сих', - 'сказал', - 'сказала', - 'сказать', - 'сколько', - 'слишком', - 'слово', - 'случай', - 'смотреть', - 'сначала', - 'снова', - 'со', - 'собой', - 'собою', - 'советский', - 'совсем', - 'спасибо', - 'спросить', - 'сразу', - 'стал', - 'старый', - 
'стать', - 'стол', - 'сторона', - 'стоять', - 'страна', - 'суть', - 'считать', - 'т', - 'та', - 'так', - 'такая', - 'также', - 'таки', - 'такие', - 'такое', - 'такой', - 'там', - 'твои', - 'твой', - 'твоя', - 'твоё', - 'те', - 'тебе', - 'тебя', - 'тем', - 'теми', - 'теперь', - 'тех', - 'то', - 'тобой', - 'тобою', - 'товарищ', - 'тогда', - 'того', - 'тоже', - 'только', - 'том', - 'тому', - 'тот', - 'тою', - 'третий', - 'три', - 'тринадцатый', - 'тринадцать', - 'ту', - 'туда', - 'тут', - 'ты', - 'тысяч', - 'у', - 'увидеть', - 'уж', - 'уже', - 'улица', - 'уметь', - 'утро', - 'хороший', - 'хорошо', - 'хотел бы', - 'хотеть', - 'хоть', - 'хотя', - 'хочешь', - 'час', - 'часто', - 'часть', - 'чаще', - 'чего', - 'человек', - 'чем', - 'чему', - 'через', - 'четвертый', - 'четыре', - 'четырнадцатый', - 'четырнадцать', - 'что', - 'чтоб', - 'чтобы', - 'чуть', - 'шестнадцатый', - 'шестнадцать', - 'шестой', - 'шесть', - 'эта', - 'эти', - 'этим', - 'этими', - 'этих', - 'это', - 'этого', - 'этой', - 'этом', - 'этому', - 'этот', - 'эту', - 'я', - 'являюсь' - ]; -} diff --git a/src/Tool/StopWords/Spanish.php b/src/Tool/StopWords/Spanish.php deleted file mode 100644 index f7cdffd..0000000 --- a/src/Tool/StopWords/Spanish.php +++ /dev/null @@ -1,750 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool\StopWords; - -/** - * Class Spanish - * - * @package PhpScience\TextRank\Tool\StopWords - */ -class Spanish extends StopWordsAbstract -{ - /** - * Stop words for avoid dummy keywords for Language Spanish. 
- * Source: https://github.com/stopwords-iso/stopwords-es - * - * @var array - */ - protected $words = [ - 'a', - 'actualmente', - 'acuerdo', - 'adelante', - 'ademas', - 'además', - 'adrede', - 'afirmó', - 'agregó', - 'ahi', - 'ahora', - 'ahí', - 'al', - 'algo', - 'alguna', - 'algunas', - 'alguno', - 'algunos', - 'algún', - 'alli', - 'allí', - 'alrededor', - 'ambos', - 'ampleamos', - 'antano', - 'antaño', - 'ante', - 'anterior', - 'antes', - 'apenas', - 'aproximadamente', - 'aquel', - 'aquella', - 'aquellas', - 'aquello', - 'aquellos', - 'aqui', - 'aquél', - 'aquélla', - 'aquéllas', - 'aquéllos', - 'aquí', - 'arriba', - 'arribaabajo', - 'aseguró', - 'asi', - 'así', - 'atras', - 'aun', - 'aunque', - 'ayer', - 'añadió', - 'aún', - 'b', - 'bajo', - 'bastante', - 'bien', - 'breve', - 'buen', - 'buena', - 'buenas', - 'bueno', - 'buenos', - 'c', - 'cada', - 'casi', - 'cerca', - 'cierta', - 'ciertas', - 'cierto', - 'ciertos', - 'cinco', - 'claro', - 'comentó', - 'como', - 'con', - 'conmigo', - 'conocer', - 'conseguimos', - 'conseguir', - 'considera', - 'consideró', - 'consigo', - 'consigue', - 'consiguen', - 'consigues', - 'contigo', - 'contra', - 'cosas', - 'creo', - 'cual', - 'cuales', - 'cualquier', - 'cuando', - 'cuanta', - 'cuantas', - 'cuanto', - 'cuantos', - 'cuatro', - 'cuenta', - 'cuál', - 'cuáles', - 'cuándo', - 'cuánta', - 'cuántas', - 'cuánto', - 'cuántos', - 'cómo', - 'd', - 'da', - 'dado', - 'dan', - 'dar', - 'de', - 'debajo', - 'debe', - 'deben', - 'debido', - 'decir', - 'dejó', - 'del', - 'delante', - 'demasiado', - 'demás', - 'dentro', - 'deprisa', - 'desde', - 'despacio', - 'despues', - 'después', - 'detras', - 'detrás', - 'dia', - 'dias', - 'dice', - 'dicen', - 'dicho', - 'dieron', - 'diferente', - 'diferentes', - 'dijeron', - 'dijo', - 'dio', - 'donde', - 'dos', - 'durante', - 'día', - 'días', - 'dónde', - 'e', - 'ejemplo', - 'el', - 'ella', - 'ellas', - 'ello', - 'ellos', - 'embargo', - 'empleais', - 'emplean', - 'emplear', - 'empleas', - 'empleo', - 
'en', - 'encima', - 'encuentra', - 'enfrente', - 'enseguida', - 'entonces', - 'entre', - 'era', - 'erais', - 'eramos', - 'eran', - 'eras', - 'eres', - 'es', - 'esa', - 'esas', - 'ese', - 'eso', - 'esos', - 'esta', - 'estaba', - 'estabais', - 'estaban', - 'estabas', - 'estad', - 'estada', - 'estadas', - 'estado', - 'estados', - 'estais', - 'estamos', - 'estan', - 'estando', - 'estar', - 'estaremos', - 'estará', - 'estarán', - 'estarás', - 'estaré', - 'estaréis', - 'estaría', - 'estaríais', - 'estaríamos', - 'estarían', - 'estarías', - 'estas', - 'este', - 'estemos', - 'esto', - 'estos', - 'estoy', - 'estuve', - 'estuviera', - 'estuvierais', - 'estuvieran', - 'estuvieras', - 'estuvieron', - 'estuviese', - 'estuvieseis', - 'estuviesen', - 'estuvieses', - 'estuvimos', - 'estuviste', - 'estuvisteis', - 'estuviéramos', - 'estuviésemos', - 'estuvo', - 'está', - 'estábamos', - 'estáis', - 'están', - 'estás', - 'esté', - 'estéis', - 'estén', - 'estés', - 'ex', - 'excepto', - 'existe', - 'existen', - 'explicó', - 'expresó', - 'f', - 'fin', - 'final', - 'fue', - 'fuera', - 'fuerais', - 'fueran', - 'fueras', - 'fueron', - 'fuese', - 'fueseis', - 'fuesen', - 'fueses', - 'fui', - 'fuimos', - 'fuiste', - 'fuisteis', - 'fuéramos', - 'fuésemos', - 'g', - 'general', - 'gran', - 'grandes', - 'gueno', - 'h', - 'ha', - 'haber', - 'habia', - 'habida', - 'habidas', - 'habido', - 'habidos', - 'habiendo', - 'habla', - 'hablan', - 'habremos', - 'habrá', - 'habrán', - 'habrás', - 'habré', - 'habréis', - 'habría', - 'habríais', - 'habríamos', - 'habrían', - 'habrías', - 'habéis', - 'había', - 'habíais', - 'habíamos', - 'habían', - 'habías', - 'hace', - 'haceis', - 'hacemos', - 'hacen', - 'hacer', - 'hacerlo', - 'haces', - 'hacia', - 'haciendo', - 'hago', - 'han', - 'has', - 'hasta', - 'hay', - 'haya', - 'hayamos', - 'hayan', - 'hayas', - 'hayáis', - 'he', - 'hecho', - 'hemos', - 'hicieron', - 'hizo', - 'horas', - 'hoy', - 'hube', - 'hubiera', - 'hubierais', - 'hubieran', - 'hubieras', - 
'hubieron', - 'hubiese', - 'hubieseis', - 'hubiesen', - 'hubieses', - 'hubimos', - 'hubiste', - 'hubisteis', - 'hubiéramos', - 'hubiésemos', - 'hubo', - 'i', - 'igual', - 'incluso', - 'indicó', - 'informo', - 'informó', - 'intenta', - 'intentais', - 'intentamos', - 'intentan', - 'intentar', - 'intentas', - 'intento', - 'ir', - 'j', - 'junto', - 'k', - 'l', - 'la', - 'lado', - 'largo', - 'las', - 'le', - 'lejos', - 'les', - 'llegó', - 'lleva', - 'llevar', - 'lo', - 'los', - 'luego', - 'lugar', - 'm', - 'mal', - 'manera', - 'manifestó', - 'mas', - 'mayor', - 'me', - 'mediante', - 'medio', - 'mejor', - 'mencionó', - 'menos', - 'menudo', - 'mi', - 'mia', - 'mias', - 'mientras', - 'mio', - 'mios', - 'mis', - 'misma', - 'mismas', - 'mismo', - 'mismos', - 'modo', - 'momento', - 'mucha', - 'muchas', - 'mucho', - 'muchos', - 'muy', - 'más', - 'mí', - 'mía', - 'mías', - 'mío', - 'míos', - 'n', - 'nada', - 'nadie', - 'ni', - 'ninguna', - 'ningunas', - 'ninguno', - 'ningunos', - 'ningún', - 'no', - 'nos', - 'nosotras', - 'nosotros', - 'nuestra', - 'nuestras', - 'nuestro', - 'nuestros', - 'nueva', - 'nuevas', - 'nuevo', - 'nuevos', - 'nunca', - 'o', - 'ocho', - 'os', - 'otra', - 'otras', - 'otro', - 'otros', - 'p', - 'pais', - 'para', - 'parece', - 'parte', - 'partir', - 'pasada', - 'pasado', - 'paìs', - 'peor', - 'pero', - 'pesar', - 'poca', - 'pocas', - 'poco', - 'pocos', - 'podeis', - 'podemos', - 'poder', - 'podria', - 'podriais', - 'podriamos', - 'podrian', - 'podrias', - 'podrá', - 'podrán', - 'podría', - 'podrían', - 'poner', - 'por', - 'por qué', - 'porque', - 'posible', - 'primer', - 'primera', - 'primero', - 'primeros', - 'principalmente', - 'pronto', - 'propia', - 'propias', - 'propio', - 'propios', - 'proximo', - 'próximo', - 'próximos', - 'pudo', - 'pueda', - 'puede', - 'pueden', - 'puedo', - 'pues', - 'q', - 'qeu', - 'que', - 'quedó', - 'queremos', - 'quien', - 'quienes', - 'quiere', - 'quiza', - 'quizas', - 'quizá', - 'quizás', - 'quién', - 'quiénes', - 'qué', - 
'r', - 'raras', - 'realizado', - 'realizar', - 'realizó', - 'repente', - 'respecto', - 's', - 'sabe', - 'sabeis', - 'sabemos', - 'saben', - 'saber', - 'sabes', - 'sal', - 'salvo', - 'se', - 'sea', - 'seamos', - 'sean', - 'seas', - 'segun', - 'segunda', - 'segundo', - 'según', - 'seis', - 'ser', - 'sera', - 'seremos', - 'será', - 'serán', - 'serás', - 'seré', - 'seréis', - 'sería', - 'seríais', - 'seríamos', - 'serían', - 'serías', - 'seáis', - 'señaló', - 'si', - 'sido', - 'siempre', - 'siendo', - 'siete', - 'sigue', - 'siguiente', - 'sin', - 'sino', - 'sobre', - 'sois', - 'sola', - 'solamente', - 'solas', - 'solo', - 'solos', - 'somos', - 'son', - 'soy', - 'soyos', - 'su', - 'supuesto', - 'sus', - 'suya', - 'suyas', - 'suyo', - 'suyos', - 'sé', - 'sí', - 'sólo', - 't', - 'tal', - 'tambien', - 'también', - 'tampoco', - 'tan', - 'tanto', - 'tarde', - 'te', - 'temprano', - 'tendremos', - 'tendrá', - 'tendrán', - 'tendrás', - 'tendré', - 'tendréis', - 'tendría', - 'tendríais', - 'tendríamos', - 'tendrían', - 'tendrías', - 'tened', - 'teneis', - 'tenemos', - 'tener', - 'tenga', - 'tengamos', - 'tengan', - 'tengas', - 'tengo', - 'tengáis', - 'tenida', - 'tenidas', - 'tenido', - 'tenidos', - 'teniendo', - 'tenéis', - 'tenía', - 'teníais', - 'teníamos', - 'tenían', - 'tenías', - 'tercera', - 'ti', - 'tiempo', - 'tiene', - 'tienen', - 'tienes', - 'toda', - 'todas', - 'todavia', - 'todavía', - 'todo', - 'todos', - 'total', - 'trabaja', - 'trabajais', - 'trabajamos', - 'trabajan', - 'trabajar', - 'trabajas', - 'trabajo', - 'tras', - 'trata', - 'través', - 'tres', - 'tu', - 'tus', - 'tuve', - 'tuviera', - 'tuvierais', - 'tuvieran', - 'tuvieras', - 'tuvieron', - 'tuviese', - 'tuvieseis', - 'tuviesen', - 'tuvieses', - 'tuvimos', - 'tuviste', - 'tuvisteis', - 'tuviéramos', - 'tuviésemos', - 'tuvo', - 'tuya', - 'tuyas', - 'tuyo', - 'tuyos', - 'tú', - 'u', - 'ultimo', - 'un', - 'una', - 'unas', - 'uno', - 'unos', - 'usa', - 'usais', - 'usamos', - 'usan', - 'usar', - 'usas', - 
'uso', - 'usted', - 'ustedes', - 'v', - 'va', - 'vais', - 'valor', - 'vamos', - 'van', - 'varias', - 'varios', - 'vaya', - 'veces', - 'ver', - 'verdad', - 'verdadera', - 'verdadero', - 'vez', - 'vosotras', - 'vosotros', - 'voy', - 'vuestra', - 'vuestras', - 'vuestro', - 'vuestros', - 'w', - 'x', - 'y', - 'ya', - 'yo', - 'z', - 'él', - 'éramos', - 'ésa', - 'ésas', - 'ése', - 'ésos', - 'ésta', - 'éstas', - 'éste', - 'éstos', - 'última', - 'últimas', - 'último', - 'últimos', - ]; -} diff --git a/src/Tool/StopWords/StopWordsAbstract.php b/src/Tool/StopWords/StopWordsAbstract.php deleted file mode 100644 index 33b2128..0000000 --- a/src/Tool/StopWords/StopWordsAbstract.php +++ /dev/null @@ -1,39 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool\StopWords; - -/** - * Class StopWordsAbstract - * - * @package PhpScience\TextRank\Tool\StopWords - */ -abstract class StopWordsAbstract -{ - /** - * Stop words for avoid dummy keywords. - * - * @var array - */ - protected $words = []; - - /** - * It retrieves the word exists or does not in the list of Stop words. - * - * @param string $word - * - * @return bool It is True when it exists. - */ - public function exist(string $word): bool - { - return array_search($word, $this->words) !== false; - } -} diff --git a/src/Tool/Summarize.php b/src/Tool/Summarize.php deleted file mode 100644 index f63c0d0..0000000 --- a/src/Tool/Summarize.php +++ /dev/null @@ -1,238 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool; - -/** - * Class Summarize - * - * This is for summarize the text from parsed data. - * - * @package PhpScience\TextRank\Tool - */ -class Summarize -{ - /** - * To find all important sentences. - * - * @var int - */ - const GET_ALL_IMPORTANT = 0; - - /** - * To find the most important sentence and its following sentences. - * - * @var int - */ - const GET_FIRST_IMPORTANT_AND_FOLLOWINGS = 1; - - /** - * Array of sentence weight. 
Key is the index of the sentence and value is - * the weight of the sentence. - * - * @var array - */ - protected $sentenceWeight = []; - - /** - * Summarize text. - * - * It retrieves the summarized text in array. - * - * @param array $scores Keywords with scores. Score is the key. - * @param Graph $graph The graph of the text. - * @param Text $text Text object what stores all text data. - * @param int $keyWordLimit How many keyword should be used to find the - * important sentences. - * @param int $sentenceLimit How many sentence should be retrieved. - * @param int $type The type of summarizing. Possible values are - * the constants of this class. - * - * @return array An array from sentences. - */ - public function getSummarize( - array &$scores, - Graph &$graph, - Text &$text, - int $keyWordLimit, - int $sentenceLimit, - int $type - ): array { - - $graphData = $graph->getGraph(); - $sentences = $text->getSentences(); - $marks = $text->getMarks(); - $this->findAndWeightSentences($scores, $graphData, $keyWordLimit); - - if ($type == Summarize::GET_ALL_IMPORTANT) { - return $this->getAllImportant($sentences, $marks, $sentenceLimit); - - } else if ($type == Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS) { - return $this->getFirstImportantAndFollowings( - $sentences, - $marks, - $sentenceLimit - ); - } - - return []; - } - - /** - * Find and Weight Sentences. - * - * It finds the most important sentences and stores them into the property. - * - * @param array $scores Keywords with scores. Score is the key. - * @param array $graphData Graph data from a Graph type object. - * @param int $keyWordLimit How many keyword should be used to find the - * important sentences. 
- */ - protected function findAndWeightSentences( - array &$scores, - array &$graphData, - int $keyWordLimit - ) { - $i = 0; - - foreach ($scores as $word => $score) { - if ($i >= $keyWordLimit) { - break; - } - - $i++; - $wordMap = $graphData[$word]; - - foreach ($wordMap as $key => $value) { - $this->updateSentenceWeight($key); - } - } - - arsort($this->sentenceWeight); - } - - /** - * Important Sentences. - * - * It retrieves the important sentences. - * - * @param array $sentences Sentences, ordered by weights. - * @param array $marks Array of punctuations. Key is the reference - * to the sentence, value is the punctuation. - * @param int $sentenceLimit How many sentence should be retrieved. - * - * @return array An array from sentences what are the most important - * sentences. - */ - protected function getAllImportant( - array &$sentences, - array &$marks, - int $sentenceLimit - ): array { - - $summary = []; - $i = 0; - - foreach ($this->sentenceWeight as $sentenceIdx => $weight) { - if ($i >= $sentenceLimit) { - break; - } - - $i++; - $summary[$sentenceIdx] = $sentences[$sentenceIdx] - . $this->getMark($marks, $sentenceIdx); - } - - ksort($summary); - - return $summary; - } - - /** - * Most Important Sentence and Next. - * - * It retrieves the first most important sentence and its following - * sentences. - * - * @param array $sentences Sentences, ordered by weights. - * @param array $marks Array of punctuations. Key is the reference - * to the sentence, value is the punctuation. - * @param int $sentenceLimit How many sentence should be retrieved. - * - * @return array An array from sentences what contains the most important - * sentence and its following sentences. - */ - protected function getFirstImportantAndFollowings( - array &$sentences, - array &$marks, - int $sentenceLimit - ): array { - - $summary = []; - $startIdx = 0; - - foreach ($this->sentenceWeight as $sentenceIdx => $weight) { - $summary[$sentenceIdx] = $sentences[$sentenceIdx] . 
- $this->getMark($marks, $sentenceIdx); - - $startIdx = $sentenceIdx; - break; - } - - $i = 0; - - foreach ($sentences as $sentenceIdx => $sentence) { - if ($sentenceIdx <= $startIdx) { - continue; - } else if ($i >= $sentenceLimit - 1) { - break; - } - - $i++; - $summary[$sentenceIdx] = $sentences[$sentenceIdx] . - $this->getMark($marks, $sentenceIdx); - } - - return $summary; - } - - /** - * Update Sentence Weight. - * - * It updates the sentence weight what is stored in the property. - * - * @param int $sentenceIdx Index of the sentence. - */ - protected function updateSentenceWeight(int $sentenceIdx) - { - if (isset($this->sentenceWeight[$sentenceIdx])) { - $this->sentenceWeight[$sentenceIdx] = $this->sentenceWeight[$sentenceIdx] + 1; - } else { - $this->sentenceWeight[$sentenceIdx] = 1; - } - } - - /** - * Punctuations. - * - * It retrieves the punctuation of the sentence. - * - * @param array $marks The punctuation. Key is the reference to the - * sentence, value is the punctuation. - * @param int $idx Key of the punctuation. - * - * @return string The punctuation of the sentence. - */ - protected function getMark(array &$marks, int $idx) - { - return isset($marks[$idx]) ? $marks[$idx] : ''; - } -} diff --git a/src/Tool/Text.php b/src/Tool/Text.php deleted file mode 100644 index 037cacf..0000000 --- a/src/Tool/Text.php +++ /dev/null @@ -1,113 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank\Tool; - -/** - * Class Text - * - * This class is for store the parsed texts. - * - * @package PhpScience\TextRank\Tool - */ -class Text -{ - /** - * Multidimensional array from words of the text. Key is index of the - * sentence, value is an array from words where key is the index of the - * word and value is the word. - * - * @var array - */ - protected $wordMatrix = []; - - /** - * Array from sentences where key is the index and value is the sentence. 
- * - * @var array - */ - protected $sentences = []; - - /** - * Array from punctuations where key is the index to link to the sentence - * and value is the punctuation. - * - * @var array - */ - protected $marks = []; - - /** - * It set the Words' matrix to the property. - * - * @param array $wordMatrix Multidimensional array from integer keys and - * string values. - */ - public function setWordMatrix(array $wordMatrix) - { - $this->wordMatrix = $wordMatrix; - } - - /** - * It sets the sentences. - * - * @param array $sentences Array's key should be an int and value should be - * string. - */ - public function setSentences(array $sentences) - { - $this->sentences = $sentences; - } - - /** - * It set the punctuations to the property. - * - * @param array $marks Array's key should be an int and value should be - * string. - */ - public function setMarks(array $marks) - { - $this->marks = $marks; - } - - /** - * It retrieves the words in sentence groups. - * - * @return array Multidimensional array from words of the text. Key is - * index of the sentence, value is an array from words - * where key is the index of the word and value is the word. - */ - public function getWordMatrix(): array - { - return $this->wordMatrix; - } - - /** - * It retrieves the sentences. - * - * @return array Array from sentences where key is the index and value is - * the sentence. - */ - public function getSentences(): array - { - return $this->sentences; - } - - /** - * It retrieves the punctuations. - * - * @return array Array from punctuations where key is the index to link to - * the sentence and value is the punctuation. 
- */ - public function getMarks(): array - { - return $this->marks; - } -} diff --git a/tests/TextRankFacadeTest.php b/tests/TextRankFacadeTest.php deleted file mode 100644 index d1ef4e5..0000000 --- a/tests/TextRankFacadeTest.php +++ /dev/null @@ -1,158 +0,0 @@ - - */ - -declare(strict_types=1); - -namespace PhpScience\TextRank; - -use PhpScience\TextRank\Tool\StopWords\English; -use PhpScience\TextRank\Tool\StopWords\Russian; -use PhpScience\TextRank\Tool\Summarize; - -class TextRankFacadeTest extends \PHPUnit\Framework\TestCase -{ - protected $sampleText1; - - public function setUp() - { - parent::setUp(); - - $path = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'res' - . DIRECTORY_SEPARATOR . 'sample1.txt'; - $file = fopen($path, 'r'); - - $this->sampleText1 = fread($file, filesize($path)); - - fclose($file); - } - - public function testGetOnlyKeyWords() - { - $api = new TextRankFacade(); - $stopWords = new English(); - $api->setStopWords($stopWords); - - $result = $api->getOnlyKeyWords($this->sampleText1); - - $this->assertTrue(count($result) > 0); - $this->assertTrue(array_values($result)[0] == 1); - } - - public function testGetHighlights() - { - $api = new TextRankFacade(); - $stopWords = new English(); - $api->setStopWords($stopWords); - - $result = $api->getHighlights($this->sampleText1); - - $this->assertTrue(count($result) > 0); - } - - public function testSummarizeTextCompound() - { - $api = new TextRankFacade(); - $stopWords = new English(); - $api->setStopWords($stopWords); - - $result = $api->summarizeTextCompound($this->sampleText1); - - $this->assertTrue(count($result) > 0); - } - - public function testSummarizeTextBasic() - { - $api = new TextRankFacade(); - $stopWords = new English(); - $api->setStopWords($stopWords); - - $result = $api->summarizeTextBasic($this->sampleText1); - - $this->assertTrue(count($result) > 0); - } - - public function testSummarizeTextFreely() - { - $api = new TextRankFacade(); - $stopWords = new 
English(); - $api->setStopWords($stopWords); - - $result = $api->summarizeTextFreely( - $this->sampleText1, - 5, - 2, - Summarize::GET_ALL_IMPORTANT - ); - - $this->assertTrue(count($result) == 2); - - $result = $api->summarizeTextFreely( - $this->sampleText1, - 10, - 1, - Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS - ); - - $this->assertTrue(count($result) == 1); - - // Stop words. - $result = $api->summarizeTextFreely( - 'one two. one two. three four.', - 2, - 10, - Summarize::GET_ALL_IMPORTANT - ); - - $this->assertTrue(count($result) == 0); - - // Less sentences then expected. - $result = $api->summarizeTextFreely( - 'lorem ipsum. lorem holy ipsum. sit dolor amet.', - 2, - 10, - Summarize::GET_ALL_IMPORTANT - ); - - $this->assertTrue(count($result) == 2); - } - - public function testSmallText() - { - $api = new TextRankFacade(); - $stopWords = new English(); - $api->setStopWords($stopWords); - - $result = $api->getOnlyKeyWords('lorem ipsum sit'); - - $this->assertEquals(2, count($result)); - - $result = $api->getOnlyKeyWords('sit'); - - $this->assertEquals(0, count($result)); - - $result = $api->getOnlyKeyWords(''); - - $this->assertEquals(0, count($result)); - } - - public function testSmallTextRu() - { - $api = new TextRankFacade(); - $stopWords = new Russian(); - $api->setStopWords($stopWords); - $result = $api->getOnlyKeyWords('между холодными ладонями'); - $this->assertCount(2, $result); - - $result = $api->getOnlyKeyWords('конец'); - $this->assertCount(0, $result); - - $result = $api->getOnlyKeyWords(''); - $this->assertCount(0, $result); - } -} diff --git a/tests/phpunit.xml b/tests/phpunit.xml new file mode 100644 index 0000000..dca7fc5 --- /dev/null +++ b/tests/phpunit.xml @@ -0,0 +1,34 @@ + + + + + + ../tests/unit + + + ../tests/functional + + + + + + + ../src + + + + + + + + + \ No newline at end of file diff --git a/res/sample1.txt b/tests/resource/sample1.txt similarity index 99% rename from res/sample1.txt rename to tests/resource/sample1.txt 
index 849e2a3..bab87cf 100644 --- a/res/sample1.txt +++ b/tests/resource/sample1.txt @@ -1 +1,2 @@ Over the past fortnight we asked you to nominate your top extensions for the GNOME desktop. And you did just that. Having now sifted through the hundreds of entries, we’re ready to reveal your favourite GNOME Shell extensions. GNOME 3 (which is more commonly used with the GNOME Shell) has an extension framework that lets developers (and users) extend, build on, and shape how the desktop looks, acts and functions. Dash to Dock takes the GNOME Dash — this is the ‘favourites bar’ that appears on the left-hand side of the screen in the Activities overlay — and transforms it into a desktop dock. And just like Plank, Docky or AWN you can add app launchers, rearrange them, and use them to minimise, restore and switch between app windows. Dash to Dock has many of the common “Dock” features you’d expect, including autohide and intellihide, a fixed-width mode, adjustable icon size, and custom themes. My biggest pet peeve with GNOME Shell is its legacy app tray that hides in the bottom left of the screen. All extraneous non-system applets, indicators and tray icons hide down here. This makes it a little harder to use applications that rely on a system tray presence, like Skype, Franz, Telegram, and Dropbox. TopIcons Plus is the quick way to put GNOME system tray icons back where they belong: on show and in reach. The extension moves legacy tray icons from the bottom left of Gnome Shell to the right-hand side of the top panel. A well-stocked settings panel lets you adjust icon opacity, color, padding, size and tray position. Dive into the settings to adjust the sizing, styling and positioning of icons. Like the popular daily stimulant of choice, the Caffeine GNOME extension keeps your computer awake. It couldn’t be simpler to use: just click the empty mug icon. 
An empty cup means you’re using normal auto suspend rules – e.g., a screensaver – while a freshly brewed cup of coffee means auto suspend and screensaver are turned off. The Caffeine GNOME extension supports GNOME Shell 3.4 or later. Familiar with applications like Guake and Tilda? If so, you’ll instantly see the appeal of the (superbly named) Drop Down Terminal GNOME extension. When installed just tap the key above the tab key (though it can be changed to almost any key you wish) to get instant access to the command line. Want to speed up using workspaces? This simple tool lets you do just that. Once installed you can quickly switch between workspaces by scrolling over the top panel - no need to enter the Activities Overlay! +t \ No newline at end of file