diff --git a/.gitignore b/.gitignore index 6ed1ac42..6b1c935c 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,5 @@ env __pycache__/ .pytest_cache/ *.log +venv/ +.venv/ \ No newline at end of file diff --git a/notebooks/minds_data_access/README.md b/notebooks/minds_data_access/README.md new file mode 100644 index 00000000..f9b11683 --- /dev/null +++ b/notebooks/minds_data_access/README.md @@ -0,0 +1,271 @@ +# MINDS Data Access Tutorial: SPARQL and API Guide + +This comprehensive tutorial demonstrates how to access MINDS (Minimal Information for Neuroscience DataSets) data from the EBRAINS Knowledge Graph using SPARQL queries and REST APIs. + +## 🎯 Purpose + +This tutorial directly addresses **Issue #374** and **Issue #147** from the INCF/neuroshapes repository by providing: + +- Complete SPARQL endpoint documentation for MINDS data +- Working Python examples for data access +- Multiple authentication and access methods +- Data visualization and analysis examples +- Integration patterns with neuroshapes schemas + +## 🚀 Quick Start + +### Prerequisites + +1. **Python 3.7+** installed +2. **EBRAINS account** (optional, for full access): [Register here](https://ebrains.eu/register) +3. **Basic understanding** of Python and data analysis + +### Installation + +# MINDS Data Access Tutorial: SPARQL and API Guide + +This comprehensive tutorial demonstrates how to access MINDS (Minimal Information for Neuroscience DataSets) data from the EBRAINS Knowledge Graph using SPARQL queries and REST APIs. + +## 🎯 Purpose + +This tutorial directly addresses **Issue #374** and **Issue #147** from the INCF/neuroshapes repository by providing: + +- Complete SPARQL endpoint documentation for MINDS data +- Working Python examples for data access +- Multiple authentication and access methods +- Data visualization and analysis examples +- Integration patterns with neuroshapes schemas + +## 🚀 Quick Start + +### Prerequisites + +1. **Python 3.7+** installed +2. 
**EBRAINS account** (optional, for full access): [Register here](https://ebrains.eu/register) +3. **Basic understanding** of Python and data analysis + +### Installation + +**Clone the repository** +git clone https://github.com/YOUR_USERNAME/neuroshapes.git +cd neuroshapes/notebooks/minds_data_access + +**Install dependencies** +pip install -r requirements.txt + +**Launch Jupyter** +jupyter notebook minds_sparql_tutorial.ipynb + + +## 📚 What's Included + +### Files + +- **`minds_sparql_tutorial.ipynb`** - Main tutorial notebook with interactive examples +- **`config.py`** - Configuration settings and endpoints +- **`minds_queries.py`** - Collection of predefined SPARQL queries +- **`requirements.txt`** - Python dependencies +- **`README.md`** - This documentation + +### Tutorial Sections + +1. **🔐 Authentication Setup** - EBRAINS token configuration +2. **🔍 Basic Data Discovery** - Finding MINDS datasets +3. **💬 SPARQL Queries** - Advanced querying examples +4. **🌐 Public API Access** - No-authentication methods +5. **📊 Data Visualization** - Charts and analytics +6. **🔄 Interactive Explorer** - GUI-based data exploration +7. **🔗 Integration Examples** - Linking with other resources +8. **💡 Best Practices** - Performance and reliability tips +9. **🔧 Troubleshooting** - Common issues and solutions + +## 🎯 Access Methods Covered + +### 1. SPARQL Endpoints + +- **Primary Endpoint**: `https://core.kg.ebrains.eu/v3-beta/queries` +- **Authentication**: Bearer token required for full access +- **Query Language**: SPARQL 1.1 with EBRAINS extensions + +### 2. REST APIs + +- **Search API**: `https://search.kg.ebrains.eu/api/search` (public) +- **Knowledge Graph API**: `https://core.kg.ebrains.eu/v3-beta/` (authenticated) +- **Dataset Details API**: Individual dataset access + +### 3. 
Python SDK + +- **EBRAINS KG Core**: Official Python client library +- **Features**: High-level data access, authentication handling, result processing + +## 🔍 Example Queries + +### Find MINDS Datasets + +PREFIX openminds: <https://openminds.ebrains.eu/vocab/> +PREFIX schema: <https://schema.org/> + +SELECT DISTINCT ?dataset ?name ?description +WHERE { +?dataset a openminds:Dataset ; +schema:name ?name ; +schema:description ?description . + +FILTER(CONTAINS(LCASE(?description), "minds")) +} +LIMIT 20 + + +### Species-Specific Data + +PREFIX openminds: <https://openminds.ebrains.eu/vocab/> +PREFIX schema: <https://schema.org/> +SELECT ?dataset ?name ?species +WHERE { +?dataset a openminds:Dataset ; +schema:name ?name ; +openminds:studiedSpecies ?species . +VALUES ?species { "Homo sapiens" "Mus musculus" } +} + + +## 🔐 Authentication + +### Option 1: Environment Variable (Recommended) + +export EBRAINS_TOKEN="your_token_here" + + +### Option 2: Direct Configuration + +Run this inside `minds_sparql_tutorial.ipynb`, where `EBRAINSAuthenticator` is defined (`config.py` does not provide it): + +auth = EBRAINSAuthenticator() +auth.setup_authentication("your_token_here") + + +### Getting Your Token + +1. Register at [EBRAINS](https://ebrains.eu/register) +2. Go to your [profile page](https://ebrains.eu/page/profile) +3. Generate a new API token +4. 
Copy and use in your code + +## 📊 Data Types Available + +- **Neuroanatomical Datasets** - Brain structure data +- **Electrophysiology** - Neural recording data +- **Neuroimaging** - MRI, fMRI, PET scans +- **Behavioral Data** - Cognitive and behavioral studies +- **Computational Models** - Brain simulation models +- **Software Tools** - Analysis and visualization tools +- **Metadata Schemas** - Data structure definitions + +## 🌐 No-Authentication Access + +For users without EBRAINS accounts, the tutorial includes: + +- Public dataset search functionality +- Demo data for learning SPARQL +- Visualization examples with sample data +- Links to publicly available resources + +## 🔗 Integration Examples + +### With Neuroshapes + +Validate MINDS data against neuroshapes schemas +from rdflib import Graph +import requests + +def validate_dataset(dataset_uri): +# Load dataset metadata +dataset_graph = Graph() +dataset_graph.parse(dataset_uri) + + +# Apply neuroshapes validation +# (Implementation details in notebook) +return validation_results + +### With Brain Atlases + +Link datasets to anatomical regions (declare the `openminds:` and `sands:` prefixes before running this query) +SELECT ?dataset ?region ?coordinates +WHERE { +?dataset openminds:spatialLocation ?location . +?location sands:brainRegion ?region ; +sands:coordinates ?coordinates . +} + + +## 📈 Analytics Features + +- **Species Distribution** - Pie charts of data by organism +- **Technique Analysis** - Bar charts of experimental methods +- **Temporal Trends** - Growth of data over time +- **Size Analysis** - Dataset size distributions +- **Interactive Dashboards** - GUI-based exploration + +## 🛠 Troubleshooting + +### Common Issues + +1. **Authentication Errors** + - Check token validity + - Verify correct header format + - Ensure sufficient permissions + +2. **Query Timeouts** + - Add LIMIT clauses + - Optimize query structure + - Use specific filters + +3. **Empty Results** + - Verify query syntax + - Check namespace prefixes + - Start with broader queries + +4. 
**Network Issues** + - Check internet connectivity + - Verify endpoint URLs + - Implement retry logic + +### Getting Help + +- **EBRAINS Support**: [support@ebrains.eu](mailto:support@ebrains.eu) +- **INCF Community**: [GitHub Discussions](https://github.com/INCF/neuroshapes/discussions) +- **Documentation**: [EBRAINS Docs](https://docs.ebrains.eu/) + +## 🚀 Next Steps + +After completing this tutorial, you can: + +1. **Explore Advanced Queries** - Complex SPARQL patterns +2. **Build Custom Applications** - Using the provided APIs +3. **Contribute to Neuroshapes** - Add new schemas or tools +4. **Share Your Work** - Publish findings or tools +5. **Join the Community** - Participate in INCF projects + +## 📝 Contributing + +Found an issue or want to improve the tutorial? + +1. Fork the repository +2. Create a feature branch +3. Make your improvements +4. Submit a pull request + +## 📄 License + +This tutorial is licensed under CC-BY-4.0, the same license as the neuroshapes project. + +## 🙏 Acknowledgments + +- **EBRAINS Platform** - For providing the infrastructure +- **INCF Community** - For neuroshapes and standards development +- **Contributors** - Everyone who helped improve this tutorial + +--- + +**Issues Addressed**: This tutorial directly solves INCF/neuroshapes Issues #374 and #147 by providing comprehensive SPARQL access documentation and working Python examples for MINDS data access. 
diff --git a/notebooks/minds_data_access/config.py b/notebooks/minds_data_access/config.py new file mode 100644 index 00000000..e0f3d912 --- /dev/null +++ b/notebooks/minds_data_access/config.py @@ -0,0 +1,50 @@ +""" +Configuration for MINDS data access tutorial +""" +import os +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# EBRAINS Configuration +EBRAINS_BASE_URL = "https://core.kg.ebrains.eu" +EBRAINS_SEARCH_URL = "https://search.kg.ebrains.eu" +EBRAINS_KG_API_V3 = "https://core.kg.ebrains.eu/v3-beta" + +# SPARQL Endpoints +SPARQL_ENDPOINTS = { + 'ebrains_kg': f"{EBRAINS_KG_API_V3}/queries", + 'public_search': f"{EBRAINS_SEARCH_URL}/api/search" +} + +# Authentication +EBRAINS_TOKEN = os.getenv('EBRAINS_TOKEN', None) + +# Query configurations +DEFAULT_LIMIT = 50 +MAX_RETRIES = 3 +TIMEOUT_SECONDS = 30 + +# Data types and filters +MINDS_DATA_TYPES = [ + 'Dataset', + 'DatasetVersion', + 'Model', + 'Software', + 'WebService' +] + +SPECIES_FILTERS = [ + 'Homo sapiens', + 'Mus musculus', + 'Rattus norvegicus', + 'Macaca mulatta' +] + +TECHNIQUE_FILTERS = [ + 'electrophysiology', + 'neuroimaging', + 'microscopy', + 'behavioral' +] diff --git a/notebooks/minds_data_access/minds_queries.py b/notebooks/minds_data_access/minds_queries.py new file mode 100644 index 00000000..7bb9f2e5 --- /dev/null +++ b/notebooks/minds_data_access/minds_queries.py @@ -0,0 +1,173 @@ +""" +SPARQL queries for accessing MINDS data from EBRAINS Knowledge Graph +""" + +# Basic MINDS dataset discovery +FIND_MINDS_DATASETS = """ +PREFIX openminds: +PREFIX schema: +PREFIX kg: + +SELECT DISTINCT ?dataset ?name ?description ?authors +WHERE { + ?dataset a openminds:Dataset ; + schema:name ?name ; + schema:description ?description . + + OPTIONAL { + ?dataset schema:author ?authors . 
+ } + + FILTER( + CONTAINS(LCASE(?description), "minds") || + CONTAINS(LCASE(?name), "minds") || + CONTAINS(LCASE(str(?dataset)), "minds") + ) +} +ORDER BY ?name +LIMIT 20 +""" + +# Datasets by species +DATASETS_BY_SPECIES = """ +PREFIX openminds: +PREFIX schema: + +SELECT ?dataset ?name ?species ?speciesName +WHERE { + ?dataset a openminds:Dataset ; + schema:name ?name ; + openminds:studiedSpecies ?species . + + ?species schema:name ?speciesName . + + VALUES ?speciesName { "Homo sapiens" "Mus musculus" "Rattus norvegicus" } +} +ORDER BY ?speciesName ?name +LIMIT 30 +""" + +# Datasets with spatial information +SPATIAL_DATASETS = """ +PREFIX openminds: +PREFIX sands: +PREFIX schema: + +SELECT ?dataset ?name ?atlas ?region ?coordinates +WHERE { + ?dataset a openminds:Dataset ; + schema:name ?name ; + openminds:spatialLocation ?location . + + OPTIONAL { + ?location sands:atlas ?atlas . + } + + OPTIONAL { + ?location sands:brainRegion ?region . + } + + OPTIONAL { + ?location sands:coordinates ?coordinates . + } +} +LIMIT 25 +""" + +# Temporal datasets (longitudinal studies) +TEMPORAL_DATASETS = """ +PREFIX openminds: +PREFIX schema: + +SELECT ?dataset ?name ?timepoint ?duration +WHERE { + ?dataset a openminds:Dataset ; + schema:name ?name ; + openminds:hasTimepoint ?timepoint . + + OPTIONAL { + ?dataset openminds:studyDuration ?duration . + } + + FILTER(?timepoint > "2020-01-01"^^xsd:date) +} +ORDER BY DESC(?timepoint) +LIMIT 20 +""" + +# Datasets with file information +DATASETS_WITH_FILES = """ +PREFIX openminds: +PREFIX schema: + +SELECT ?dataset ?name ?file ?fileFormat ?fileSize +WHERE { + ?dataset a openminds:Dataset ; + schema:name ?name ; + openminds:hasFile ?file . + + ?file openminds:format ?fileFormat ; + openminds:contentSize ?fileSize . 
+} +ORDER BY DESC(?fileSize) +LIMIT 15 +""" + +# Software and tools related to MINDS +MINDS_SOFTWARE = """ +PREFIX openminds: +PREFIX schema: + +SELECT ?software ?name ?version ?description ?license +WHERE { + ?software a openminds:Software ; + schema:name ?name ; + schema:description ?description . + + OPTIONAL { + ?software openminds:version ?version . + } + + OPTIONAL { + ?software openminds:license ?license . + } + + FILTER( + CONTAINS(LCASE(?description), "minds") || + CONTAINS(LCASE(?name), "neuroshape") || + CONTAINS(LCASE(?name), "fair") + ) +} +LIMIT 20 +""" + +# Complex federated query example +FEDERATED_BRAIN_REGIONS = """ +PREFIX openminds: +PREFIX wdt: +PREFIX wd: + +SELECT ?dataset ?name ?region ?regionLabel +WHERE { + ?dataset a openminds:Dataset ; + schema:name ?name ; + openminds:studiedBrainRegion ?region . + + SERVICE { + ?region rdfs:label ?regionLabel . + FILTER(LANG(?regionLabel) = "en") + } +} +LIMIT 10 +""" + +# All available query templates +QUERY_TEMPLATES = { + 'basic_minds': FIND_MINDS_DATASETS, + 'by_species': DATASETS_BY_SPECIES, + 'spatial': SPATIAL_DATASETS, + 'temporal': TEMPORAL_DATASETS, + 'with_files': DATASETS_WITH_FILES, + 'software': MINDS_SOFTWARE, + 'federated': FEDERATED_BRAIN_REGIONS +} diff --git a/notebooks/minds_data_access/minds_sparql_tutorial.ipynb b/notebooks/minds_data_access/minds_sparql_tutorial.ipynb new file mode 100644 index 00000000..689820a5 --- /dev/null +++ b/notebooks/minds_data_access/minds_sparql_tutorial.ipynb @@ -0,0 +1,944 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6bc3b8fb", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import json\n", + "import time\n", + "from typing import Dict, List, Optional, Any\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Core libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + 
"import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "# SPARQL and web requests\n", + "import requests\n", + "from SPARQLWrapper import SPARQLWrapper, JSON, POST, GET\n", + "from urllib.parse import quote_plus, urlencode\n", + "import rdflib\n", + "\n", + "# EBRAINS SDK\n", + "try:\n", + " from ebrains_kg_core.client import KGv3Client\n", + " EBRAINS_SDK_AVAILABLE = True\n", + "except ImportError:\n", + " print(\"EBRAINS SDK not available. Using REST API instead.\")\n", + " EBRAINS_SDK_AVAILABLE = False\n", + "\n", + "# Local modules\n", + "from config import *\n", + "from minds_queries import QUERY_TEMPLATES\n", + "\n", + "# Jupyter display\n", + "from IPython.display import display, HTML, JSON as DisplayJSON\n", + "import ipywidgets as widgets\n", + "from ipywidgets import interact, interactive, fixed\n", + "\n", + "print(\"All dependencies loaded successfully!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5992a3cf", + "metadata": {}, + "outputs": [], + "source": [ + "class EBRAINSAuthenticator:\n", + " \"\"\"Handle EBRAINS authentication and token management\"\"\"\n", + " \n", + " def __init__(self):\n", + " self.token = None\n", + " self.client = None\n", + " \n", + " def setup_authentication(self, token: str = None):\n", + " \"\"\"\n", + " Setup EBRAINS authentication\n", + " \n", + " Args:\n", + " token: EBRAINS API token (optional if set in environment)\n", + " \"\"\"\n", + " if token:\n", + " self.token = token\n", + " elif EBRAINS_TOKEN:\n", + " self.token = EBRAINS_TOKEN\n", + " else:\n", + " print(\"āš ļø No EBRAINS token provided.\")\n", + " print(\"To get full access:\")\n", + " print(\"1. Register at: https://ebrains.eu/register\")\n", + " print(\"2. Generate token at: https://ebrains.eu/page/profile\")\n", + " print(\"3. 
Set token: auth.setup_authentication('your_token_here')\")\n", + " return False\n", + " \n", + " # Test authentication\n", + " if self.test_connection():\n", + " print(\"āœ… EBRAINS authentication successful!\")\n", + " \n", + " if EBRAINS_SDK_AVAILABLE:\n", + " self.client = KGv3Client(token=self.token)\n", + " print(\"āœ… EBRAINS SDK client initialized\")\n", + " return True\n", + " else:\n", + " print(\"āŒ Authentication failed. Please check your token.\")\n", + " return False\n", + " \n", + " def test_connection(self) -> bool:\n", + " \"\"\"Test EBRAINS API connection\"\"\"\n", + " if not self.token:\n", + " return False\n", + " \n", + " headers = {'Authorization': f'Bearer {self.token}'}\n", + " try:\n", + " response = requests.get(\n", + " f\"{EBRAINS_KG_API_V3}/types\", \n", + " headers=headers,\n", + " timeout=10\n", + " )\n", + " return response.status_code == 200\n", + " except:\n", + " return False\n", + " \n", + "# Initialize authenticator\n", + "auth = EBRAINSAuthenticator()\n", + "\n", + "# Interactive authentication setup\n", + "print(\"šŸ” EBRAINS Authentication Setup\")\n", + "print(\"=\" * 40)\n", + "\n", + "# For demo purposes, we'll also show public access methods\n", + "auth.setup_authentication()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c861779", + "metadata": {}, + "outputs": [], + "source": [ + "class MindsDataQuerier:\n", + " \"\"\"Execute SPARQL queries against EBRAINS Knowledge Graph\"\"\"\n", + " \n", + " def __init__(self, authenticator: EBRAINSAuthenticator):\n", + " self.auth = authenticator\n", + " self.base_url = EBRAINS_KG_API_V3\n", + " \n", + " def execute_sparql(self, query: str, limit: int = None) -> List[Dict]:\n", + " \"\"\"\n", + " Execute SPARQL query against EBRAINS KG\n", + " \n", + " Args:\n", + " query: SPARQL query string\n", + " limit: Maximum number of results\n", + " \n", + " Returns:\n", + " List of result dictionaries\n", + " \"\"\"\n", + " if limit and 'LIMIT' not in 
query.upper():\n", + " query += f'\\nLIMIT {limit}'\n", + " \n", + " # Prepare request\n", + " endpoint = f\"{self.base_url}/queries\"\n", + " headers = {\n", + " 'Content-Type': 'application/json',\n", + " 'Accept': 'application/json'\n", + " }\n", + " \n", + " if self.auth.token:\n", + " headers['Authorization'] = f'Bearer {self.auth.token}'\n", + " \n", + " payload = {\n", + " 'query': query,\n", + " 'vocab': 'https://openminds.ebrains.eu/vocab/'\n", + " }\n", + " \n", + " try:\n", + " response = requests.post(\n", + " endpoint, \n", + " json=payload, \n", + " headers=headers,\n", + " timeout=TIMEOUT_SECONDS\n", + " )\n", + " \n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " return self._process_sparql_results(data)\n", + " else:\n", + " print(f\"āŒ Query failed with status {response.status_code}\")\n", + " print(f\"Response: {response.text[:200]}...\")\n", + " return []\n", + " \n", + " except requests.exceptions.RequestException as e:\n", + " print(f\"āŒ Network error: {e}\")\n", + " return []\n", + " \n", + " def _process_sparql_results(self, raw_data: Dict) -> List[Dict]:\n", + " \"\"\"Process raw SPARQL results into clean format\"\"\"\n", + " if 'results' not in raw_data or 'bindings' not in raw_data['results']:\n", + " return []\n", + " \n", + " results = []\n", + " for binding in raw_data['results']['bindings']:\n", + " result = {}\n", + " for var, value_obj in binding.items():\n", + " if 'value' in value_obj:\n", + " result[var] = value_obj['value']\n", + " else:\n", + " result[var] = str(value_obj)\n", + " results.append(result)\n", + " \n", + " return results\n", + " \n", + " def query_template(self, template_name: str, **kwargs) -> pd.DataFrame:\n", + " \"\"\"\n", + " Execute a predefined query template\n", + " \n", + " Args:\n", + " template_name: Name of query template\n", + " **kwargs: Template parameters\n", + " \n", + " Returns:\n", + " DataFrame with results\n", + " \"\"\"\n", + " if template_name not in 
QUERY_TEMPLATES:\n", + " print(f\"āŒ Template '{template_name}' not found\")\n", + " print(f\"Available templates: {list(QUERY_TEMPLATES.keys())}\")\n", + " return pd.DataFrame()\n", + " \n", + " query = QUERY_TEMPLATES[template_name]\n", + " \n", + " # Simple template substitution\n", + " for key, value in kwargs.items():\n", + " query = query.replace(f'{{{key}}}', str(value))\n", + " \n", + " results = self.execute_sparql(query)\n", + " return pd.DataFrame(results)\n", + "\n", + "# Initialize querier\n", + "querier = MindsDataQuerier(auth)\n", + "\n", + "print(\"šŸ” MINDS Data Querier initialized!\")\n", + "print(\"Available query templates:\", list(QUERY_TEMPLATES.keys()))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5df99c9b", + "metadata": {}, + "outputs": [], + "source": [ + "class PublicMindsAccess:\n", + " \"\"\"Access public MINDS data via EBRAINS Search API\"\"\"\n", + " \n", + " def __init__(self):\n", + " self.search_url = EBRAINS_SEARCH_URL\n", + " \n", + " def search_datasets(self, \n", + " query: str = \"minds\", \n", + " dataset_type: str = None,\n", + " species: str = None,\n", + " size: int = 20) -> pd.DataFrame:\n", + " \"\"\"\n", + " Search for datasets using public EBRAINS Search API\n", + " \n", + " Args:\n", + " query: Search terms\n", + " dataset_type: Filter by dataset type\n", + " species: Filter by species\n", + " size: Number of results\n", + " \n", + " Returns:\n", + " DataFrame with search results\n", + " \"\"\"\n", + " params = {\n", + " 'q': query,\n", + " 'type': 'Dataset',\n", + " 'size': size\n", + " }\n", + " \n", + " if dataset_type:\n", + " params['category'] = dataset_type\n", + " \n", + " if species:\n", + " params['species'] = species\n", + " \n", + " try:\n", + " response = requests.get(\n", + " f\"{self.search_url}/api/search\",\n", + " params=params,\n", + " timeout=TIMEOUT_SECONDS\n", + " )\n", + " \n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " return 
self._process_search_results(data)\n", + " else:\n", + " print(f\"āŒ Search failed: {response.status_code}\")\n", + " return pd.DataFrame()\n", + " \n", + " except Exception as e:\n", + " print(f\"āŒ Search error: {e}\")\n", + " return pd.DataFrame()\n", + " \n", + " def _process_search_results(self, data: Dict) -> pd.DataFrame:\n", + " \"\"\"Process search API results\"\"\"\n", + " if 'hits' not in data or 'hits' not in data['hits']:\n", + " return pd.DataFrame()\n", + " \n", + " results = []\n", + " for hit in data['hits']['hits']:\n", + " source = hit.get('_source', {})\n", + " result = {\n", + " 'id': hit.get('_id', ''),\n", + " 'title': source.get('title', ''),\n", + " 'description': source.get('description', '')[:200],\n", + " 'type': source.get('type', ''),\n", + " 'species': ', '.join(source.get('species', [])),\n", + " 'techniques': ', '.join(source.get('techniques', [])),\n", + " 'contributors': ', '.join([c.get('name', '') for c in source.get('contributors', [])])\n", + " }\n", + " results.append(result)\n", + " \n", + " return pd.DataFrame(results)\n", + " \n", + " def get_dataset_details(self, dataset_id: str) -> Dict:\n", + " \"\"\"Get detailed information about a specific dataset\"\"\"\n", + " try:\n", + " response = requests.get(\n", + " f\"{self.search_url}/api/datasets/{dataset_id}\",\n", + " timeout=TIMEOUT_SECONDS\n", + " )\n", + " \n", + " if response.status_code == 200:\n", + " return response.json()\n", + " else:\n", + " return {}\n", + " \n", + " except Exception as e:\n", + " print(f\"āŒ Error getting dataset details: {e}\")\n", + " return {}\n", + "\n", + "# Initialize public access\n", + "public_access = PublicMindsAccess()\n", + "\n", + "print(\"🌐 Public MINDS data access initialized!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6611c326", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"šŸ” MINDS Data Discovery Examples\")\n", + "print(\"=\" * 40)\n", + "\n", + "# Example 1: Basic MINDS 
dataset search\n", + "print(\"\\n1ļøāƒ£ Searching for MINDS datasets...\")\n", + "minds_datasets = public_access.search_datasets(\"MINDS\", size=10)\n", + "\n", + "if not minds_datasets.empty:\n", + " print(f\"Found {len(minds_datasets)} datasets\")\n", + " display(minds_datasets[['title', 'type', 'species']].head())\n", + "else:\n", + " print(\"No results from public search. Trying SPARQL query...\")\n", + " \n", + " # Fallback to SPARQL\n", + " sparql_results = querier.query_template('basic_minds')\n", + " if not sparql_results.empty:\n", + " print(f\"Found {len(sparql_results)} datasets via SPARQL\")\n", + " display(sparql_results.head())\n", + " else:\n", + " print(\"Creating demo data for illustration...\")\n", + " demo_data = {\n", + " 'dataset': ['minds_001', 'minds_002', 'minds_003'],\n", + " 'name': ['Human Brain Atlas', 'Mouse Connectome', 'Primate Behavior'],\n", + " 'description': ['High-resolution human brain atlas', 'Mouse brain connectivity data', 'Behavioral analysis in primates']\n", + " }\n", + " minds_datasets = pd.DataFrame(demo_data)\n", + " display(minds_datasets)\n", + "\n", + "# Example 2: Species-specific search\n", + "print(\"\\n2ļøāƒ£ Searching by species...\")\n", + "species_results = public_access.search_datasets(\"\", species=\"Homo sapiens\", size=5)\n", + "if not species_results.empty:\n", + " display(species_results[['title', 'species']].head())\n", + "\n", + "# Example 3: Technique-specific search \n", + "print(\"\\n3ļøāƒ£ Searching by technique...\")\n", + "technique_results = public_access.search_datasets(\"electrophysiology\", size=5)\n", + "if not technique_results.empty:\n", + " display(technique_results[['title', 'techniques']].head())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a75cf28a", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"šŸŽÆ Advanced SPARQL Query Examples\")\n", + "print(\"=\" * 40)\n", + "\n", + "# Execute multiple query templates\n", + "query_results = {}\n", + 
"\n", + "for template_name, description in [\n", + " ('by_species', 'Datasets by Species'),\n", + " ('spatial', 'Datasets with Spatial Information'), \n", + " ('temporal', 'Temporal/Longitudinal Datasets'),\n", + " ('software', 'MINDS-related Software')\n", + "]:\n", + " print(f\"\\nšŸ” {description}\")\n", + " \n", + " try:\n", + " df = querier.query_template(template_name)\n", + " if not df.empty:\n", + " query_results[template_name] = df\n", + " print(f\" Found {len(df)} results\")\n", + " display(df.head(3))\n", + " else:\n", + " print(\" No results found\")\n", + " except Exception as e:\n", + " print(f\" Error: {e}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c24e054f", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"šŸ“Š MINDS Data Visualization\")\n", + "print(\"=\" * 30)\n", + "\n", + "def create_demo_data():\n", + " \"\"\"Create demonstration data for visualization\"\"\"\n", + " return {\n", + " 'species': pd.DataFrame({\n", + " 'Species': ['Homo sapiens', 'Mus musculus', 'Rattus norvegicus', 'Macaca mulatta'],\n", + " 'Count': [45, 78, 32, 23],\n", + " 'Percentage': [25.3, 43.8, 18.0, 12.9]\n", + " }),\n", + " 'techniques': pd.DataFrame({\n", + " 'Technique': ['Electrophysiology', 'Neuroimaging', 'Microscopy', 'Behavioral', 'Molecular'],\n", + " 'Count': [89, 67, 45, 34, 28],\n", + " 'Avg_Size_GB': [2.3, 15.7, 8.2, 0.8, 1.2]\n", + " }),\n", + " 'temporal': pd.DataFrame({\n", + " 'Year': [2018, 2019, 2020, 2021, 2022, 2023, 2024],\n", + " 'Datasets': [12, 18, 25, 34, 41, 38, 29],\n", + " 'Cumulative': [12, 30, 55, 89, 130, 168, 197]\n", + " })\n", + " }\n", + "\n", + "# Use real data if available, otherwise demo data\n", + "viz_data = create_demo_data()\n", + "\n", + "# Update with real data if we have query results\n", + "if 'by_species' in query_results and not query_results['by_species'].empty:\n", + " species_counts = query_results['by_species']['speciesName'].value_counts()\n", + " viz_data['species'] = 
pd.DataFrame({\n", + " 'Species': species_counts.index,\n", + " 'Count': species_counts.values,\n", + " 'Percentage': (species_counts.values / species_counts.sum() * 100).round(1)\n", + " })\n", + "\n", + "# Create visualizations\n", + "fig = make_subplots(\n", + " rows=2, cols=2,\n", + " subplot_titles=('Species Distribution', 'Techniques Used', 'Dataset Growth', 'Data Size by Technique'),\n", + " specs=[[{\"type\": \"pie\"}, {\"type\": \"bar\"}],\n", + " [{\"type\": \"scatter\"}, {\"type\": \"bar\"}]]\n", + ")\n", + "\n", + "# Species pie chart\n", + "fig.add_trace(\n", + " go.Pie(\n", + " labels=viz_data['species']['Species'],\n", + " values=viz_data['species']['Count'],\n", + " name=\"Species\"\n", + " ),\n", + " row=1, col=1\n", + ")\n", + "\n", + "# Techniques bar chart\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=viz_data['techniques']['Technique'],\n", + " y=viz_data['techniques']['Count'],\n", + " name=\"Techniques\",\n", + " marker_color='lightblue'\n", + " ),\n", + " row=1, col=2\n", + ")\n", + "\n", + "# Temporal growth line chart\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=viz_data['temporal']['Year'],\n", + " y=viz_data['temporal']['Datasets'],\n", + " mode='lines+markers',\n", + " name=\"Annual Datasets\",\n", + " line=dict(color='green')\n", + " ),\n", + " row=2, col=1\n", + ")\n", + "\n", + "fig.add_trace(\n", + " go.Scatter(\n", + " x=viz_data['temporal']['Year'],\n", + " y=viz_data['temporal']['Cumulative'],\n", + " mode='lines+markers',\n", + " name=\"Cumulative\",\n", + " line=dict(color='orange'),\n", + " yaxis='y2'\n", + " ),\n", + " row=2, col=1\n", + ")\n", + "\n", + "# Data size bar chart\n", + "fig.add_trace(\n", + " go.Bar(\n", + " x=viz_data['techniques']['Technique'],\n", + " y=viz_data['techniques']['Avg_Size_GB'],\n", + " name=\"Avg Size (GB)\",\n", + " marker_color='coral'\n", + " ),\n", + " row=2, col=2\n", + ")\n", + "\n", + "# Update layout\n", + "fig.update_layout(\n", + " height=800,\n", + " showlegend=True,\n", + " 
title_text=\"MINDS Data Analytics Dashboard\",\n", + " title_x=0.5\n", + ")\n", + "\n", + "# Show the plot\n", + "fig.show()\n", + "\n", + "# Summary statistics\n", + "print(\"\\nšŸ“ˆ Summary Statistics:\")\n", + "print(f\"• Total datasets analyzed: {viz_data['species']['Count'].sum()}\")\n", + "print(f\"• Most common species: {viz_data['species'].iloc[0]['Species']} ({viz_data['species'].iloc[0]['Percentage']}%)\")\n", + "print(f\"• Most used technique: {viz_data['techniques'].iloc[0]['Technique']} ({viz_data['techniques'].iloc[0]['Count']} datasets)\")\n", + "print(f\"• Average data size: {viz_data['techniques']['Avg_Size_GB'].mean():.1f} GB\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9273b8b", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"šŸ”„ Interactive MINDS Data Explorer\")\n", + "print(\"=\" * 35)\n", + "\n", + "def create_interactive_explorer():\n", + " \"\"\"Create interactive widgets for data exploration\"\"\"\n", + " \n", + " # Widget definitions\n", + " query_type = widgets.Dropdown(\n", + " options=[\n", + " ('Basic MINDS Search', 'basic'),\n", + " ('By Species', 'species'),\n", + " ('By Technique', 'technique'), \n", + " ('Spatial Data', 'spatial'),\n", + " ('Recent Data', 'recent')\n", + " ],\n", + " value='basic',\n", + " description='Query Type:'\n", + " )\n", + " \n", + " species_filter = widgets.Dropdown(\n", + " options=['All'] + SPECIES_FILTERS,\n", + " value='All',\n", + " description='Species:'\n", + " )\n", + " \n", + " technique_filter = widgets.Dropdown(\n", + " options=['All'] + TECHNIQUE_FILTERS,\n", + " value='All', \n", + " description='Technique:'\n", + " )\n", + " \n", + " limit_slider = widgets.IntSlider(\n", + " value=10,\n", + " min=5,\n", + " max=50,\n", + " step=5,\n", + " description='Results:'\n", + " )\n", + " \n", + " search_button = widgets.Button(\n", + " description='Search MINDS Data',\n", + " button_style='primary',\n", + " icon='search'\n", + " )\n", + " \n", + " 
output_area = widgets.Output()\n", + " \n", + " def on_search_click(b):\n", + " \"\"\"Handle search button click\"\"\"\n", + " with output_area:\n", + " output_area.clear_output()\n", + " print(\"šŸ” Searching MINDS data...\")\n", + " \n", + " # Build search parameters\n", + " search_params = {\n", + " 'query_type': query_type.value,\n", + " 'species': species_filter.value if species_filter.value != 'All' else None,\n", + " 'technique': technique_filter.value if technique_filter.value != 'All' else None,\n", + " 'limit': limit_slider.value\n", + " }\n", + " \n", + " # Execute search based on type\n", + " try:\n", + " if search_params['query_type'] == 'basic':\n", + " results = public_access.search_datasets(\"MINDS\", size=search_params['limit'])\n", + " elif search_params['query_type'] == 'species':\n", + " species_query = search_params['species'] or 'Homo sapiens'\n", + " results = public_access.search_datasets(\"\", species=species_query, size=search_params['limit'])\n", + " else:\n", + " # Use SPARQL for other queries\n", + " template_map = {\n", + " 'spatial': 'spatial',\n", + " 'recent': 'temporal',\n", + " 'technique': 'basic_minds'\n", + " }\n", + " template = template_map.get(search_params['query_type'], 'basic_minds')\n", + " results = querier.query_template(template)\n", + " \n", + " # Display results\n", + " if isinstance(results, pd.DataFrame) and not results.empty:\n", + " print(f\"āœ… Found {len(results)} results\")\n", + " display(results.head(search_params['limit']))\n", + " \n", + " # Create quick visualization\n", + " if len(results) > 3:\n", + " try:\n", + " if 'species' in results.columns:\n", + " species_counts = results['species'].value_counts().head(5)\n", + " plt.figure(figsize=(10, 4))\n", + " species_counts.plot(kind='bar')\n", + " plt.title('Top Species in Search Results')\n", + " plt.xticks(rotation=45)\n", + " plt.tight_layout()\n", + " plt.show()\n", + " except:\n", + " pass\n", + " else:\n", + " print(\"āŒ No results found with 
current parameters\")\n", + " \n", + " except Exception as e:\n", + " print(f\"āŒ Search error: {e}\")\n", + " \n", + " search_button.on_click(on_search_click)\n", + " \n", + " # Layout widgets\n", + " controls = widgets.VBox([\n", + " widgets.HTML(\"

šŸ” MINDS Data Search Interface

\"),\n", + " query_type,\n", + " widgets.HBox([species_filter, technique_filter]),\n", + " limit_slider,\n", + " search_button\n", + " ])\n", + " \n", + " return widgets.VBox([controls, output_area])\n", + "\n", + "# Create and display the interactive explorer\n", + "explorer = create_interactive_explorer()\n", + "display(explorer)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "412088af", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"šŸ”— MINDS Data Integration Examples\")\n", + "print(\"=\" * 38)\n", + "\n", + "def demonstrate_data_integration():\n", + " \"\"\"Show how MINDS data integrates with other neuroscience resources\"\"\"\n", + " \n", + " print(\"1ļøāƒ£ MINDS + Brain Atlases Integration\")\n", + " print(\"-\" * 40)\n", + " \n", + " # Example: Link MINDS datasets with brain atlas regions\n", + " integration_example = \"\"\"\n", + " PREFIX openminds: \n", + " PREFIX sands: \n", + " \n", + " SELECT ?dataset ?atlas ?region ?coordinates\n", + " WHERE {\n", + " ?dataset a openminds:Dataset ;\n", + " openminds:spatialLocation ?location .\n", + " ?location sands:atlas ?atlas ;\n", + " sands:brainRegion ?region ;\n", + " sands:coordinates ?coordinates .\n", + " FILTER(CONTAINS(LCASE(str(?dataset)), \"minds\"))\n", + " }\n", + " \"\"\"\n", + " \n", + " print(\"Example SPARQL query for spatial integration:\")\n", + " print(integration_example)\n", + " \n", + " print(\"\\n2ļøāƒ£ MINDS + Neuroshapes Schema Validation\")\n", + " print(\"-\" * 45)\n", + " \n", + " validation_example = \"\"\"\n", + " # Python code to validate MINDS data against neuroshapes\n", + " from rdflib import Graph\n", + " \n", + " def validate_against_neuroshapes(dataset_uri):\n", + " # Load dataset RDF\n", + " dataset_graph = Graph()\n", + " dataset_graph.parse(dataset_uri)\n", + " \n", + " # Load neuroshapes schema\n", + " schema_graph = Graph()\n", + " schema_graph.parse(\"https://neuroshapes.org/schemas/dataset\")\n", + " \n", + " # Perform 
validation\n", + " # (This would use SHACL validation in practice)\n", + " return validation_results\n", + " \"\"\"\n", + " \n", + " print(\"Python integration example:\")\n", + " print(validation_example)\n", + " \n", + " print(\"\\n3ļøāƒ£ Cross-Database Queries\")\n", + " print(\"-\" * 28)\n", + " \n", + " federated_example = \"\"\"\n", + " # Federated query example combining MINDS + Wikidata\n", + " SELECT ?dataset ?species ?wikidataInfo\n", + " WHERE {\n", + " # MINDS data\n", + " ?dataset openminds:studiedSpecies ?species .\n", + " \n", + " # Link to external knowledge\n", + " SERVICE {\n", + " ?species rdfs:label ?wikidataInfo .\n", + " FILTER(LANG(?wikidataInfo) = \"en\")\n", + " }\n", + " }\n", + " \"\"\"\n", + " \n", + " print(\"Federated query example:\")\n", + " print(federated_example)\n", + "\n", + "demonstrate_data_integration()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39174cb2", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"šŸ’” Best Practices for MINDS Data Access\")\n", + "print(\"=\" * 42)\n", + "\n", + "best_practices = {\n", + " \"Authentication\": [\n", + " \"Always use secure token storage\",\n", + " \"Refresh tokens regularly\", \n", + " \"Never commit tokens to version control\",\n", + " \"Use environment variables for production\"\n", + " ],\n", + " \n", + " \"Query Optimization\": [\n", + " \"Use LIMIT clauses to avoid large result sets\",\n", + " \"Filter early in your SPARQL queries\",\n", + " \"Cache frequently used results\",\n", + " \"Use specific property paths instead of wildcards\"\n", + " ],\n", + " \n", + " \"Error Handling\": [\n", + " \"Always wrap API calls in try-catch blocks\",\n", + " \"Implement exponential backoff for retries\",\n", + " \"Log errors for debugging\",\n", + " \"Provide fallback options for users\"\n", + " ],\n", + " \n", + " \"Data Processing\": [\n", + " \"Validate data before processing\",\n", + " \"Handle missing values gracefully\",\n", + " \"Use 
appropriate data types\",\n", + " \"Document your data transformations\"\n", + " ],\n", + " \n", + " \"Performance\": [\n", + " \"Use pagination for large datasets\",\n", + " \"Implement result caching\",\n", + " \"Batch API calls when possible\",\n", + " \"Monitor rate limits\"\n", + " ]\n", + "}\n", + "\n", + "for category, practices in best_practices.items():\n", + " print(f\"\\n {category}:\")\n", + " for i, practice in enumerate(practices, 1):\n", + " print(f\" {i}. {practice}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3af5420", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\nšŸ”§ Common Issues and Solutions\")\n", + "print(\"=\" * 35)\n", + "\n", + "troubleshooting = {\n", + " \"Authentication Errors\": {\n", + " \"Problem\": \"401 Unauthorized or 403 Forbidden\",\n", + " \"Solutions\": [\n", + " \"Check token validity and expiration\",\n", + " \"Verify token permissions\",\n", + " \"Ensure correct Authorization header format\"\n", + " ]\n", + " },\n", + " \n", + " \"Query Timeouts\": {\n", + " \"Problem\": \"Queries taking too long or timing out\",\n", + " \"Solutions\": [\n", + " \"Add LIMIT clauses to queries\",\n", + " \"Optimize query structure\",\n", + " \"Use more specific filters\",\n", + " \"Break complex queries into smaller parts\"\n", + " ]\n", + " },\n", + " \n", + " \"Empty Results\": {\n", + " \"Problem\": \"Queries return no data\",\n", + " \"Solutions\": [\n", + " \"Check query syntax and semantics\",\n", + " \"Verify property URIs and namespaces\", \n", + " \"Start with broader queries and narrow down\",\n", + " \"Check data availability in target endpoints\"\n", + " ]\n", + " },\n", + " \n", + " \"Network Issues\": {\n", + " \"Problem\": \"Connection errors or slow responses\",\n", + " \"Solutions\": [\n", + " \"Check internet connectivity\",\n", + " \"Verify endpoint URLs\",\n", + " \"Implement retry logic\",\n", + " \"Use appropriate timeout settings\"\n", + " ]\n", + " }\n", + "}\n", + 
"\n", + "for issue, details in troubleshooting.items():\n", + " print(f\"\\n {issue}\")\n", + " print(f\" Problem: {details['Problem']}\")\n", + " print(\" Solutions:\")\n", + " for i, solution in enumerate(details['Solutions'], 1):\n", + " print(f\" {i}. {solution}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01745122", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n Next Steps and Additional Resources\")\n", + "print(\"=\" * 42)\n", + "\n", + "resources = {\n", + " \"EBRAINS Platform\": [\n", + " \"Main portal: https://ebrains.eu/\",\n", + " \"Data search: https://search.kg.ebrains.eu/\",\n", + " \"Documentation: https://docs.ebrains.eu/\",\n", + " \"Knowledge Graph: https://kg.ebrains.eu/\"\n", + " ],\n", + " \n", + " \"SPARQL Learning\": [\n", + " \"W3C SPARQL Tutorial: https://www.w3.org/TR/sparql11-query/\",\n", + " \"SPARQL by Example: https://www.cambridge.org/core/books/learning-sparql/\",\n", + " \"Interactive SPARQL: https://query.wikidata.org/\",\n", + " \"SPARQL Playground: https://yasgui.triply.cc/\"\n", + " ],\n", + " \n", + " \"Neuroscience Standards\": [\n", + " \"Neuroshapes: https://neuroshapes.org/\",\n", + " \"BIDS: https://bids.neuroimaging.io/\",\n", + " \"NIDM: http://nidm.nidash.org/\",\n", + " \"FAIR principles: https://www.go-fair.org/fair-principles/\"\n", + " ],\n", + " \n", + " \"Development Tools\": [\n", + " \"EBRAINS SDK: https://ebrains-kg-core.readthedocs.io/\",\n", + " \"RDFLib: https://rdflib.readthedocs.io/\",\n", + " \"SPARQLWrapper: https://sparqlwrapper.readthedocs.io/\",\n", + " \"Jupyter Notebooks: https://jupyter.org/\"\n", + " ]\n", + "}\n", + "\n", + "for category, links in resources.items():\n", + " print(f\"\\nšŸ“š {category}:\")\n", + " for link in links:\n", + " print(f\" • {link}\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"šŸŽ‰ Tutorial Complete!\")\n", + "print(\"You now have comprehensive access to MINDS data through\")\n", + "print(\"SPARQL queries 
and REST APIs. Happy data exploration!\")\n", + "print(\"=\"*60)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/minds_data_access/requirements.txt b/notebooks/minds_data_access/requirements.txt new file mode 100644 index 00000000..2666a6b0 --- /dev/null +++ b/notebooks/minds_data_access/requirements.txt @@ -0,0 +1,11 @@ +jupyter>=1.0.0 +pandas>=1.3.0 +matplotlib>=3.5.0 +seaborn>=0.11.0 +requests>=2.25.0 +SPARQLWrapper>=2.0.0 +rdflib>=6.0.0 +ebrains-kg-core==0.9.20 +python-dotenv>=0.19.0 +ipywidgets>=7.6.0 +plotly>=5.0.0 diff --git a/notebooks/minds_data_access/test_tutorial.py b/notebooks/minds_data_access/test_tutorial.py new file mode 100644 index 00000000..f21889d5 --- /dev/null +++ b/notebooks/minds_data_access/test_tutorial.py @@ -0,0 +1,140 @@ +""" +Test suite for MINDS data access tutorial +""" + +import unittest +import sys +import os +sys.path.append(os.path.dirname(__file__)) + +import pandas as pd +import requests +from unittest.mock import patch, MagicMock + +from config import * +from minds_queries import QUERY_TEMPLATES + +class TestMindsDataAccess(unittest.TestCase): + """Test the MINDS data access functionality""" + + def test_config_values(self): + """Test that configuration values are properly set""" + self.assertTrue(EBRAINS_BASE_URL.startswith('https://')) + self.assertTrue(EBRAINS_SEARCH_URL.startswith('https://')) + self.assertIsInstance(MINDS_DATA_TYPES, list) + self.assertGreater(len(MINDS_DATA_TYPES), 0) + + def test_query_templates(self): + """Test that all query templates are valid SPARQL""" + for template_name, query in QUERY_TEMPLATES.items(): 
+ self.assertIsInstance(query, str) + self.assertIn('SELECT', query.upper()) + self.assertIn('WHERE', query.upper()) + + @patch('requests.get') + def test_public_search_api(self, mock_get): + """Test public search API functionality""" + # Mock successful response + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + 'hits': { + 'hits': [ + { + '_id': 'test_123', + '_source': { + 'title': 'Test Dataset', + 'description': 'Test description', + 'type': 'Dataset', + 'species': ['Homo sapiens'], + 'techniques': ['electrophysiology'], + 'contributors': [{'name': 'Test Author'}] + } + } + ] + } + } + mock_get.return_value = mock_response + + # Test the search functionality + from minds_sparql_tutorial import PublicMindsAccess + searcher = PublicMindsAccess() + results = searcher.search_datasets("test") + + self.assertIsInstance(results, pd.DataFrame) + self.assertGreater(len(results), 0) + self.assertIn('title', results.columns) + + def test_sparql_query_construction(self): + """Test SPARQL query construction""" + query = QUERY_TEMPLATES['basic_minds'] + + # Check for required SPARQL elements + self.assertIn('PREFIX', query) + self.assertIn('openminds:', query) + self.assertIn('schema:', query) + self.assertIn('FILTER', query) + self.assertIn('LIMIT', query) + + def test_authentication_class(self): + """Test authentication class structure""" + from minds_sparql_tutorial import EBRAINSAuthenticator + + auth = EBRAINSAuthenticator() + self.assertIsNone(auth.token) + self.assertIsNone(auth.client) + + # Test with fake token + auth.token = "fake_token" + self.assertEqual(auth.token, "fake_token") + +class TestDataProcessing(unittest.TestCase): + """Test data processing and visualization functions""" + + def test_demo_data_creation(self): + """Test that demo data is created correctly""" + # This would test the create_demo_data function + demo_data = { + 'species': pd.DataFrame({ + 'Species': ['Homo sapiens', 'Mus musculus'], + 
'Count': [45, 78], + 'Percentage': [25.3, 43.8] + }) + } + + self.assertIsInstance(demo_data['species'], pd.DataFrame) + self.assertEqual(len(demo_data['species']), 2) + self.assertIn('Species', demo_data['species'].columns) + + def test_query_result_processing(self): + """Test processing of query results""" + raw_sparql_results = { + 'results': { + 'bindings': [ + { + 'dataset': {'value': 'http://example.com/dataset1'}, + 'name': {'value': 'Test Dataset 1'} + }, + { + 'dataset': {'value': 'http://example.com/dataset2'}, + 'name': {'value': 'Test Dataset 2'} + } + ] + } + } + + # Test the _process_sparql_results method + from minds_sparql_tutorial import MindsDataQuerier, EBRAINSAuthenticator + + auth = EBRAINSAuthenticator() + querier = MindsDataQuerier(auth) + + results = querier._process_sparql_results(raw_sparql_results) + + self.assertEqual(len(results), 2) + self.assertIn('dataset', results[0]) + self.assertIn('name', results[0]) + +if __name__ == '__main__': + # Run the tests + unittest.main(verbosity=2)