diff --git a/api/models/gaia.py b/api/models/gaia.py index 7745a87..b680e99 100644 --- a/api/models/gaia.py +++ b/api/models/gaia.py @@ -1,5 +1,18 @@ +from typing import List +from sqlalchemy import ForeignKey +from sqlalchemy.orm import relationship from api import db -from sqlalchemy.dialects.mysql import json + + +class Genes(db.Model): + __bind_key__ = "gaia" + __tablename__ = "genes" + + id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True) + species: db.Mapped[str] = db.mapped_column(db.String(64), nullable=False) + locus: db.Mapped[str] = db.mapped_column(db.String(64), nullable=True) + geneid: db.Mapped[str] = db.mapped_column(db.String(32), nullable=True) + children: db.Mapped[List["Aliases"]] = relationship() class Aliases(db.Model): @@ -7,4 +20,5 @@ class Aliases(db.Model): __tablename__ = "aliases" id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True) - data: db.Mapped[json] = db.mapped_column(db.JSON, nullable=True, primary_key=False) + genes_id: db.Mapped[int] = db.mapped_column(ForeignKey("genes.id", ondelete="CASCADE"), nullable=False) + alias: db.Mapped[str] = db.mapped_column(db.String(256), nullable=False) diff --git a/api/resources/gaia.py b/api/resources/gaia.py index 611ae9c..4098a34 100644 --- a/api/resources/gaia.py +++ b/api/resources/gaia.py @@ -2,8 +2,8 @@ from markupsafe import escape from api import db from api.utils.bar_utils import BARUtils -from api.models.gaia import Aliases -from sqlalchemy import func +from api.models.gaia import Genes, Aliases +from sqlalchemy import func, or_ import json gaia = Namespace("Gaia", description="Gaia", path="/gaia") @@ -18,23 +18,51 @@ def get(self, identifier=""): identifier = escape(identifier) # Is it valid - if BARUtils.is_alphanumeric(identifier): - # Convert to json - identifier_json = json.dumps([identifier]) - - # Get data - # Note: SQLAlchmemy or_ did not work here. Query had AND for some reason. 
- query = db.select(Aliases).filter( - (func.json_contains(func.lower(Aliases.data), func.lower(identifier_json), "$.aliases")) - | (func.json_extract(func.lower(Aliases.data), "$.geneid") == func.lower(identifier)) - | (func.json_extract(func.lower(Aliases.data), "$.locus") == func.lower(identifier)), - ) - row = db.session.execute(query).scalars().first() + if BARUtils.is_gaia_alias(identifier): + + # Check if alias exists + # Note: This check can be done in one query, but optimizer is not using indexes for some reason + # Also, GAIA only uses the first result + query = db.select(Aliases.genes_id, Aliases.alias).filter(Aliases.alias == identifier) + row = db.session.execute(query).fetchone() if row: - return BARUtils.success_exit(row.data) + # Alias exists. Get the genes_id + query_id = row.genes_id + + else: + # Alias doesn't exist. Get the genes_id if it's locus or ncbi id + query = db.select(Genes.id).filter(or_(Genes.locus == identifier, Genes.geneid == identifier)) + row = db.session.execute(query).fetchone() + + if row: + query_id = row.id + else: + return BARUtils.error_exit("Nothing found"), 404 + + # Left join is important in case aliases do not exist for the given locus / geneid + query = ( + db.select(Genes.species, Genes.locus, Genes.geneid, func.json_arrayagg(Aliases.alias).label("aliases")) + .select_from(Genes) + .outerjoin(Aliases, Aliases.genes_id == Genes.id) + .filter(Genes.id == query_id) + ) + + result = db.session.execute(query).fetchone() + + # See if aliases exist. JSON_ARRAYAGG yields [null] when the left join matches no alias, so drop null elements + if result.aliases: + aliases = [alias for alias in json.loads(result.aliases) if alias is not None] else: - return BARUtils.error_exit("Nothing found"), 404 + aliases = [] + + data = { + "species": result.species, + "locus": result.locus, + "geneid": result.geneid, + "aliases": aliases, + } + return BARUtils.success_exit(data) else: return BARUtils.error_exit("Invalid identifier"), 400 diff --git a/api/utils/bar_utils.py b/api/utils/bar_utils.py index 0153a58..9cff5fd 100644 --- a/api/utils/bar_utils.py +++ 
b/api/utils/bar_utils.py @@ -253,12 +253,12 @@ def is_integer(data): return False @staticmethod - def is_alphanumeric(data): - """Check if the input is alphanumeric. + def is_gaia_alias(data): + """Check if the input is a valid gaia alias. :param data - :return: True if alphanumeric + :return: True if data is 1 to 50 letters, digits or underscores """ - if re.search(r"^[a-z0-9]{1,50}$", data, re.I): + if re.fullmatch(r"[a-z0-9_]{1,50}", data, re.I): return True else: return False