Skip to content

Commit 17063b4

Browse files
authored
Merge pull request #285 from asherpasha/dev
Worked on the new gaia databases
2 parents 78bf046 + c5174d2 commit 17063b4

File tree

3 files changed

+64
-22
lines changed

3 files changed

+64
-22
lines changed

api/models/gaia.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,24 @@
1+
from typing import List
2+
from sqlalchemy import ForeignKey
3+
from sqlalchemy.orm import relationship
14
from api import db
2-
from sqlalchemy.dialects.mysql import json
5+
6+
7+
class Genes(db.Model):
8+
__bind_key__ = "gaia"
9+
__tablename__ = "genes"
10+
11+
id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True)
12+
species: db.Mapped[str] = db.mapped_column(db.String(64), nullable=False)
13+
locus: db.Mapped[str] = db.mapped_column(db.String(64), nullable=True)
14+
geneid: db.Mapped[str] = db.mapped_column(db.String(32), nullable=True)
15+
children: db.Mapped[List["Aliases"]] = relationship()
316

417

518
class Aliases(db.Model):
619
__bind_key__ = "gaia"
720
__tablename__ = "aliases"
821

922
id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True)
10-
data: db.Mapped[json] = db.mapped_column(db.JSON, nullable=True, primary_key=False)
23+
genes_id: db.Mapped[int] = db.mapped_column(ForeignKey("genes.id", ondelete="CASCADE"), nullable=False)
24+
alias: db.Mapped[str] = db.mapped_column(db.String(256), nullable=False)

api/resources/gaia.py

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
from markupsafe import escape
33
from api import db
44
from api.utils.bar_utils import BARUtils
5-
from api.models.gaia import Aliases
6-
from sqlalchemy import func
5+
from api.models.gaia import Genes, Aliases
6+
from sqlalchemy import func, or_
77
import json
88

99
gaia = Namespace("Gaia", description="Gaia", path="/gaia")
@@ -18,23 +18,51 @@ def get(self, identifier=""):
1818
identifier = escape(identifier)
1919

2020
# Is it valid
21-
if BARUtils.is_alphanumeric(identifier):
22-
# Convert to json
23-
identifier_json = json.dumps([identifier])
24-
25-
# Get data
26-
# Note: SQLAlchemy or_ did not work here. Query had AND for some reason.
27-
query = db.select(Aliases).filter(
28-
(func.json_contains(func.lower(Aliases.data), func.lower(identifier_json), "$.aliases"))
29-
| (func.json_extract(func.lower(Aliases.data), "$.geneid") == func.lower(identifier))
30-
| (func.json_extract(func.lower(Aliases.data), "$.locus") == func.lower(identifier)),
31-
)
32-
row = db.session.execute(query).scalars().first()
21+
if BARUtils.is_gaia_alias(identifier):
22+
23+
# Check if alias exists
24+
# Note: This check can be done in one query, but the optimizer is not using indexes for some reason
25+
# Also, GAIA only uses the first result
26+
query = db.select(Aliases.genes_id, Aliases.alias).filter(Aliases.alias == identifier)
27+
row = db.session.execute(query).fetchone()
3328

3429
if row:
35-
return BARUtils.success_exit(row.data)
30+
# Alias exists. Get the genes_id
31+
query_id = row.genes_id
32+
33+
else:
34+
# Alias doesn't exist. Get the genes_id if it's locus or ncbi id
35+
query = db.select(Genes.id).filter(or_(Genes.locus == identifier, Genes.geneid == identifier))
36+
row = db.session.execute(query).fetchone()
37+
38+
if row:
39+
query_id = row.id
40+
else:
41+
return BARUtils.error_exit("Nothing found"), 404
42+
43+
# Left join is important in case aliases do not exist for the given locus / geneid
44+
query = (
45+
db.select(Genes.species, Genes.locus, Genes.geneid, func.json_arrayagg(Aliases.alias).label("aliases"))
46+
.select_from(Genes)
47+
.outerjoin(Aliases, Aliases.genes_id == Genes.id)
48+
.filter(Genes.id == query_id)
49+
)
50+
51+
result = db.session.execute(query).fetchone()
52+
53+
# See if aliases exist
54+
if result.aliases:
55+
aliases = json.loads(result.aliases)
3656
else:
37-
return BARUtils.error_exit("Nothing found"), 404
57+
aliases = []
58+
59+
data = {
60+
"species": result.species,
61+
"locus": result.locus,
62+
"geneid": result.geneid,
63+
"aliases": aliases,
64+
}
65+
return BARUtils.success_exit(data)
3866

3967
else:
4068
return BARUtils.error_exit("Invalid identifier"), 400

api/utils/bar_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -253,12 +253,12 @@ def is_integer(data):
253253
return False
254254

255255
@staticmethod
256-
def is_alphanumeric(data):
257-
"""Check if the input is alphanumeric.
256+
def is_gaia_alias(data):
257+
"""Check if the input is a valid gaia alias.
258258
:param data
259-
:return: True if alphanumeric
259+
:return: True if valid gaia alias
260260
"""
261-
if re.search(r"^[a-z0-9]{1,50}$", data, re.I):
261+
if re.search(r"^[a-z0-9_]{1,50}$", data, re.I):
262262
return True
263263
else:
264264
return False

0 commit comments

Comments
 (0)