Skip to content

Commit 22493b2

Browse files
authored
Merge pull request #1 from ODM2/develop
PreRelease 0.0.1
2 parents 4417cdb + 73d7bfd commit 22493b2

21 files changed

+768
-0
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#build files
2+
dist/*
3+
*.egg-info
4+
5+
*.pyc

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# ODM2DataModels
2+
3+
## What is this?
4+
odm2datamodels is a Python package that provides a set of object-relational mapping (ORM) data models for the [Observations Data Model Version 2.1](http://www.odm2.org/). These data models are built on [SQLAlchemy](https://www.sqlalchemy.org/) and provide a convenient way of interfacing with an ODM2.1 database.
5+
6+
## Core Features
7+
The primary feature is the `ODM2DataModels` class which, once instantiated, provides access to the set of ORM ODM2.1 data models and to an instance of `ODM2Engine`, which provides utility functions to perform basic Create, Read, Update, and Delete operations as well as read execution of custom SQL queries constructed using a SQLAlchemy [Select object](https://docs.sqlalchemy.org/en/14/orm/queryguide.html#select-statements) or [Query object](https://docs.sqlalchemy.org/en/14/orm/query.html#sqlalchemy.orm.Query).
8+
9+
## How to install?
10+
Presently, the build files are only available on our [GitHub repository](https://github.com/ODM2/ODM2DataModels).
11+
12+
However, we are aiming to release to the [Python Package Index (PyPI)](https://pypi.org/) and [Conda](https://docs.conda.io/en/latest/) in the near future.
13+
14+
## Testing and Database Dialect Support
15+
### Testing Method
16+
Presently, only very limited testing has been conducted, primarily through an implementation of a REST API with limited coverage of selected data models. Expanding and automating testing is an area for future updates.
17+
### Database Dialect Support
18+
These data models have only been validated for a PostgreSQL database running a deployment of the ODM2.1 schema.
19+
20+
21+

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[build-system]
2+
requires = ["setuptools>=42"]
3+
build-backend = "setuptools.build_meta"

setup.cfg

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
2+
[metadata]
3+
name = odm2datamodels
4+
description = Collection of object-relational mapping (ORM) data models for ODM2
5+
long_description = file: README.md
6+
long_description_content_type = text/markdown
7+
version = 0.0.1
8+
author = ODM2 Team
9+
author_email = ""
10+
url = https://github.com/ODM2/ODM2DataModels
11+
project_urls =
12+
bugtracker = https://github.com/ODM2/ODM2DataModels/issues
13+
keywords = Observations Data Model ODM2
14+
15+
[options]
16+
packages = find:
17+
package_dir =
18+
= src
19+
20+
python_requires = >=3.8
21+
install_requires =
22+
sqlalchemy>=1.4.32
23+
pandas>=1.4
24+
geoalchemy2>=0.6.3
25+
26+
[options.packages.find]
27+
where = src

src/odm2datamodels/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .base import ODM2DataModels as ODM2DataModels

src/odm2datamodels/base.py

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
import sqlalchemy
2+
from sqlalchemy.sql.expression import Select
3+
from sqlalchemy.orm import Query
4+
from sqlalchemy.ext.automap import automap_base
5+
from sqlalchemy.ext.declarative import declared_attr, declarative_base
6+
import geoalchemy2
7+
8+
import pickle
9+
from enum import Enum
10+
from typing import Dict, Union, Any, Type
11+
import warnings
12+
13+
import pandas as pd
14+
15+
from .exceptions import ObjectNotFound
16+
17+
from .models import annotations
18+
from .models import auth
19+
from .models import core
20+
from .models import cv
21+
from .models import dataquality
22+
from .models import equipment
23+
from .models import extensionproperties
24+
from .models import externalidentifiers
25+
from .models import labanalyses
26+
from .models import provenance
27+
from .models import results
28+
from .models import samplingfeatures
29+
from .models import simulation
30+
31+
32+
class OutputFormats(Enum):
    """Output formats selectable through the ``output_format`` argument of
    ``ODM2Engine.read_query`` and ``ODM2Engine.read_object``."""

    JSON = 'JSON'
    DATAFRAME = 'DATAFRAME'
    DICT = 'DICT'
36+
37+
class Base():
    """Shared behaviour mixed into every ODM2 ORM model.

    The class is passed as the ``cls`` argument of ``declarative_base`` /
    ``automap_base``, so each generated model inherits the dictionary helpers
    and the primary-key lookup defined here.
    """

    @declared_attr
    def __tablename__(self) -> str:
        """Return the table name: the model's class name in lower case."""
        # ``__name__`` is read from the argument, i.e. it is expected to be
        # the model class rather than an instance.
        return str(self.__name__).lower()

    @classmethod
    def from_dict(cls, attributes_dict: Dict) -> object:
        """Alternative constructor that uses dictionary to populate attributes.

        Keys that are not attributes of the model are ignored and empty-string
        values are stored as ``None`` (same rules as ``update_from_dict``).
        """
        instance = cls()
        instance.update_from_dict(attributes_dict)
        return instance

    def to_dict(self) -> Dict[str, Any]:
        """Return a ``{column name: current value}`` mapping for every table column."""
        return {
            column: getattr(self, column)
            for column in self.__table__.columns.keys()
        }

    def update_from_dict(self, attributes_dict: Dict[str, Any]) -> None:
        """Update instance attributes based on the provided dictionary.

        Only keys for which the instance already has an attribute are applied;
        an empty string is converted to ``None`` before assignment.
        """
        for key, value in attributes_dict.items():
            if not hasattr(self, key):
                continue
            if value == '':
                value = None
            setattr(self, key, value)

    @classmethod
    def get_pkey_name(cls) -> Union[str, None]:
        """Return the name of the first primary-key column, or ``None`` if the
        table declares no primary key."""
        return next(
            (column.name for column in cls.__table__.columns if column.primary_key),
            None,
        )
77+
78+
class ODM2Engine:
    """CRUD and read-query helper built on a SQLAlchemy ``sessionmaker``.

    Every public method opens its own session from ``session_maker`` inside a
    ``with`` block, so no session outlives a single call.
    """

    def __init__(self, session_maker: sqlalchemy.orm.sessionmaker) -> None:
        """Store the session factory used by all operations."""
        self.session_maker = session_maker

    @staticmethod
    def _get_or_raise(session, model: Type[Base], pkey: Union[int, str]):
        """Fetch ``model`` by primary key or raise ``ObjectNotFound``."""
        obj = session.get(model, pkey)
        if obj is None:
            pkey_name = model.get_pkey_name()
            raise ObjectNotFound(f"No '{model.__name__}' object found with {pkey_name} = {pkey}")
        return obj

    def read_query(self,
            query: Union[Query, Select],
            output_format: OutputFormats = OutputFormats.JSON,
            orient: str = 'records') -> Union[str, pd.DataFrame, Dict[Any, Any]]:
        """Execute a read query and return the rows in the requested format.

        Args:
            query: a ``Select`` statement, or a ``Query`` whose ``.statement``
                is executed.
            output_format: ``JSON`` (string), ``DATAFRAME`` or ``DICT``.
            orient: forwarded to ``DataFrame.to_json``; it is not applied to
                the ``DICT`` output, which uses ``DataFrame.to_dict()`` defaults.

        Raises:
            TypeError: if ``output_format`` is not a known ``OutputFormats`` member.
        """
        with self.session_maker() as session:
            statement = query if isinstance(query, Select) else query.statement
            df = pd.read_sql(statement, session.bind)

        if output_format == OutputFormats.JSON:
            return df.to_json(orient=orient)
        if output_format == OutputFormats.DATAFRAME:
            return df
        if output_format == OutputFormats.DICT:
            return df.to_dict()
        raise TypeError("Unknown output format")

    def insert_query(self) -> None:
        """Placeholder for bulk insert"""
        #accept dataframe & model
        #use pandas to_sql method to perform insert
        #if except return false or maybe raise error
        #else return true
        raise NotImplementedError

    def create_object(self, obj: object) -> Union[int, str]:
        """Insert ``obj`` and return the primary-key value it holds after commit.

        The primary-key attribute is reset to ``None`` before the insert
        (presumably so the database assigns the key - confirm).
        """
        pkey_name = obj.get_pkey_name()
        setattr(obj, pkey_name, None)

        with self.session_maker() as session:
            session.add(obj)
            session.commit()
            # read inside the session so the attribute can still be loaded
            pkey_value = getattr(obj, pkey_name)
        return pkey_value

    def read_object(self, model: Type[Base], pkey: Union[int, str],
            output_format: OutputFormats = OutputFormats.DICT,
            orient: str = 'records') -> Union[Dict[str, Any], pd.DataFrame, str]:
        """Load one object by primary key and return its column values.

        Args:
            model: model class to look up.
            pkey: primary-key value.
            output_format: ``DICT`` (default), ``DATAFRAME`` or ``JSON``.
            orient: forwarded to ``DataFrame.to_json`` for ``JSON`` output.

        Raises:
            ObjectNotFound: if no object with that primary key exists.
            TypeError: if ``output_format`` is not a known ``OutputFormats`` member.
        """
        with self.session_maker() as session:
            obj = self._get_or_raise(session, model, pkey)
            # No commit here: nothing was written, and committing would expire
            # the freshly loaded attributes and force a second SELECT.
            obj_dict = obj.to_dict()

        if output_format == OutputFormats.DICT:
            return obj_dict

        # Wrap scalar values in lists so pandas builds a one-row frame.
        keys = list(obj_dict)
        if not isinstance(obj_dict[keys[0]], list):
            obj_dict = {key: [value] for key, value in obj_dict.items()}

        obj_df = pd.DataFrame.from_dict(obj_dict)
        if output_format == OutputFormats.DATAFRAME:
            return obj_df
        if output_format == OutputFormats.JSON:
            return obj_df.to_json(orient=orient)
        raise TypeError("Unknown output format")

    def update_object(self, model: Type[Base], pkey: Union[int, str], data: Dict[str, Any]) -> None:
        """Apply ``data`` to the object identified by ``pkey`` and commit.

        ``data`` may be a dict or any object exposing ``.dict()``. An entry for
        the primary-key column is ignored, and the caller's mapping is left
        unmodified.

        Raises:
            ObjectNotFound: if no object with that primary key exists.
        """
        if not isinstance(data, dict):
            data = data.dict()
        pkey_name = model.get_pkey_name()
        # Filter into a new dict instead of popping from the caller's one.
        updates = {key: value for key, value in data.items() if key != pkey_name}
        with self.session_maker() as session:
            obj = self._get_or_raise(session, model, pkey)
            obj.update_from_dict(updates)
            session.commit()

    def delete_object(self, model: Type[Base], pkey: Union[int, str]) -> None:
        """Delete the object identified by ``pkey`` and commit.

        Raises:
            ObjectNotFound: if no object with that primary key exists.
        """
        with self.session_maker() as session:
            obj = self._get_or_raise(session, model, pkey)
            session.delete(obj)
            session.commit()
168+
169+
class Models:
    """Namespace exposing one generated model class per ODM2 model definition.

    Each class found in the model modules is rebuilt as a subclass of
    ``base_model`` and attached to this object under the same name.
    """

    def __init__(self, base_model) -> None:
        """Build the model classes from every model module on top of ``base_model``."""
        self._base_model = base_model
        schemas = (
            annotations,
            auth,
            core,
            cv,
            dataquality,
            equipment,
            extensionproperties,
            externalidentifiers,
            labanalyses,
            provenance,
            results,
            samplingfeatures,
            simulation,
        )
        for schema in schemas:
            self._process_schema(schema)

    def _process_schema(self, schema: Any) -> None:
        """Attach a ``base_model`` subclass for every class defined on ``schema``.

        ``schema`` is a module (or any object) whose non-dunder attributes are
        inspected. Attributes that are not classes are skipped so that helper
        values or functions in a model module are never turned into models.
        """
        bases = (self._base_model,)
        for class_name in dir(schema):
            if class_name.startswith('__'):
                continue
            model = getattr(schema, class_name)
            if not isinstance(model, type):
                continue
            # Dunder entries (__dict__, __weakref__, __module__, __doc__, ...)
            # belong to the original class and are not copied over.
            model_attribs = self._trim_dunders(dict(model.__dict__))
            extended_model = type(class_name, bases, model_attribs)
            setattr(self, class_name, extended_model)

    def _trim_dunders(self, dictionary: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``dictionary`` without keys starting with ``__``."""
        return {k: v for k, v in dictionary.items() if not k.startswith('__')}
198+
199+
class ODM2DataModels():
    """Entry point bundling the ODM2 ORM models with a CRUD engine.

    Attributes:
        models: ``Models`` namespace holding the generated model classes.
        odm2_engine: ``ODM2Engine`` bound to a ``sessionmaker`` for ``engine``.
    """

    def __init__(self, engine: sqlalchemy.engine.Engine, schema: str = 'odm2',
            cache_path: Union[str, None] = None) -> None:
        """Create the model base, the model classes and the engine helper.

        Args:
            engine: SQLAlchemy engine used for sessions and table reflection.
            schema: database schema name given to the reflected ``MetaData``.
            cache_path: optional file used to cache the pickled metadata; when
                omitted, models are always reflected from the database.
        """
        self._schema = schema
        self._cache_path = cache_path

        self._engine = engine
        self._session = sqlalchemy.orm.sessionmaker(self._engine)
        self._cached = False
        self.odm2_engine: ODM2Engine = ODM2Engine(self._session)

        self._model_base = self._prepare_model_base()
        self.models = Models(self._model_base)
        if not self._cached:
            self._prepare_automap_models()

    def _prepare_model_base(self):
        """Return the base class for the models and record whether a cache was used.

        With a readable cache file, its unpickled metadata backs a declarative
        base; otherwise an automap base with empty metadata is returned.
        """
        # Only try the cache when a path was given: open(None) raises
        # TypeError, which the FileNotFoundError handler would not catch.
        if self._cache_path:
            try:
                with open(self._cache_path, 'rb') as file:
                    # NOTE(security): pickle.load can execute arbitrary code;
                    # cache_path must point to a trusted, locally written file.
                    metadata = pickle.load(file=file)
                self._cached = True
                return declarative_base(cls=Base, bind=self._engine, metadata=metadata)
            except FileNotFoundError:
                # No cache written yet - fall back to reflection below.
                pass

        metadata = sqlalchemy.MetaData(schema=self._schema)
        self._cached = False
        return automap_base(cls=Base, metadata=metadata)

    def _prepare_automap_models(self):
        """Reflect the database into the automap base and cache the metadata.

        Caching is best effort: a failure to write the cache file only emits a
        ``RuntimeWarning``.
        """
        self._model_base.prepare(self._engine)
        if not self._cache_path:
            return
        try:
            with open(self._cache_path, 'wb') as file:
                pickle.dump(self._model_base.metadata, file)
        except OSError:
            # Covers a missing directory as well as permission and other I/O errors.
            warnings.warn('Unable to cache models which may lead to degraded performance.', RuntimeWarning)

src/odm2datamodels/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
class ObjectNotFound(Exception):
    """Raised when a lookup by primary key finds no matching object.

    Attributes:
        message: human-readable description of the failed lookup.
    """

    def __init__(self, message: str) -> None:
        self.message = message
        # Pass the message on so str(exc), exc.args and tracebacks show it.
        super().__init__(message)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from . import annotations
2+
from . import auth
3+
from . import core
4+
from . import cv
5+
from . import dataquality
6+
from . import equipment
7+
from . import extensionproperties
8+
from . import externalidentifiers
9+
from . import labanalyses
10+
from . import provenance
11+
from . import results
12+
from . import samplingfeatures
13+
from . import simulation
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""Data models corresponding to the tables under the ODM2Annotations schema
2+
Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/schemas/ODM2Annotations.html
3+
"""
4+
5+
class ActionAnnotations:
    """Model for the ActionAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ActionAnnotations.html
    """
7+
8+
class Annotations:
    """Model for the Annotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_Annotations.html
    """
10+
11+
class CategoricalResultValueAnnotations:
    """Model for the CategoricalResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_CategoricalResultValueAnnotations.html
    """
13+
14+
class EquipmentAnnotations:
    """Model for the EquipmentAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_EquipmentAnnotations.html
    """
16+
17+
class MethodAnnotations:
    """Model for the MethodAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_MethodAnnotations.html
    """
19+
20+
class PointCoverageResultValueAnnotations:
    """Model for the PointCoverageResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_PointCoverageResultValueAnnotations.html
    """
22+
23+
class ProfileResultValueAnnotations:
    """Model for the ProfileResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ProfileResultValueAnnotations.html
    """
25+
26+
class ResultAnnotations:
    """Model for the ResultAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ResultAnnotations.html
    """
28+
29+
class SamplingFeatureAnnotations:
    """Model for the SamplingFeatureAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SamplingFeatureAnnotations.html
    """
31+
32+
class SectionResultValueAnnotations:
    """Model for the SectionResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SectionResultValueAnnotations.html
    """
34+
35+
class SpectraResultValueAnnotations:
    """Model for the SpectraResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SpectraResultValueAnnotations.html
    """
37+
38+
class TimeSeriesResultValueAnnotations:
    """Model for the TimeSeriesResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TimeSeriesResultValueAnnotations.html
    """
40+
41+
class TrajectoryResultValueAnnotations:
    """Model for the TrajectoryResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TrajectoryResultValueAnnotations.html
    """
43+
44+
class TransectResultValueAnnotations:
    """Model for the TransectResultValueAnnotations table of the ODM2Annotations schema.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TransectResultValueAnnotations.html
    """

src/odm2datamodels/models/auth.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""
2+
"""
3+
4+
class Accounts:
    """Accounts model of the auth module; the class itself declares no attributes."""
6+
7+
# PRT - although this is a CV table, it is used for account auth, which is why
# it is kept in the auth module. NOTE(review): the earlier wording said it
# "might move" here - confirm the placement is final.
class CV_Permission:
    """CV_Permission model of the auth module; the class itself declares no attributes."""
10+
11+
class OrganizationsPermissions:
    """OrganizationsPermissions model of the auth module; the class itself declares no attributes."""
13+
14+
class OrganizationsSamplingFeatures:
    """OrganizationsSamplingFeatures model of the auth module; the class itself declares no attributes."""
16+
17+
class ResultsPermissions:
    """ResultsPermissions model of the auth module; the class itself declares no attributes."""
19+
20+
class Roles:
    """Roles model of the auth module; the class itself declares no attributes."""
22+
23+
class SamplingFeaturesPermissions:
    """SamplingFeaturesPermissions model of the auth module; the class itself declares no attributes."""

0 commit comments

Comments
 (0)