Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/ISSUE_TEMPLATE/bug-report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,18 @@ body:
attributes:
label: Describe the bug
description: What went wrong?
placeholder: 'A clear and concise description of the bug... Did you get different data output? Did something work but fail in other ways?'
placeholder: 'A clear and concise description of the bug... Did you get different
data output? Did something work but fail in other ways?'
validations:
required: true

- type: textarea
id: logs
attributes:
label: REDACTED relevant log output
description:
Please copy and paste any relevant log output and/or the stack trace. Removing sensitive
information. This will be automatically formatted into code, so no need for
backticks.
description: Please copy and paste any relevant log output and/or the stack
trace. Remove sensitive information. This will be automatically formatted
into code, so no need for backticks.
render: shell
validations:
required: true
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/code-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@ jobs:
# formatted. The code is not automatically reformatted like it is when running the
# pre-commit hooks.
- uses: psf/black@stable
with:
# Specify the desired Black version
version: "24.10.0"
- uses: isort/isort-action@v1
39 changes: 38 additions & 1 deletion cwms/timeseries/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,13 @@ def combine_timeseries_results(results: List[Data]) -> Data:
combined_json["end"] = combined_df["date-time"].max().isoformat()
combined_json["total"] = len(combined_df)

combined_df["date-time"] = combined_df["date-time"].apply(
lambda x: int(pd.Timestamp(x).timestamp() * 1000)
)
combined_df["date-time"] = combined_df["date-time"].astype("Int64")
combined_df = combined_df.reindex(columns=["date-time", "value", "quality-code"])
# Update the "values" key in the JSON to include the combined data
combined_json["values"] = combined_df.to_dict(orient="records")
combined_json["values"] = combined_df.values.tolist()

# Return a new cwms Data object with the combined DataFrame and updated metadata
return Data(combined_json, selector="values")
Expand Down Expand Up @@ -455,6 +460,32 @@ def store_multi_timeseries_df(
office_id: str,
max_workers: Optional[int] = 30,
) -> None:
"""stored mulitple timeseries from a dataframe. The dataframe must be a metled dataframe with columns
for date-time, value, quality-code(optional), ts_id, units, and version_date(optional). The dataframe will
be grouped by ts_id and version_date and each group will be posted as a separate timeseries using the store_timeseries
function. If version_date column is not included then all data will be stored as unversioned data. If version_date
column is included then data will be grouped by ts_id and version_date and stored as versioned timeseries with the
version date specified in the version_date column.

Parameters
----------
data: dataframe
Time Series data to be stored. Dataframe must be melted with columns for date-time, value, quality-code(optional),
ts_id, units, and version_date(optional).
date-time value quality-code ts_id units version_date
0 2023-12-20T14:45:00.000-05:00 93.1 0 OMA.Stage.Inst.6Hours.0.Fcst-MRBWM-GRFT ft 2024-04-22 07:00:00-05:00
1 2023-12-20T15:00:00.000-05:00 99.8 0 OMA.Stage.Inst.6Hours.0.Fcst-MRBWM-GRFT ft 2024-04-22 07:00:00-05:00
2 2023-12-20T15:15:00.000-05:00 98.5 0 OMA.Stage.Inst.6Hours.0.Fcst-MRBWM-GRFT ft 2024-04-22 07:15:00-05:00
office_id: string
The owning office of the time series(s).
max_workers: int, optional, default is 30
The number of threads (i.e. the pool size) used by concurrent.futures.ThreadPoolExecutor.

Returns
-------
None
"""

def store_ts_ids(
data: pd.DataFrame,
ts_id: str,
Expand All @@ -476,6 +507,12 @@ def store_ts_ids(
print(f"Error processing {ts_id}: {e}")
return None

required_columns = ["date-time", "value", "ts_id", "units"]
for col in required_columns:
if col not in data.columns:
raise TypeError(
f"{col} is a required column in data when posting multiple timeseries from a dataframe. Make sure you are using a melted dataframe with columns for date-time, value, quality-code(optional), ts_id, units, and version_date(optional)."
)
ts_data_all = data.copy()
if "version_date" not in ts_data_all.columns:
ts_data_all = ts_data_all.assign(version_date=pd.to_datetime(pd.Series([])))
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "cwms-python"
repository = "https://github.com/HydrologicEngineeringCenter/cwms-python"

version = "1.0.1"
version = "1.0.2"


packages = [
Expand Down
11 changes: 6 additions & 5 deletions tests/cda/locations/location_groups_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
# Setup and teardown fixture for test location
@pytest.fixture(scope="module", autouse=True)
def setup_data():

TEST_LATITUDE = 45.1704758
TEST_LONGITUDE = -92.8411439

Expand All @@ -68,9 +67,12 @@ def setup_data():
yield

# Delete location and TS after tests
cwms.delete_location(
location_id=TEST_LOCATION_ID, office_id=TEST_OFFICE, cascade_delete=True
)
try:
cwms.delete_location(
location_id=TEST_LOCATION_ID, office_id=TEST_OFFICE, cascade_delete=True
)
except Exception as e:
print(f"Failed to delete location {TEST_LOCATION_ID}: {e}")


@pytest.fixture(autouse=True)
Expand All @@ -79,7 +81,6 @@ def init_session(request):


def test_store_location_group():

lg.store_location_groups(data=LOC_GROUP_DATA)
data = lg.get_location_group(
loc_group_id=TEST_GROUP_ID,
Expand Down
86 changes: 59 additions & 27 deletions tests/cda/timeseries/timeseries_CDA_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
import cwms.timeseries.timeseries as ts

TEST_OFFICE = "MVP"
TEST_LOCATION_ID = "pytest_group"
TEST_LOCATION_ID = "pytest_ts"
TEST_TSID = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Test"
TEST_TSID_MULTI = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi"
TEST_TSID_MULTI1 = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-1"
TEST_TSID_MULTI2 = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-2"
TEST_TSID_STORE = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Store"
TEST_TSID_CHUNK_MULTI = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-Chunk"
TEST_TSID_COPY = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Copy"
TEST_TSID_DELETE = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Delete"
TS_ID_REV_TEST = TEST_TSID_MULTI.replace("Raw-Multi", "Raw-Rev-Test")
# Generate 15-minute interval timestamps
Expand All @@ -29,6 +30,7 @@
TEST_TSID_MULTI2,
TEST_TSID_STORE,
TEST_TSID_CHUNK_MULTI,
TEST_TSID_COPY,
]


Expand Down Expand Up @@ -71,6 +73,11 @@
[DF_MULTI_TIMESERIES1, DF_MULTI_TIMESERIES2]
).reset_index(drop=True)

DT = datetime(2023, 1, 1, 0, 0, tzinfo=timezone.utc)
EPOCH_MS = int(DT.timestamp() * 1000)
BEGIN = DT - timedelta(minutes=5)
END = DT + timedelta(minutes=5)


@pytest.fixture(scope="module", autouse=True)
def setup_data():
Expand All @@ -91,6 +98,13 @@ def setup_data():
}
cwms.store_location(location)

ts_json = {
"name": TEST_TSID_DELETE,
"office-id": TEST_OFFICE,
"units": "ft",
"values": [[EPOCH_MS, 99, 0]],
}
ts.store_timeseries(ts_json)
yield
for ts_id in TSIDS:
try:
Expand All @@ -108,40 +122,32 @@ def init_session():


def test_store_timeseries():
now = datetime.now(timezone.utc).replace(microsecond=0)
now_epoch_ms = int(now.timestamp() * 1000)
iso_now = now.isoformat()
ts_json = {
"name": TEST_TSID_STORE,
"office-id": TEST_OFFICE,
"units": "ft",
"values": [[now_epoch_ms, 99, 0]],
"begin": iso_now,
"end": iso_now,
"version-date": iso_now,
"time-zone": "UTC",
"values": [[EPOCH_MS, 99, 0]],
}
ts.store_timeseries(ts_json)
data = ts.get_timeseries(TEST_TSID_STORE, TEST_OFFICE).json
data = ts.get_timeseries(TEST_TSID_STORE, TEST_OFFICE, begin=BEGIN, end=END).json
assert data["name"] == TEST_TSID_STORE
assert data["office-id"] == TEST_OFFICE
assert data["units"] == "ft"
assert data["values"][0][1] == pytest.approx(99)


def test_get_timeseries():
data = ts.get_timeseries(TEST_TSID_STORE, TEST_OFFICE).json
data = ts.get_timeseries(TEST_TSID_STORE, TEST_OFFICE, begin=BEGIN, end=END).json
assert data["name"] == TEST_TSID_STORE
assert data["office-id"] == TEST_OFFICE
assert data["units"] == "ft"
assert data["values"][0][1] == pytest.approx(99)


def test_timeseries_df_to_json():
dt = datetime(2023, 1, 1, 0, 0, tzinfo=timezone.utc)
df = pd.DataFrame(
{
"date-time": [dt],
"date-time": [DT],
"value": [42],
"quality-code": [0],
}
Expand All @@ -154,25 +160,27 @@ def test_timeseries_df_to_json():
assert json_out["office-id"] == office, "Incorrect office-id in output"
assert json_out["units"] == units, "Incorrect units in output"
assert json_out["values"] == [
[dt.isoformat(), 42, 0]
[DT.isoformat(), 42, 0]
], "Values do not match expected"


def test_store_multi_timeseries_df():
now = datetime.now(timezone.utc).replace(microsecond=0)
TS_ID_REV_TEST = TEST_TSID_MULTI.replace("Raw-Multi", "Raw-Rev-Test")
df = pd.DataFrame(
{
"date-time": [now, now],
"date-time": [DT, DT],
"value": [7, 8],
"quality-code": [0, 0],
"ts_id": [TEST_TSID_MULTI, TS_ID_REV_TEST],
"units": ["ft", "ft"],
}
)
ts.store_multi_timeseries_df(df, TEST_OFFICE)
data1 = ts.get_timeseries(TEST_TSID_MULTI, TEST_OFFICE, multithread=False).json
data2 = ts.get_timeseries(TS_ID_REV_TEST, TEST_OFFICE, multithread=False).json
data1 = ts.get_timeseries(
TEST_TSID_MULTI, TEST_OFFICE, multithread=False, begin=BEGIN, end=END
).json
data2 = ts.get_timeseries(
TS_ID_REV_TEST, TEST_OFFICE, multithread=False, begin=BEGIN, end=END
).json
assert data1["name"] == TEST_TSID_MULTI
assert data1["office-id"] == TEST_OFFICE
assert data1["units"] == "ft"
Expand Down Expand Up @@ -228,8 +236,9 @@ def test_get_multi_timeseries_chunk_df():


def test_get_multi_timeseries_df():
TS_ID_REV_TEST = TEST_TSID_MULTI.replace("Raw-Multi", "Raw-Rev-Test")
df = ts.get_multi_timeseries_df([TEST_TSID_MULTI, TS_ID_REV_TEST], TEST_OFFICE)
df = ts.get_multi_timeseries_df(
[TEST_TSID_MULTI, TS_ID_REV_TEST], TEST_OFFICE, begin=BEGIN, end=END
)
assert df is not None, "Returned DataFrame is None"
assert not df.empty, "Returned DataFrame is empty"
assert any(
Expand Down Expand Up @@ -266,6 +275,33 @@ def test_store_timeseries_chunk_ts():
), f"Data frames do not match: original = {DF_CHUNK_MULTI.describe()}, stored = {df.describe()}"


def test_copy_timeseries_chunk_json():
data_json = ts.get_timeseries(
ts_id=TEST_TSID_CHUNK_MULTI,
office_id=TEST_OFFICE,
begin=START_DATE_CHUNK_MULTI,
end=END_DATE_CHUNK_MULTI,
max_days_per_chunk=14,
unit="SI",
).json
data_json["name"] = TEST_TSID_COPY
ts.store_timeseries(data_json)

data_multithread = ts.get_timeseries(
ts_id=TEST_TSID_COPY,
office_id=TEST_OFFICE,
begin=START_DATE_CHUNK_MULTI,
end=END_DATE_CHUNK_MULTI,
max_days_per_chunk=14,
unit="SI",
)
df = data_multithread.df
# make sure the dataframe matches stored dataframe
pdt.assert_frame_equal(
df, DF_CHUNK_MULTI
), f"Data frames do not match: original = {DF_CHUNK_MULTI.describe()}, stored = {df.describe()}"


def test_read_timeseries_chunk_ts():
# Capture the log output
data_multithread = ts.get_timeseries(
Expand Down Expand Up @@ -296,10 +332,6 @@ def test_read_timeseries_chunk_ts():


def test_delete_timeseries():
TS_ID_REV_TEST = TEST_TSID_MULTI.replace("Raw-Multi", "Raw-Rev-Test")
now = datetime.now(timezone.utc).replace(microsecond=0)
begin = now - timedelta(minutes=15)
end = now + timedelta(minutes=15)
ts.delete_timeseries(TS_ID_REV_TEST, TEST_OFFICE, begin, end)
result = ts.get_timeseries(TS_ID_REV_TEST, TEST_OFFICE)
ts.delete_timeseries(TEST_TSID_DELETE, TEST_OFFICE, BEGIN, END)
result = ts.get_timeseries(TEST_TSID_DELETE, TEST_OFFICE)
assert result is None or result.json.get("values", []) == []
Loading