Project: ModelZoo / TimeGPT-pytorch

Commit f42429f6, authored Nov 19, 2025 by bailuo

readme

Showing 20 changed files with 2534 additions and 0 deletions.
nixtla_tests/fixtures/dask_fixtures.py                        +44   -0
nixtla_tests/fixtures/ray_fixtures.py                         +42   -0
nixtla_tests/fixtures/spark_fixtures.py                       +54   -0
nixtla_tests/helpers/__init__.py                               +0   -0
nixtla_tests/helpers/checks.py                               +675   -0
nixtla_tests/helpers/client_helper.py                         +17   -0
nixtla_tests/helpers/states.py                                 +9   -0
nixtla_tests/nixtla_client/__init__.py                         +0   -0
nixtla_tests/nixtla_client/test_audit_data.py                +282   -0
nixtla_tests/nixtla_client/test_client.py                    +183   -0
nixtla_tests/nixtla_client/test_dask.py                       +46   -0
nixtla_tests/nixtla_client/test_detect_anomalies_online.py    +39   -0
nixtla_tests/nixtla_client/test_finetune_and_forecast.py     +138   -0
nixtla_tests/nixtla_client/test_nixtla_client.py             +516   -0
nixtla_tests/nixtla_client/test_ray.py                        +60   -0
nixtla_tests/nixtla_client/test_retry.py                      +89   -0
nixtla_tests/nixtla_client/test_spark.py                      +49   -0
nixtla_tests/utils/test_utils.py                             +166   -0
pyproject.toml                                               +115   -0
scripts/filter_licenses.py                                    +10   -0
nixtla_tests/fixtures/dask_fixtures.py (new file, mode 100644)

```python
import pytest

try:
    import dask.dataframe as dd
    from dask.distributed import Client

    @pytest.fixture(scope="module")
    def dask_client():
        with Client() as client:
            yield client

    @pytest.fixture(scope="module")
    def dask_df(distributed_series):
        return dd.from_pandas(distributed_series, npartitions=2)

    @pytest.fixture(scope="module")
    def dask_diff_cols_df(distributed_series, renamer):
        return dd.from_pandas(
            distributed_series.rename(columns=renamer),
            npartitions=2,
        )

    @pytest.fixture(scope="module")
    def dask_df_x(distributed_df_x):
        return dd.from_pandas(distributed_df_x, npartitions=2)

    @pytest.fixture(scope="module")
    def dask_future_ex_vars_df(distributed_future_ex_vars_df):
        return dd.from_pandas(distributed_future_ex_vars_df, npartitions=2)

    @pytest.fixture(scope="module")
    def dask_df_x_diff_cols(distributed_df_x, renamer):
        return dd.from_pandas(distributed_df_x.rename(columns=renamer), npartitions=2)

    @pytest.fixture(scope="module")
    def dask_future_ex_vars_df_diff_cols(distributed_future_ex_vars_df, renamer):
        return dd.from_pandas(
            distributed_future_ex_vars_df.rename(columns=renamer), npartitions=2
        )
except ImportError:
    # If Dask is not installed, we skip the fixtures
    pytest.skip("Dask is not installed, skipping Dask fixtures", allow_module_level=True)
```
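The Dask fixtures above (and the Ray and Spark fixtures below) all consume pandas-level fixtures, `distributed_series`, `renamer`, `distributed_df_x`, and `distributed_future_ex_vars_df`, that live in the suite's shared `conftest.py`, which is not part of this diff. As a rough, hypothetical sketch of the shape those upstream fixtures are assumed to have (long-format frames keyed by `unique_id`/`ds` with a `y` target, and a renamer matching the `id_col`/`time_col`/`target_col` defaults used in `checks.py`):

```python
# Hypothetical sketch only: the real definitions live in the shared conftest.py,
# which is not included in this commit.
import pytest
from utilsforecast.data import generate_series  # assumed synthetic-data helper


@pytest.fixture(scope="module")
def distributed_series():
    # long-format frame with columns: unique_id, ds, y
    return generate_series(n_series=4, freq="D", min_length=100, max_length=100)


@pytest.fixture(scope="module")
def renamer():
    # maps the default column names to the "diff cols" variants used in the tests
    return {"unique_id": "id_col", "ds": "time_col", "y": "target_col"}
```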
nixtla_tests/fixtures/ray_fixtures.py (new file, mode 100644)

```python
import pytest

try:
    import ray
    from ray.cluster_utils import Cluster

    @pytest.fixture(scope="module")
    def ray_cluster_setup():
        ray_cluster = Cluster(initialize_head=True, head_node_args={"num_cpus": 2})
        with ray.init(address=ray_cluster.address, ignore_reinit_error=True):
            # add mock node to simulate a cluster
            ray_cluster.add_node(num_cpus=2)
            yield

    @pytest.fixture(scope="module")
    def ray_df(distributed_series):
        return ray.data.from_pandas(distributed_series)

    @pytest.fixture(scope="module")
    def ray_diff_cols_df(distributed_series, renamer):
        return ray.data.from_pandas(distributed_series.rename(columns=renamer))

    @pytest.fixture(scope="module")
    def ray_df_x(distributed_df_x):
        return ray.data.from_pandas(distributed_df_x)

    @pytest.fixture(scope="module")
    def ray_future_ex_vars_df(distributed_future_ex_vars_df):
        return ray.data.from_pandas(distributed_future_ex_vars_df)

    @pytest.fixture(scope="module")
    def ray_df_x_diff_cols(distributed_df_x, renamer):
        return ray.data.from_pandas(distributed_df_x.rename(columns=renamer))

    @pytest.fixture(scope="module")
    def ray_future_ex_vars_df_diff_cols(distributed_future_ex_vars_df, renamer):
        return ray.data.from_pandas(
            distributed_future_ex_vars_df.rename(columns=renamer)
        )
except ImportError:
    # If Ray is not installed, we skip the fixtures
    pytest.skip("Ray is not installed, skipping Ray fixtures", allow_module_level=True)
```
nixtla_tests/fixtures/spark_fixtures.py (new file, mode 100644)

```python
import pytest

try:
    from pyspark.sql import SparkSession

    @pytest.fixture(scope="module")
    def spark_client():
        with SparkSession.builder.getOrCreate() as spark:
            yield spark

    @pytest.fixture(scope="module")
    def spark_df(spark_client, distributed_series):
        spark_df = spark_client.createDataFrame(distributed_series).repartition(2)
        return spark_df

    @pytest.fixture(scope="module")
    def spark_diff_cols_df(spark_client, distributed_series, renamer):
        spark_df = spark_client.createDataFrame(
            distributed_series.rename(columns=renamer)
        ).repartition(2)
        return spark_df

    @pytest.fixture(scope="module")
    def spark_df_x(spark_client, distributed_df_x):
        spark_df = spark_client.createDataFrame(distributed_df_x).repartition(2)
        return spark_df

    @pytest.fixture(scope="module")
    def spark_df_x_diff_cols(spark_client, distributed_df_x, renamer):
        spark_df = spark_client.createDataFrame(
            distributed_df_x.rename(columns=renamer)
        ).repartition(2)
        return spark_df

    @pytest.fixture(scope="module")
    def spark_future_ex_vars_df(spark_client, distributed_future_ex_vars_df):
        spark_df = spark_client.createDataFrame(distributed_future_ex_vars_df).repartition(2)
        return spark_df

    @pytest.fixture(scope="module")
    def spark_future_ex_vars_df_diff_cols(spark_client, distributed_future_ex_vars_df, renamer):
        spark_df = spark_client.createDataFrame(
            distributed_future_ex_vars_df.rename(columns=renamer)
        ).repartition(2)
        return spark_df
except ImportError:
    # If PySpark is not installed, we skip the fixtures
    pytest.skip("PySpark is not installed, skipping Spark fixtures", allow_module_level=True)
```
nixtla_tests/helpers/__init__.py (new file, mode 100644, empty)
nixtla_tests/helpers/checks.py (new file, mode 100644)

```python
import fugue
import fugue.api as fa
import numpy as np
import pandas as pd
import pytest
import time

from nixtla.nixtla_client import NixtlaClient
from typing import Callable

# setting used for distributed related tests
ATOL = 1e-3


# test num partitions: we need to be sure that we can recover the same results
# using a for loop.
# Note: be aware that num_partitions can produce different results when
# finetune_steps is used.
def check_num_partitions_same_results(method: Callable, num_partitions: int, **kwargs):
    res_partitioned = method(**kwargs, num_partitions=num_partitions)
    res_no_partitioned = method(**kwargs, num_partitions=1)
    sort_by = ["unique_id", "ds"]
    if "cutoff" in res_partitioned:
        sort_by.extend(["cutoff"])
    pd.testing.assert_frame_equal(
        res_partitioned.sort_values(sort_by).reset_index(drop=True),
        res_no_partitioned.sort_values(sort_by).reset_index(drop=True),
        rtol=1e-2,
        atol=1e-2,
    )


def check_retry_behavior(
    df,
    side_effect,
    side_effect_exception,
    max_retries=5,
    retry_interval=5,
    max_wait_time=40,
    should_retry=True,
    sleep_seconds=5,
):
    mock_nixtla_client = NixtlaClient(
        max_retries=max_retries,
        retry_interval=retry_interval,
        max_wait_time=max_wait_time,
    )
    mock_nixtla_client._make_request = side_effect
    init_time = time.time()
    with pytest.raises(side_effect_exception):
        mock_nixtla_client.forecast(df=df, h=12, time_col="timestamp", target_col="value")
    total_mock_time = time.time() - init_time
    if should_retry:
        approx_expected_time = min((max_retries - 1) * retry_interval, max_wait_time)
        upper_expected_time = min(max_retries * retry_interval, max_wait_time)
        assert total_mock_time >= approx_expected_time, "It is not retrying as expected"
        # preprocessing time before the first api call should be less than 60 seconds
        assert (
            total_mock_time - upper_expected_time - (max_retries - 1) * sleep_seconds
            <= sleep_seconds
        )
    else:
        assert total_mock_time <= max_wait_time


# test we recover the same <mean> forecasts
# with and without restricting input (add_history)
def check_equal_fcsts_add_history(nixtla_client, **kwargs):
    fcst_no_rest_df = nixtla_client.forecast(**kwargs, add_history=True)
    fcst_no_rest_df = (
        fcst_no_rest_df.groupby("unique_id", observed=True)
        .tail(kwargs["h"])
        .reset_index(drop=True)
    )
    fcst_rest_df = nixtla_client.forecast(**kwargs)
    pd.testing.assert_frame_equal(fcst_no_rest_df, fcst_rest_df, atol=1e-4, rtol=1e-3)
    return fcst_rest_df


def check_quantiles(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    id_col: str = "id_col",
    time_col: str = "time_col",
):
    test_qls = list(np.arange(0.1, 1, 0.1))
    exp_q_cols = [f"TimeGPT-q-{int(q * 100)}" for q in test_qls]

    def test_method_qls(method, **kwargs):
        df_qls = method(
            df=df, h=12, id_col=id_col, time_col=time_col, quantiles=test_qls, **kwargs
        )
        df_qls = fa.as_pandas(df_qls)
        assert all(col in df_qls.columns for col in exp_q_cols)
        # test monotonicity of quantiles
        df_qls.apply(lambda x: x.is_monotonic_increasing, axis=1).sum() == len(exp_q_cols)

    test_method_qls(nixtla_client.forecast)
    test_method_qls(nixtla_client.forecast, add_history=True)
    test_method_qls(nixtla_client.cross_validation)


def check_cv_same_results_num_partitions(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    horizon: int = 12,
    id_col: str = "unique_id",
    time_col: str = "ds",
    **fcst_kwargs,
):
    fcst_df = nixtla_client.cross_validation(
        df=df, h=horizon, num_partitions=1, id_col=id_col, time_col=time_col, **fcst_kwargs
    )
    fcst_df = fa.as_pandas(fcst_df)
    fcst_df_2 = nixtla_client.cross_validation(
        df=df, h=horizon, num_partitions=2, id_col=id_col, time_col=time_col, **fcst_kwargs
    )
    fcst_df_2 = fa.as_pandas(fcst_df_2)
    pd.testing.assert_frame_equal(
        fcst_df.sort_values([id_col, time_col]).reset_index(drop=True),
        fcst_df_2.sort_values([id_col, time_col]).reset_index(drop=True),
        atol=ATOL,
    )


def check_forecast_diff_results_diff_models(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    horizon: int = 12,
    id_col: str = "unique_id",
    time_col: str = "ds",
    **fcst_kwargs,
):
    fcst_df = nixtla_client.forecast(
        df=df,
        h=horizon,
        num_partitions=1,
        id_col=id_col,
        time_col=time_col,
        model="timegpt-1",
        **fcst_kwargs,
    )
    fcst_df = fa.as_pandas(fcst_df)
    fcst_df_2 = nixtla_client.forecast(
        df=df,
        h=horizon,
        num_partitions=1,
        id_col=id_col,
        time_col=time_col,
        model="timegpt-1-long-horizon",
        **fcst_kwargs,
    )
    fcst_df_2 = fa.as_pandas(fcst_df_2)
    with pytest.raises(AssertionError, match=r'\(column name="TimeGPT"\) are different'):
        pd.testing.assert_frame_equal(
            fcst_df.sort_values([id_col, time_col]).reset_index(drop=True),
            fcst_df_2.sort_values([id_col, time_col]).reset_index(drop=True),
        )


def check_forecast(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    horizon: int = 12,
    id_col: str = "unique_id",
    time_col: str = "ds",
    n_series_to_check: int = 4,
    **fcst_kwargs,
):
    fcst_df = nixtla_client.forecast(
        df=df, h=horizon, id_col=id_col, time_col=time_col, **fcst_kwargs
    )
    fcst_df = fa.as_pandas(fcst_df)
    assert n_series_to_check * 12 == len(fcst_df)
    cols = fcst_df.columns.to_list()
    exp_cols = [id_col, time_col, "TimeGPT"]
    if "level" in fcst_kwargs:
        level = sorted(fcst_kwargs["level"])
        exp_cols.extend([f"TimeGPT-lo-{lv}" for lv in reversed(level)])
        exp_cols.extend([f"TimeGPT-hi-{lv}" for lv in level])
    assert cols == exp_cols


def check_forecast_same_results_num_partitions(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    horizon: int = 12,
    id_col: str = "unique_id",
    time_col: str = "ds",
    **fcst_kwargs,
):
    fcst_df = nixtla_client.forecast(
        df=df, h=horizon, num_partitions=1, id_col=id_col, time_col=time_col, **fcst_kwargs
    )
    fcst_df = fa.as_pandas(fcst_df)
    fcst_df_2 = nixtla_client.forecast(
        df=df, h=horizon, num_partitions=2, id_col=id_col, time_col=time_col, **fcst_kwargs
    )
    fcst_df_2 = fa.as_pandas(fcst_df_2)
    pd.testing.assert_frame_equal(
        fcst_df.sort_values([id_col, time_col]).reset_index(drop=True),
        fcst_df_2.sort_values([id_col, time_col]).reset_index(drop=True),
        atol=ATOL,
    )


def check_forecast_dataframe(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    n_series_to_check: int = 4,
):
    check_cv_same_results_num_partitions(nixtla_client, df, n_windows=2, step_size=1)
    check_cv_same_results_num_partitions(
        nixtla_client, df, n_windows=3, step_size=None, horizon=1
    )
    check_cv_same_results_num_partitions(
        nixtla_client, df, model="timegpt-1-long-horizon", horizon=1
    )
    check_forecast_diff_results_diff_models(nixtla_client, df)
    check_forecast(nixtla_client, df, num_partitions=1)
    check_forecast(
        nixtla_client,
        df,
        level=[90, 80],
        num_partitions=1,
        n_series_to_check=n_series_to_check,
    )
    check_forecast_same_results_num_partitions(nixtla_client, df)


def check_forecast_dataframe_diff_cols(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    id_col: str = "id_col",
    time_col: str = "time_col",
    target_col: str = "target_col",
):
    check_forecast(
        nixtla_client,
        df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        num_partitions=1,
    )
    check_forecast(
        nixtla_client,
        df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        level=[90, 80],
        num_partitions=1,
    )
    check_forecast_same_results_num_partitions(
        nixtla_client, df, id_col=id_col, time_col=time_col, target_col=target_col
    )


def check_anomalies(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    id_col: str = "unique_id",
    time_col: str = "ds",
    target_col: str = "y",
    **anomalies_kwargs,
):
    anomalies_df = nixtla_client.detect_anomalies(
        df=df, id_col=id_col, time_col=time_col, target_col=target_col, **anomalies_kwargs
    )
    anomalies_df = fa.as_pandas(anomalies_df)
    assert (fa.as_pandas(df)[id_col].unique() == anomalies_df[id_col].unique()).all()
    cols = anomalies_df.columns.to_list()
    level = anomalies_kwargs.get("level", 99)
    exp_cols = [
        id_col,
        time_col,
        target_col,
        "TimeGPT",
        "anomaly",
        f"TimeGPT-lo-{level}",
        f"TimeGPT-hi-{level}",
    ]
    assert cols == exp_cols


def check_anomalies_same_results_num_partitions(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    id_col: str = "unique_id",
    time_col: str = "ds",
    target_col: str = "y",
    **anomalies_kwargs,
):
    anomalies_df = nixtla_client.detect_anomalies(
        df=df,
        num_partitions=1,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        **anomalies_kwargs,
    )
    anomalies_df = fa.as_pandas(anomalies_df)
    anomalies_df_2 = nixtla_client.detect_anomalies(
        df=df,
        num_partitions=2,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        **anomalies_kwargs,
    )
    anomalies_df_2 = fa.as_pandas(anomalies_df_2)
    pd.testing.assert_frame_equal(
        anomalies_df.sort_values([id_col, time_col]).reset_index(drop=True),
        anomalies_df_2.sort_values([id_col, time_col]).reset_index(drop=True),
        atol=ATOL,
    )


def check_anomalies_dataframe(nixtla_client: NixtlaClient, df: fugue.AnyDataFrame):
    check_anomalies(nixtla_client, df, num_partitions=1)
    check_anomalies(nixtla_client, df, level=90, num_partitions=1)
    check_anomalies_same_results_num_partitions(nixtla_client, df)


def check_online_anomalies(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    id_col: str = "unique_id",
    time_col: str = "ds",
    target_col: str = "y",
    level=99,
    **realtime_anomalies_kwargs,
):
    anomalies_df = nixtla_client.detect_anomalies_online(
        df=df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        **realtime_anomalies_kwargs,
    )
    anomalies_df = fa.as_pandas(anomalies_df)
    assert (fa.as_pandas(df)[id_col].unique() == anomalies_df[id_col].unique()).all()
    cols = anomalies_df.columns.to_list()
    exp_cols = [
        id_col,
        time_col,
        target_col,
        "TimeGPT",
        "anomaly",
        "anomaly_score",
        f"TimeGPT-lo-{level}",
        f"TimeGPT-hi-{level}",
    ]
    assert cols == exp_cols


def check_anomalies_online_same_results_num_partitions(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    id_col: str = "unique_id",
    time_col: str = "ds",
    target_col: str = "y",
    **realtime_anomalies_kwargs,
):
    anomalies_df = nixtla_client.detect_anomalies_online(
        df=df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        num_partitions=1,
        **realtime_anomalies_kwargs,
    )
    anomalies_df = fa.as_pandas(anomalies_df)
    anomalies_df_2 = nixtla_client.detect_anomalies_online(
        df=df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        num_partitions=2,
        **realtime_anomalies_kwargs,
    )
    anomalies_df_2 = fa.as_pandas(anomalies_df_2)
    pd.testing.assert_frame_equal(
        anomalies_df.sort_values([id_col, time_col]).reset_index(drop=True),
        anomalies_df_2.sort_values([id_col, time_col]).reset_index(drop=True),
        atol=ATOL,
    )


def check_anomalies_online_dataframe(nixtla_client: NixtlaClient, df: fugue.AnyDataFrame):
    check_online_anomalies(
        nixtla_client,
        df,
        h=20,
        detection_size=5,
        threshold_method="univariate",
        level=99,
        num_partitions=1,
    )
    check_anomalies_online_same_results_num_partitions(
        nixtla_client,
        df,
        h=20,
        detection_size=5,
        threshold_method="univariate",
        level=99,
    )


def check_anomalies_dataframe_diff_cols(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    id_col: str = "id_col",
    time_col: str = "time_col",
    target_col: str = "target_col",
):
    check_anomalies(
        nixtla_client,
        df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        num_partitions=1,
    )
    check_anomalies(
        nixtla_client,
        df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        level=90,
        num_partitions=1,
    )
    check_anomalies_same_results_num_partitions(
        nixtla_client, df, id_col=id_col, time_col=time_col, target_col=target_col
    )


def check_forecast_x(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    X_df: fugue.AnyDataFrame,
    horizon: int = 24,
    id_col: str = "unique_id",
    time_col: str = "ds",
    target_col: str = "y",
    **fcst_kwargs,
):
    fcst_df = nixtla_client.forecast(
        df=df,
        X_df=X_df,
        h=horizon,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        **fcst_kwargs,
    )
    fcst_df = fa.as_pandas(fcst_df)
    n_series = fa.as_pandas(X_df)[id_col].nunique()
    assert n_series * horizon == len(fcst_df)
    cols = fcst_df.columns.to_list()
    exp_cols = [id_col, time_col, "TimeGPT"]
    if "level" in fcst_kwargs:
        level = sorted(fcst_kwargs["level"])
        exp_cols.extend([f"TimeGPT-lo-{lv}" for lv in reversed(level)])
        exp_cols.extend([f"TimeGPT-hi-{lv}" for lv in level])
    assert cols == exp_cols
    fcst_df_2 = nixtla_client.forecast(
        df=df,
        h=horizon,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        **fcst_kwargs,
    )
    fcst_df_2 = fa.as_pandas(fcst_df_2)
    equal_arrays = np.array_equal(
        fcst_df.sort_values([id_col, time_col])["TimeGPT"].values,
        fcst_df_2.sort_values([id_col, time_col])["TimeGPT"].values,
    )
    assert not equal_arrays, "Forecasts with and without ex vars are equal"


def check_forecast_x_same_results_num_partitions(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    X_df: fugue.AnyDataFrame,
    horizon: int = 24,
    id_col: str = "unique_id",
    time_col: str = "ds",
    target_col: str = "y",
    **fcst_kwargs,
):
    fcst_df = nixtla_client.forecast(
        df=df,
        X_df=X_df,
        h=horizon,
        num_partitions=1,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        **fcst_kwargs,
    )
    fcst_df = fa.as_pandas(fcst_df)
    fcst_df_2 = nixtla_client.forecast(
        df=df,
        h=horizon,
        num_partitions=2,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        **fcst_kwargs,
    )
    fcst_df_2 = fa.as_pandas(fcst_df_2)
    equal_arrays = np.array_equal(
        fcst_df.sort_values([id_col, time_col])["TimeGPT"].values,
        fcst_df_2.sort_values([id_col, time_col])["TimeGPT"].values,
    )
    assert not equal_arrays, "Forecasts with and without ex vars are equal"


def check_forecast_x_dataframe(
    nixtla_client: NixtlaClient, df: fugue.AnyDataFrame, X_df: fugue.AnyDataFrame
):
    check_forecast_x(nixtla_client, df, X_df, num_partitions=1)
    check_forecast_x(nixtla_client, df, X_df, level=[90, 80], num_partitions=1)
    check_forecast_x_same_results_num_partitions(nixtla_client, df, X_df)


def check_forecast_x_dataframe_diff_cols(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    X_df: fugue.AnyDataFrame,
    id_col: str = "id_col",
    time_col: str = "time_col",
    target_col: str = "target_col",
):
    check_forecast_x(
        nixtla_client,
        df,
        X_df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        num_partitions=1,
    )
    check_forecast_x(
        nixtla_client,
        df,
        X_df,
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        level=[90, 80],
        num_partitions=1,
    )
    check_forecast_x_same_results_num_partitions(
        nixtla_client, df, X_df, id_col=id_col, time_col=time_col, target_col=target_col
    )


def check_finetuned_model(
    nixtla_client: NixtlaClient,
    df: fugue.AnyDataFrame,
    model_id2: str,
):
    # fine-tuning on distributed fails
    with pytest.raises(ValueError, match="Can only fine-tune on pandas or polars dataframes."):
        nixtla_client.finetune(df=df)
    # forecast
    local_fcst = nixtla_client.forecast(
        df=fa.as_pandas(df),
        h=5,
        finetuned_model_id=model_id2,
    )
    distr_fcst = (
        fa.as_pandas(nixtla_client.forecast(df=df, h=5, finetuned_model_id=model_id2))
        .sort_values(["unique_id", "ds"])
        .reset_index(drop=True)
    )
    pd.testing.assert_frame_equal(
        local_fcst, distr_fcst, check_dtype=False, atol=1e-4, rtol=1e-2
    )
    # cross-validation
    local_cv = nixtla_client.cross_validation(
        df=fa.as_pandas(df), n_windows=2, h=5, finetuned_model_id=model_id2
    )
    distr_cv = (
        fa.as_pandas(
            nixtla_client.cross_validation(df=df, n_windows=2, h=5, finetuned_model_id=model_id2)
        )
        .sort_values(["unique_id", "ds"])
        .reset_index(drop=True)
    )
    pd.testing.assert_frame_equal(
        local_cv, distr_cv[local_cv.columns], check_dtype=False, atol=1e-4, rtol=1e-2
    )
    # anomaly detection
    local_anomaly = nixtla_client.detect_anomalies(
        df=fa.as_pandas(df), finetuned_model_id=model_id2
    )
    distr_anomaly = (
        fa.as_pandas(nixtla_client.detect_anomalies(df=df, finetuned_model_id=model_id2))
        .sort_values(["unique_id", "ds"])
        .reset_index(drop=True)
    )
    pd.testing.assert_frame_equal(
        local_anomaly,
        distr_anomaly[local_anomaly.columns],
        check_dtype=False,
        atol=1e-3,
        rtol=1e-2,
    )
```
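`check_retry_behavior` is consumed by nixtla_tests/nixtla_client/test_retry.py, which appears in the file list above but is not expanded on this page. Purely as a hypothetical sketch of how the helper could be driven (the mock, the exception type, and the parameter values are assumptions, not the actual contents of test_retry.py, and it presumes the client surfaces the underlying exception once its retries are exhausted):

```python
# Hypothetical usage sketch; not the actual contents of test_retry.py.
from unittest.mock import MagicMock

import httpx

from nixtla_tests.helpers.checks import check_retry_behavior


def test_retries_on_connection_errors(air_passengers_df):
    # every request attempt fails, so the patched client should keep retrying
    # until max_retries / max_wait_time is exhausted and then re-raise
    side_effect = MagicMock(side_effect=httpx.ConnectError("connection failed"))
    check_retry_behavior(
        df=air_passengers_df,
        side_effect=side_effect,
        side_effect_exception=httpx.ConnectError,
        max_retries=2,
        retry_interval=1,
        max_wait_time=30,
        should_retry=True,
    )
```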
nixtla_tests/helpers/client_helper.py (new file, mode 100644)

```python
import os
from contextlib import contextmanager


@contextmanager
def delete_env_var(key):
    original_value = os.environ.get(key)
    rm = False
    if key in os.environ:
        del os.environ[key]
        rm = True
    try:
        yield
    finally:
        if rm:
            os.environ[key] = original_value
```
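`delete_env_var` temporarily removes a variable and restores it on exit; `test_api_key_fail` in test_client.py below uses it to simulate a missing API key. For illustration, a minimal standalone usage (the variable name here is arbitrary):

```python
import os

from nixtla_tests.helpers.client_helper import delete_env_var

os.environ["EXAMPLE_VAR"] = "original"
with delete_env_var("EXAMPLE_VAR"):
    # inside the block the variable is gone
    assert "EXAMPLE_VAR" not in os.environ
# on exit the previous value is restored
assert os.environ["EXAMPLE_VAR"] == "original"
```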
nixtla_tests/helpers/states.py (new file, mode 100644)

```python
import uuid


class ModelIds:
    model_id1 = str(uuid.uuid4())
    model_id2 = None


model_ids_object = ModelIds()
```
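`ModelIds` is a small piece of shared mutable state: `model_id1` is generated once at import time, and `model_id2` is filled in by the first fine-tuning test so that later tests in test_finetune_and_forecast.py can reuse the same fine-tuned models without re-training. A minimal sketch of the pattern (the id value is illustrative only):

```python
from nixtla_tests.helpers.states import model_ids_object

# the first test stores the id of the model it fine-tuned...
model_ids_object.model_id2 = "some-finetuned-model-id"  # illustrative value only

# ...and later tests read it back instead of fine-tuning again
assert model_ids_object.model_id2 is not None
```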
nixtla_tests/nixtla_client/__init__.py (new file, mode 100644, empty)
nixtla_tests/nixtla_client/test_audit_data.py (new file, mode 100644)

```python
import pandas as pd
import pytest


def test_audit_data_all_pass(custom_client, df_ok, common_kwargs):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(df=df_ok, **common_kwargs)
    assert all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 0


def test_audit_data_with_duplicates(custom_client, df_with_duplicates_set2, common_kwargs):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_duplicates_set2, **common_kwargs
    )
    assert not all_pass
    assert len(case_specific_dfs) == 0
    assert len(fail_dfs) == 2
    assert "D001" in fail_dfs
    # The two duplicate rows should be returned
    assert len(fail_dfs["D001"]) == 2
    assert "D002" in fail_dfs
    ## D002 can not be run with duplicates
    assert fail_dfs["D002"] is None


def test_clean_data_with_duplicates(custom_client, df_with_duplicates_set2, common_kwargs):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_duplicates_set2, **common_kwargs
    )
    cleaned_df, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=df_with_duplicates_set2,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        agg_dict={"y": "sum"},
        **common_kwargs,
    )
    assert all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 0
    assert len(cleaned_df) == 3


def test_clean_data_raises_valueerror(custom_client, df_with_duplicates_set2, common_kwargs):
    _, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_duplicates_set2, **common_kwargs
    )
    with pytest.raises(ValueError, match="agg_dict must be provided to resolve D001 failure."):
        custom_client.clean_data(
            df=df_with_duplicates_set2,
            fail_dict=fail_dfs,
            case_specific_dict=case_specific_dfs,
            **common_kwargs,
        )


def test_audit_data_with_missing_dates(custom_client, df_with_missing_dates, common_kwargs):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_missing_dates, **common_kwargs
    )
    assert not all_pass
    assert len(case_specific_dfs) == 0
    assert len(fail_dfs) == 1
    assert "D002" in fail_dfs
    # Two missing dates should be returned
    assert len(fail_dfs["D002"]) == 2


def test_clean_data_with_missing_dates(custom_client, df_with_missing_dates, common_kwargs):
    # First audit to get fail_dfs and case_specific_dfs
    _, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_missing_dates, **common_kwargs
    )
    cleaned_df, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=df_with_missing_dates,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        agg_dict={"y": "sum"},
        **common_kwargs,
    )
    assert all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 0
    # Two missing rows added.
    assert len(cleaned_df) == 6
    assert pd.to_datetime("2023-01-02") in pd.to_datetime(cleaned_df["ds"]).values


def test_audit_data_with_duplicates_and_missing_dates(
    custom_client, df_with_duplicates_and_missing_dates, common_kwargs
):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_duplicates_and_missing_dates, **common_kwargs
    )
    assert not all_pass
    assert len(case_specific_dfs) == 0
    assert len(fail_dfs) == 2
    assert "D001" in fail_dfs
    # The two duplicate rows should be returned
    assert len(fail_dfs["D001"]) == 2
    assert "D002" in fail_dfs
    # D002 can not be run with duplicates
    assert fail_dfs["D002"] is None


def test_clean_data_with_duplicates_and_missing_dates(
    custom_client, df_with_duplicates_and_missing_dates, common_kwargs
):
    # First audit to get fail_dfs and case_specific_dfs
    _, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_duplicates_and_missing_dates, **common_kwargs
    )
    # Clean Data (pass 1 will clear the duplicates)
    cleaned_df, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=df_with_duplicates_and_missing_dates,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        agg_dict={"y": "sum"},
        **common_kwargs,
    )
    assert not all_pass
    assert len(fail_dfs) == 1
    # Since duplicates have been removed, D002 has been run now.
    assert "D002" in fail_dfs
    assert len(fail_dfs["D002"]) == 1
    assert len(case_specific_dfs) == 0
    # Two duplicates rows consolidated into one.
    assert len(cleaned_df) == 4
    # Clean Data (pass 2 will clear the missing dates)
    cleaned_df, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=cleaned_df,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        **common_kwargs,
    )
    assert all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 0
    # Two duplicates rows consolidated into one plus one missing row added.
    assert len(cleaned_df) == 5


def test_audit_data_with_cat_columns(custom_client, df_with_cat_columns, common_kwargs):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_with_cat_columns, **common_kwargs
    )
    assert not all_pass
    assert len(case_specific_dfs) == 0
    assert len(fail_dfs) == 1
    assert "F001" in fail_dfs
    # Should return both categorical columns
    assert fail_dfs["F001"].shape[1] == 2


def test_audit_data_with_negative_vals(custom_client, df_negative_vals, common_kwargs):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_negative_vals, **common_kwargs
    )
    assert not all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 1
    assert "V001" in case_specific_dfs
    # should return all negative values
    assert case_specific_dfs["V001"].shape[0] == 3


def test_clean_data_with_negative_vals_without_cleaning_case_specific(
    custom_client, df_negative_vals, common_kwargs
):
    _, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_negative_vals, **common_kwargs
    )
    _, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=df_negative_vals,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        # clean_case_specific=False,  # Default
        **common_kwargs,
    )
    assert not all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 1
    assert "V001" in case_specific_dfs
    # should return all negative values
    assert case_specific_dfs["V001"].shape[0] == 3


def test_clean_data_with_negative_vals_cleaning_case_specific(
    custom_client, df_negative_vals, common_kwargs
):
    _, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_negative_vals, **common_kwargs
    )
    cleaned_df, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=df_negative_vals,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        clean_case_specific=True,
        **common_kwargs,
    )
    assert not all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 1
    assert "V002" in case_specific_dfs
    # should return leading zeros
    assert case_specific_dfs["V002"].shape[0] == 1
    # test second pass
    # Clean Data, second pass (removes leading zeros)
    cleaned_df, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=cleaned_df,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        clean_case_specific=True,
        **common_kwargs,
    )
    assert all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 0


def test_audit_data_leading_zeros(custom_client, common_kwargs, df_leading_zeros_set2):
    all_pass, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_leading_zeros_set2, **common_kwargs
    )
    assert not all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 1
    assert "V002" in case_specific_dfs
    # should return ids with leading zeros
    assert case_specific_dfs["V002"].shape[0] == 2


def test_clean_data_leading_zeroes_without_cleaning_case_specific(
    custom_client, common_kwargs, df_leading_zeros_set2
):
    _, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_leading_zeros_set2, **common_kwargs
    )
    _, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=df_leading_zeros_set2,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        # clean_case_specific=False,  # Default
        **common_kwargs,
    )
    assert not all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 1
    assert "V002" in case_specific_dfs
    # should return ids with leading zeros
    assert case_specific_dfs["V002"].shape[0] == 2


def test_clean_data_with_cleaning_case_specific(
    custom_client, common_kwargs, df_leading_zeros_set2
):
    _, fail_dfs, case_specific_dfs = custom_client.audit_data(
        df=df_leading_zeros_set2, **common_kwargs
    )
    cleaned_df, all_pass, fail_dfs, case_specific_dfs = custom_client.clean_data(
        df=df_leading_zeros_set2,
        fail_dict=fail_dfs,
        case_specific_dict=case_specific_dfs,
        clean_case_specific=True,
        **common_kwargs,
    )
    assert all_pass
    assert len(fail_dfs) == 0
    assert len(case_specific_dfs) == 0
    # all leading zeros removed, zero series unchanged
    assert len(cleaned_df) == 7
```
nixtla_tests/nixtla_client/test_client.py (new file, mode 100644)

```python
import os
import pytest
import pandas as pd
import warnings

from nixtla_tests.helpers.client_helper import delete_env_var
from nixtla.nixtla_client import NixtlaClient


def test_custom_business_hours(business_hours_series, custom_business_hours):
    nixtla_test_client = NixtlaClient()
    nixtla_test_client.detect_anomalies(
        df=business_hours_series, freq=custom_business_hours, level=90
    )
    nixtla_test_client.cross_validation(
        df=business_hours_series, freq=custom_business_hours, h=7
    )
    fcst = nixtla_test_client.forecast(
        df=business_hours_series, freq=custom_business_hours, h=7
    )
    assert sorted(fcst["ds"].dt.hour.unique().tolist()) == list(range(9, 16))
    assert [
        (model, freq.lower()) for (model, freq) in nixtla_test_client._model_params.keys()
    ] == [("timegpt-1", "cbh")]


def test_integer_freq(integer_freq_series):
    nixtla_test_client = NixtlaClient()
    nixtla_test_client.detect_anomalies(df=integer_freq_series, level=90, freq=1)
    nixtla_test_client.cross_validation(df=integer_freq_series, h=7, freq=1)
    fcst = nixtla_test_client.forecast(df=integer_freq_series, h=7, freq=1)
    train_ends = integer_freq_series.groupby("unique_id", observed=True)["ds"].max()
    fcst_ends = fcst.groupby("unique_id", observed=True)["ds"].max()
    pd.testing.assert_series_equal(fcst_ends, train_ends + 7)
    assert list(nixtla_test_client._model_params.keys()) == [("timegpt-1", "MS")]


def test_api_key_fail():
    with delete_env_var("NIXTLA_API_KEY"), delete_env_var("TIMEGPT_TOKEN"):
        with pytest.raises(KeyError) as excinfo:
            NixtlaClient()
        assert "NIXTLA_API_KEY" in str(excinfo.value)


def test_api_key_success():
    nixtla_client = NixtlaClient()
    assert nixtla_client.validate_api_key()


def test_custom_client_success():
    custom_client = NixtlaClient(
        base_url=os.environ["NIXTLA_BASE_URL_CUSTOM"],
        api_key=os.environ["NIXTLA_API_KEY_CUSTOM"],
    )
    assert custom_client.validate_api_key()
    # assert the usage endpoint
    usage = custom_client.usage()
    assert sorted(usage.keys()) == ["minute", "month"]


def test_forecast_with_wrong_api_key():
    with pytest.raises(Exception) as excinfo:
        NixtlaClient(api_key="transphobic").forecast(
            df=pd.DataFrame(), h=None, validate_api_key=True
        )
    assert "nixtla" in str(excinfo.value)


def test_get_model_params(nixtla_test_client):
    assert nixtla_test_client._get_model_params(model="timegpt-1", freq="D") == (28, 7)


def test_client_plot(nixtla_test_client, air_passengers_df):
    nixtla_test_client.plot(
        air_passengers_df, time_col="timestamp", target_col="value", engine="plotly"
    )


def test_finetune_cv(nixtla_test_client, air_passengers_df):
    finetune_cv = nixtla_test_client.cross_validation(
        air_passengers_df,
        h=12,
        time_col="timestamp",
        target_col="value",
        n_windows=1,
        finetune_steps=1,
    )
    assert finetune_cv is not None


def test_forecast_warning(nixtla_test_client, air_passengers_df, caplog):
    nixtla_test_client.forecast(
        df=air_passengers_df.tail(3),
        h=100,
        time_col="timestamp",
        target_col="value",
    )
    assert 'The specified horizon "h" exceeds the model horizon' in caplog.text


@pytest.mark.parametrize(
    "kwargs",
    [
        {"add_history": True},
    ],
    ids=["short horizon with add_history"],
)
def test_forecast_error(nixtla_test_client, air_passengers_df, kwargs):
    with pytest.raises(
        ValueError, match="Some series are too short. Please make sure that each series"
    ):
        nixtla_test_client.forecast(
            df=air_passengers_df.tail(3),
            h=12,
            time_col="timestamp",
            target_col="value",
            **kwargs,
        )


def test_large_request_partition_error(nixtla_test_client, large_series):
    with pytest.raises(Exception) as excinfo:
        nixtla_test_client.forecast(df=large_series, h=1, freq="min", finetune_steps=2)
    assert "num_partitions" in str(excinfo.value)


def test_forecast_exogenous_warnings(
    nixtla_test_client, two_short_series_with_time_features_train_future
):
    train, future = two_short_series_with_time_features_train_future
    # features in df but not in X_df
    missing_exogenous = train.columns.drop(["unique_id", "ds", "y"]).tolist()
    expected_warning = (
        f"`df` contains the following exogenous features: {missing_exogenous}, "
        "but `X_df` was not provided and they were not declared in `hist_exog_list`. "
        "They will be ignored."
    )
    with warnings.catch_warnings(record=True) as w:
        nixtla_test_client.forecast(train, h=5)
    assert any(expected_warning in str(warning.message) for warning in w)
    # features in df not set as historic nor in X_df
    expected_warning = (
        "`df` contains the following exogenous features: ['month'], "
        "but they were not found in `X_df` nor declared in `hist_exog_list`. "
        "They will be ignored."
    )
    with warnings.catch_warnings(record=True) as w:
        nixtla_test_client.forecast(train, h=5, X_df=future[["unique_id", "ds", "year"]])
    assert any(expected_warning in str(warning.message) for warning in w)


def test_features_not_in_df_error(
    nixtla_test_client, two_short_series_with_time_features_train_future
):
    train, future = two_short_series_with_time_features_train_future
    with pytest.raises(ValueError, match="features are present in `X_df` but not in `df`"):
        nixtla_test_client.forecast(
            df=train[["unique_id", "ds", "y"]],
            h=5,
            X_df=future,
        )


def test_setting_one_as_historic_and_other_as_future(
    nixtla_test_client, two_short_series_with_time_features_train_future
):
    train, future = two_short_series_with_time_features_train_future
    # test setting one as historic and other as future
    nixtla_test_client.forecast(
        train,
        h=5,
        X_df=future[["unique_id", "ds", "year"]],
        hist_exog_list=["month"],
    )
    assert nixtla_test_client.weights_x["features"].tolist() == ["year", "month"]
```
nixtla_tests/nixtla_client/test_dask.py (new file, mode 100644)

```python
import pytest

from nixtla_tests.helpers.checks import check_anomalies_dataframe
from nixtla_tests.helpers.checks import check_anomalies_online_dataframe
from nixtla_tests.helpers.checks import check_anomalies_dataframe_diff_cols
from nixtla_tests.helpers.checks import check_forecast_dataframe
from nixtla_tests.helpers.checks import check_forecast_dataframe_diff_cols
from nixtla_tests.helpers.checks import check_forecast_x_dataframe
from nixtla_tests.helpers.checks import check_forecast_x_dataframe_diff_cols
from nixtla_tests.helpers.checks import check_quantiles

pytestmark = pytest.mark.distributed_run


def test_quantiles(nixtla_test_client, dask_df):
    check_quantiles(nixtla_test_client, dask_df, id_col="unique_id", time_col="ds")


def test_forecast(nixtla_test_client, dask_df, dask_diff_cols_df, distributed_n_series):
    check_forecast_dataframe(
        nixtla_test_client, dask_df, n_series_to_check=distributed_n_series
    )
    check_forecast_dataframe_diff_cols(nixtla_test_client, dask_diff_cols_df)


def test_anomalies(nixtla_test_client, dask_df, dask_diff_cols_df):
    check_anomalies_dataframe(nixtla_test_client, dask_df)
    check_anomalies_dataframe_diff_cols(nixtla_test_client, dask_diff_cols_df)


def test_anomalies_online(nixtla_test_client, dask_df):
    check_anomalies_online_dataframe(nixtla_test_client, dask_df)


def test_forecast_x_dataframe(
    nixtla_test_client,
    dask_df_x,
    dask_future_ex_vars_df,
    dask_df_x_diff_cols,
    dask_future_ex_vars_df_diff_cols,
):
    check_forecast_x_dataframe(nixtla_test_client, dask_df_x, dask_future_ex_vars_df)
    check_forecast_x_dataframe_diff_cols(
        nixtla_test_client,
        dask_df_x_diff_cols,
        dask_future_ex_vars_df_diff_cols,
    )
```
nixtla_tests/nixtla_client/test_detect_anomalies_online.py (new file, mode 100644)

```python
def test_detect_anomalies_online_univariate(nixtla_test_client, anomaly_online_df):
    df, n_series, detection_size = anomaly_online_df
    anomaly_df = nixtla_test_client.detect_anomalies_online(
        df,
        h=20,
        detection_size=detection_size,
        threshold_method="univariate",
        freq="W-SUN",
        level=99,
    )
    assert len(anomaly_df) == n_series * detection_size
    # [unique_id, ds, TimeGPT, y, anomaly, anomaly_score, hi, lo]
    assert len(anomaly_df.columns) == 8
    assert anomaly_df["anomaly"].sum() == 2
    assert anomaly_df["anomaly"].iloc[0] and anomaly_df["anomaly"].iloc[-1]


def test_detect_anomalies_online_multivariate(nixtla_test_client, anomaly_online_df):
    df, n_series, detection_size = anomaly_online_df
    multi_anomaly_df = nixtla_test_client.detect_anomalies_online(
        df,
        h=20,
        detection_size=detection_size,
        threshold_method="multivariate",
        freq="W-SUN",
        level=99,
    )
    assert len(multi_anomaly_df) == n_series * detection_size
    # [unique_id, ds, TimeGPT, y, anomaly, anomaly_score, accumulated_anomaly_score]
    assert len(multi_anomaly_df.columns) == 7
    assert multi_anomaly_df["anomaly"].sum() == 4
    assert (
        multi_anomaly_df["anomaly"].iloc[0]
        and multi_anomaly_df["anomaly"].iloc[4]
        and multi_anomaly_df["anomaly"].iloc[5]
        and multi_anomaly_df["anomaly"].iloc[9]
    )
```
nixtla_tests/nixtla_client/test_finetune_and_forecast.py (new file, mode 100644)

```python
import pytest
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import rmse

from nixtla.nixtla_client import ApiError
from nixtla_tests.helpers.checks import check_finetuned_model
from nixtla_tests.helpers.states import model_ids_object


class TestTimeSeriesDataSet1:
    def test_finetuning_and_forecasting(self, custom_client, ts_data_set1):
        # Finetune the model
        finetune_resp = custom_client.finetune(
            ts_data_set1.train, output_model_id=model_ids_object.model_id1
        )
        assert finetune_resp == model_ids_object.model_id1
        model_id2 = custom_client.finetune(
            ts_data_set1.train, finetuned_model_id=model_ids_object.model_id1
        )
        # store the model_id2 for later use
        model_ids_object.model_id2 = model_id2
        # Forecast with fine-tuned models
        forecast_base = custom_client.forecast(ts_data_set1.train, h=ts_data_set1.h)
        forecast1 = custom_client.forecast(
            ts_data_set1.train,
            h=ts_data_set1.h,
            finetuned_model_id=model_ids_object.model_id1,
        )
        forecast2 = custom_client.forecast(
            ts_data_set1.train,
            h=ts_data_set1.h,
            finetuned_model_id=model_ids_object.model_id2,
        )
        all_fcsts = forecast_base.assign(
            ten_rounds=forecast1["TimeGPT"], twenty_rounds=forecast2["TimeGPT"]
        )
        fcst_rmse = evaluate(
            all_fcsts.merge(ts_data_set1.valid),
            metrics=[rmse],
            agg_fn="mean",
        ).loc[0]
        # error was reduced over 30% by finetuning
        assert 1 - fcst_rmse["ten_rounds"] / fcst_rmse["TimeGPT"] > 0.3
        # error was reduced over 20% by further finetuning
        assert 1 - fcst_rmse["twenty_rounds"] / fcst_rmse["ten_rounds"] > 0.2
        # non-existent model returns 404
        with pytest.raises(ApiError) as excinfo:
            custom_client.forecast(
                ts_data_set1.train, h=ts_data_set1.h, finetuned_model_id="unexisting"
            )
        assert getattr(excinfo.value, "status_code", None) == 404
        # Enough data to finetune
        _ = custom_client.forecast(
            ts_data_set1.train.tail(2),
            h=ts_data_set1.h,
            finetune_steps=10,
            freq="D",
        )

    def test_cv_with_finetuned_model(self, custom_client, ts_data_set1):
        try:
            cv_base = custom_client.cross_validation(
                ts_data_set1.series, n_windows=2, h=ts_data_set1.h
            )
            cv_finetune = custom_client.cross_validation(
                ts_data_set1.series,
                n_windows=2,
                h=ts_data_set1.h,
                finetuned_model_id=model_ids_object.model_id1,
            )
            merged = cv_base.merge(
                cv_finetune,
                on=["unique_id", "ds", "cutoff", "y"],
                suffixes=("_base", "_finetune"),
            ).drop(columns="cutoff")
            cv_rmse = evaluate(
                merged,
                metrics=[rmse],
                agg_fn="mean",
            ).loc[0]
            # error was reduced over 30% by finetuning
            assert 1 - cv_rmse["TimeGPT_finetune"] / cv_rmse["TimeGPT_base"] > 0.3
        finally:
            custom_client.delete_finetuned_model(model_ids_object.model_id1)

    def test_anomaly_detection_with_finetuned_model(self, custom_client, ts_anomaly_data):
        anomaly_base = custom_client.detect_anomalies(ts_anomaly_data.train_anomalies)
        anomaly_finetune = custom_client.detect_anomalies(
            ts_anomaly_data.train_anomalies,
            finetuned_model_id=model_ids_object.model_id2,
        )
        detected_anomalies_base = (
            anomaly_base.set_index("ds").loc[ts_anomaly_data.anomaly_date, "anomaly"].sum()
        )
        detected_anomalies_finetune = (
            anomaly_finetune.set_index("ds").loc[ts_anomaly_data.anomaly_date, "anomaly"].sum()
        )
        assert detected_anomalies_base < detected_anomalies_finetune

    def test_list_finetuned_models(self, custom_client):
        models = custom_client.finetuned_models()
        ids = {m.id for m in models}
        assert (
            model_ids_object.model_id1 not in ids and model_ids_object.model_id2 in ids
        )

    def test_get_single_finetuned_model(self, custom_client):
        single_model = custom_client.finetuned_model(model_ids_object.model_id2)
        assert single_model.id == model_ids_object.model_id2
        assert single_model.base_model_id == model_ids_object.model_id1

    def test_non_existing_model_returns_error(self, custom_client):
        with pytest.raises(ApiError, match="Model not found"):
            custom_client.finetuned_model("hi")

    @pytest.mark.distributed_run
    @pytest.mark.ray_run
    def test_ray_finetune_model(self, custom_client, ray_df):
        check_finetuned_model(custom_client, ray_df, model_ids_object.model_id2)

    @pytest.mark.distributed_run
    @pytest.mark.spark_run
    def test_spark_finetune_model(self, custom_client, spark_df):
        check_finetuned_model(custom_client, spark_df, model_ids_object.model_id2)

    @pytest.mark.distributed_run
    @pytest.mark.flaky(reruns=3, delay=10)
    def test_dask_finetune_model(self, custom_client, dask_df):
        check_finetuned_model(custom_client, dask_df, model_ids_object.model_id2)
```
nixtla_tests/nixtla_client/test_nixtla_client.py
0 → 100644
View file @
f42429f6
from
contextlib
import
contextmanager
from
copy
import
deepcopy
import
httpx
import
numpy
as
np
import
pandas
as
pd
import
pytest
import
zstandard
as
zstd
from
pydantic
import
ValidationError
from
nixtla_tests.conftest
import
HYPER_PARAMS_TEST
from
nixtla_tests.helpers.checks
import
(
check_equal_fcsts_add_history
,
check_num_partitions_same_results
,
)
CAPTURED_REQUEST
=
None
class
CapturingClient
(
httpx
.
Client
):
def
post
(
self
,
*
args
,
**
kwargs
):
request
=
self
.
build_request
(
"POST"
,
*
args
,
**
kwargs
)
global
CAPTURED_REQUEST
CAPTURED_REQUEST
=
{
"headers"
:
dict
(
request
.
headers
),
"content"
:
request
.
content
,
"method"
:
request
.
method
,
"url"
:
str
(
request
.
url
),
}
return
super
().
post
(
*
args
,
**
kwargs
)
@
contextmanager
def
capture_request
():
original_client
=
httpx
.
Client
httpx
.
Client
=
CapturingClient
try
:
yield
finally
:
httpx
.
Client
=
original_client
@
pytest
.
mark
.
parametrize
(
"df_converter, freq"
,
[
pytest
.
param
(
lambda
series
,
with_gaps
:
with_gaps
,
"5min"
,
id
=
"gaps"
),
pytest
.
param
(
lambda
series
,
with_gaps
:
pd
.
concat
([
series
,
series
]),
"5min"
,
id
=
"duplicates"
,
),
pytest
.
param
(
lambda
series
,
with_gaps
:
series
,
"1min"
,
id
=
"wrong_freq"
),
],
)
def
test_forecast_with_error
(
series_with_gaps
,
nixtla_test_client
,
df_converter
,
freq
):
series
,
with_gaps
=
series_with_gaps
with
pytest
.
raises
(
ValueError
,
match
=
"missing or duplicate timestamps, or the timestamps do not match"
,
):
nixtla_test_client
.
forecast
(
df
=
df_converter
(
series
,
with_gaps
),
h
=
1
,
freq
=
freq
)
@
pytest
.
mark
.
parametrize
(
"test_params, expected_exception, expected_error_msg"
,
[
({
"model_parameters"
:
None
},
None
,
""
),
({
"model_parameters"
:
{
"max_q"
:
1
}},
None
,
""
),
({
"model_parameters"
:
{
"max_p"
:
None
}},
None
,
""
),
({
"model_parameters"
:
{
"horizon"
:
[
1
,
2
,
3
]}},
None
,
""
),
({
"model_parameters"
:
{
"horizon"
:
(
1
,
2
,
3
)}},
None
,
""
),
({
"model_parameters"
:
{
"horizon"
:
{
"nested"
:
"dict"
}}},
None
,
""
),
({
"model_parameters"
:
{
"horizon"
:
{
"nested"
:
None
}}},
None
,
""
),
({
"model_parameters"
:
"not a dict"
},
ValidationError
,
"Input should be a valid dictionary"
),
({
"model_parameters"
:
123
},
ValidationError
,
"Input should be a valid dictionary"
),
({
"model_parameters"
:
{
"horizon"
:
{
"nested_key"
:
[
1
,
2
,
3
]}}},
TypeError
,
"Invalid value type"
),
({
"model_parameters"
:
{
"horizon"
:
{
"nested_key"
:
(
1
,
2
,
3
)}}},
TypeError
,
"Invalid value type"
),
({
"model_parameters"
:
{
"horizon"
:
{
"nested_key"
:
{
1
,
2
}}}},
TypeError
,
"Invalid value type"
),
({
"model_parameters"
:
{
"horizon"
:
{
"nested_key"
:
{
"inner_key"
:
"val"
}}}},
TypeError
,
"Invalid value type"
),
({
"model_parameters"
:
{
"horizon"
:
pd
.
DataFrame
()}},
TypeError
,
"Invalid value type"
),
]
)
@
pytest
.
mark
.
parametrize
(
"endpoint"
,
[
"forecast"
,
"cross_validation"
])
def
test_model_parameters
(
nixtla_test_client
,
air_passengers_df
,
test_params
,
expected_exception
,
expected_error_msg
,
endpoint
):
base_params
=
{
"df"
:
air_passengers_df
,
"h"
:
12
,
"time_col"
:
"timestamp"
,
"target_col"
:
"value"
,
}
base_params
.
update
(
test_params
)
if
expected_exception
is
None
:
if
endpoint
==
"forecast"
:
nixtla_test_client
.
forecast
(
**
base_params
)
elif
endpoint
==
"cross_validation"
:
nixtla_test_client
.
cross_validation
(
**
base_params
)
else
:
with
pytest
.
raises
(
expected_exception
)
as
exc_info
:
if
endpoint
==
"forecast"
:
nixtla_test_client
.
forecast
(
**
base_params
)
elif
endpoint
==
"cross_validation"
:
nixtla_test_client
.
cross_validation
(
**
base_params
)
assert
expected_error_msg
in
str
(
exc_info
.
value
)
def
test_cv_forecast_consistency
(
nixtla_test_client
,
cv_series_with_features
):
series_with_features
,
train
,
valid
,
x_cols
,
h
,
freq
=
cv_series_with_features
for
hist_exog_list
in
[
None
,
[],
[
x_cols
[
2
],
x_cols
[
1
]],
x_cols
]:
cv_res
=
nixtla_test_client
.
cross_validation
(
series_with_features
,
n_windows
=
1
,
h
=
h
,
freq
=
freq
,
hist_exog_list
=
hist_exog_list
,
)
fcst_res
=
nixtla_test_client
.
forecast
(
train
,
h
=
h
,
freq
=
freq
,
hist_exog_list
=
hist_exog_list
,
X_df
=
valid
,
)
np
.
testing
.
assert_allclose
(
cv_res
[
"TimeGPT"
],
fcst_res
[
"TimeGPT"
],
atol
=
1e-4
,
rtol
=
1e-3
)
def
test_forecast_different_hist_exog_gives_different_results
(
nixtla_test_client
,
cv_series_with_features
):
_
,
train
,
valid
,
x_cols
,
h
,
freq
=
cv_series_with_features
for
X_df
in
(
None
,
valid
):
res1
=
nixtla_test_client
.
forecast
(
train
,
h
=
h
,
X_df
=
X_df
,
freq
=
freq
,
hist_exog_list
=
x_cols
[:
2
]
)
res2
=
nixtla_test_client
.
forecast
(
train
,
h
=
h
,
X_df
=
X_df
,
freq
=
freq
,
hist_exog_list
=
x_cols
[
2
:]
)
with
pytest
.
raises
(
AssertionError
):
np
.
testing
.
assert_allclose
(
res1
[
"TimeGPT"
],
res2
[
"TimeGPT"
],
atol
=
1e-4
,
rtol
=
1e-3
,
)
def
test_forecast_date_features_multiple_series_and_different_ends
(
nixtla_test_client
,
two_short_series
):
h
=
12
fcst_test_series
=
nixtla_test_client
.
forecast
(
two_short_series
,
h
=
h
,
date_features
=
[
"dayofweek"
]
)
uids
=
two_short_series
[
"unique_id"
]
for
uid
in
uids
:
expected
=
pd
.
date_range
(
periods
=
h
+
1
,
start
=
two_short_series
.
query
(
"unique_id == @uid"
)[
"ds"
].
max
()
)[
1
:].
tolist
()
actual
=
fcst_test_series
.
query
(
"unique_id == @uid"
)[
"ds"
].
tolist
()
assert
actual
==
expected
def
test_compression
(
nixtla_test_client
,
series_1MB_payload
):
with
capture_request
():
nixtla_test_client
.
forecast
(
df
=
series_1MB_payload
,
freq
=
"D"
,
h
=
1
,
hist_exog_list
=
[
"static_0"
,
"static_1"
],
)
assert
CAPTURED_REQUEST
[
"headers"
][
"content-encoding"
]
==
"zstd"
content
=
CAPTURED_REQUEST
[
"content"
]
assert
len
(
content
)
<
2
**
20
assert
len
(
zstd
.
ZstdDecompressor
().
decompress
(
content
))
>
2
**
20
def
test_cv_refit_equivalence
(
nixtla_test_client
,
air_passengers_df
):
cv_kwargs
=
dict
(
df
=
air_passengers_df
,
n_windows
=
2
,
h
=
12
,
freq
=
"MS"
,
time_col
=
"timestamp"
,
target_col
=
"value"
,
finetune_steps
=
2
,
)
res_refit
=
nixtla_test_client
.
cross_validation
(
refit
=
True
,
**
cv_kwargs
)
res_no_refit
=
nixtla_test_client
.
cross_validation
(
refit
=
False
,
**
cv_kwargs
)
np
.
testing
.
assert_allclose
(
res_refit
[
"value"
],
res_no_refit
[
"value"
])
with
pytest
.
raises
(
AssertionError
):
np
.
testing
.
assert_allclose
(
res_refit
[
"TimeGPT"
],
res_no_refit
[
"TimeGPT"
],
atol
=
1e-4
,
rtol
=
1e-3
,
)
def
test_forecast_quantiles_error
(
nixtla_test_client
,
air_passengers_df
):
with
pytest
.
raises
(
Exception
)
as
excinfo
:
nixtla_test_client
.
forecast
(
df
=
air_passengers_df
,
h
=
12
,
time_col
=
"timestamp"
,
target_col
=
"value"
,
level
=
[
80
],
quantiles
=
[
0.2
,
0.3
],
)
assert
"not both"
in
str
(
excinfo
.
value
)
@
pytest
.
mark
.
parametrize
(
"method,kwargs"
,
[
(
"forecast"
,
{}),
(
"forecast"
,
{
"add_history"
:
True
}),
(
"cross_validation"
,
{}),
],
)
def
test_forecast_quantiles_output
(
nixtla_test_client
,
air_passengers_df
,
method
,
kwargs
):
test_qls
=
list
(
np
.
arange
(
0.1
,
1
,
0.1
))
exp_q_cols
=
[
f
"TimeGPT-q-
{
int
(
100
*
q
)
}
"
for
q
in
test_qls
]
args
=
{
"df"
:
air_passengers_df
,
"h"
:
12
,
"time_col"
:
"timestamp"
,
"target_col"
:
"value"
,
"quantiles"
:
test_qls
,
**
kwargs
,
}
if
method
==
"cross_validation"
:
func
=
nixtla_test_client
.
cross_validation
elif
method
==
"forecast"
:
func
=
nixtla_test_client
.
forecast
df_qls
=
func
(
**
args
)
assert
all
(
col
in
df_qls
.
columns
for
col
in
exp_q_cols
)
assert
not
any
(
"-lo-"
in
col
for
col
in
df_qls
.
columns
)
# test monotonicity of quantiles
for
c1
,
c2
in
zip
(
exp_q_cols
[:
-
1
],
exp_q_cols
[
1
:]):
assert
df_qls
[
c1
].
lt
(
df_qls
[
c2
]).
all
()
@pytest.mark.parametrize("freq", ["D", "W-THU", "Q-DEC", "15T"])
@pytest.mark.parametrize(
    "method_name,method_kwargs,exog",
    [
        ("detect_anomalies", {"level": 98}, False),
        ("cross_validation", {"h": 7, "n_windows": 2}, False),
        ("forecast", {"h": 7, "add_history": True}, False),
        ("detect_anomalies", {"level": 98}, True),
        ("cross_validation", {"h": 7, "n_windows": 2}, True),
        ("forecast", {"h": 7, "add_history": True}, True),
    ],
)
def test_num_partitions_same_results_parametrized(
    nixtla_test_client, df_freq_generator, method_name, method_kwargs, freq, exog
):
    method_mapper = {
        "detect_anomalies": nixtla_test_client.detect_anomalies,
        "cross_validation": nixtla_test_client.cross_validation,
        "forecast": nixtla_test_client.forecast,
    }
    method = method_mapper[method_name]
    df_freq = df_freq_generator(n_series=10, min_length=500, max_length=550, freq=freq)
    df_freq["ds"] = df_freq.groupby("unique_id", observed=True)["ds"].transform(
        lambda x: pd.date_range(periods=len(x), freq=freq, end="2023-01-01")
    )
    if exog:
        df_freq["exog_1"] = 1
    kwargs = {
        "method": method,
        "num_partitions": 2,
        "df": df_freq,
        **method_kwargs,
    }
    check_num_partitions_same_results(**kwargs)
@pytest.mark.parametrize(
    "freq,h",
    [
        ("D", 7),
        ("W-THU", 52),
        ("Q-DEC", 8),
        ("15T", 4 * 24 * 7),
    ],
)
def test_forecast_models_different_results(nixtla_test_client, df_freq_generator, freq, h):
    df_freq = df_freq_generator(n_series=10, min_length=500, max_length=550, freq=freq)
    df_freq["ds"] = df_freq.groupby("unique_id", observed=True)["ds"].transform(
        lambda x: pd.date_range(periods=len(x), freq=freq, end="2023-01-01")
    )
    kwargs = dict(df=df_freq, h=h)
    fcst_1_df = check_equal_fcsts_add_history(
        nixtla_test_client, **{**kwargs, "model": "timegpt-1"}
    )
    fcst_2_df = check_equal_fcsts_add_history(
        nixtla_test_client, **{**kwargs, "model": "timegpt-1-long-horizon"}
    )
    with pytest.raises(
        AssertionError, match=r'\(column name="TimeGPT"\) are different'
    ):
        pd.testing.assert_frame_equal(fcst_1_df, fcst_2_df)
@pytest.mark.parametrize(
    "method, method_kwargs",
    [
        (
            "forecast",
            dict(
                h=12,
                level=[90, 95],
                add_history=True,
                time_col="timestamp",
                target_col="value",
            ),
        ),
        (
            "cross_validation",
            dict(h=12, level=[90, 95], time_col="timestamp", target_col="value"),
        ),
        ("detect_anomalies", dict(level=99, time_col="timestamp", target_col="value")),
    ],
)
def test_different_models_give_different_results(
    air_passengers_df, nixtla_test_client, method, method_kwargs
):
    method_mapper = {
        "detect_anomalies": nixtla_test_client.detect_anomalies,
        "cross_validation": nixtla_test_client.cross_validation,
        "forecast": nixtla_test_client.forecast,
    }
    execute = method_mapper[method]
    # Run with first model
    out1 = execute(df=air_passengers_df, model="timegpt-1", **method_kwargs)
    # Run with second model
    out2 = execute(df=air_passengers_df, model="timegpt-1-long-horizon", **method_kwargs)
    # Compare only the TimeGPT column
    with pytest.raises(
        AssertionError, match=r'\(column name="TimeGPT"\) are different'
    ):
        pd.testing.assert_frame_equal(out1[["TimeGPT"]], out2[["TimeGPT"]])
    # test unsupported model
    method_kwargs["model"] = "my-awesome-model"
    with pytest.raises(ValueError, match="unsupported model"):
        execute(df=air_passengers_df, **method_kwargs)
def test_shap_features(nixtla_test_client, date_features_result):
    # Test shap values are returned and sum to predictions
    df_date_features, future_df, _ = date_features_result
    h = 12
    fcst_df = nixtla_test_client.forecast(
        df=df_date_features, h=h, X_df=future_df, feature_contributions=True
    )
    shap_values = nixtla_test_client.feature_contributions
    assert len(shap_values) == len(fcst_df)
    np.testing.assert_allclose(
        fcst_df["TimeGPT"].values,
        shap_values.iloc[:, 3:].sum(axis=1).values,
        rtol=1e-3,
    )
    fcst_hist_df = nixtla_test_client.forecast(
        df=df_date_features,
        h=h,
        X_df=future_df,
        add_history=True,
        feature_contributions=True,
    )
    shap_values_hist = nixtla_test_client.feature_contributions
    assert len(shap_values_hist) == len(fcst_hist_df)
    np.testing.assert_allclose(
        fcst_hist_df["TimeGPT"].values,
        shap_values_hist.iloc[:, 3:].sum(axis=1).values,
        atol=1e-4,
    )
    # test num partitions
    _ = nixtla_test_client.feature_contributions
    pd.testing.assert_frame_equal(
        nixtla_test_client.feature_contributions,
        shap_values_hist,
        atol=1e-4,
        rtol=1e-3,
    )
@pytest.mark.parametrize("hyp", HYPER_PARAMS_TEST)
def test_exogenous_variables_cv(nixtla_test_client, exog_data, hyp):
    df_ex_, df_train, df_test, x_df_test = exog_data
    fcst_test = nixtla_test_client.forecast(
        df_train.merge(df_ex_.drop(columns="y")), h=12, X_df=x_df_test, **hyp
    )
    fcst_test = df_test[["unique_id", "ds", "y"]].merge(fcst_test)
    fcst_test = fcst_test.sort_values(["unique_id", "ds"]).reset_index(drop=True)
    fcst_cv = nixtla_test_client.cross_validation(df_ex_, h=12, **hyp)
    fcst_cv = fcst_cv.sort_values(["unique_id", "ds"]).reset_index(drop=True)
    pd.testing.assert_frame_equal(
        fcst_test,
        fcst_cv.drop(columns="cutoff"),
        atol=1e-4,
        rtol=1e-3,
    )


@pytest.mark.parametrize("hyp", HYPER_PARAMS_TEST)
def test_forecast_vs_cv_no_exog(
    nixtla_test_client, train_test_split, air_passengers_renamed_df, hyp
):
    df_train, df_test = train_test_split
    fcst_test = nixtla_test_client.forecast(df_train, h=12, **hyp)
    fcst_test = df_test[["unique_id", "ds", "y"]].merge(fcst_test)
    fcst_test = fcst_test.sort_values(["unique_id", "ds"]).reset_index(drop=True)
    fcst_cv = nixtla_test_client.cross_validation(air_passengers_renamed_df, h=12, **hyp)
    fcst_cv = fcst_cv.sort_values(["unique_id", "ds"]).reset_index(drop=True)
    pd.testing.assert_frame_equal(
        fcst_test,
        fcst_cv.drop(columns="cutoff"),
        rtol=1e-2,
    )


@pytest.mark.parametrize("hyp", HYPER_PARAMS_TEST)
def test_forecast_vs_cv_insert_y(
    nixtla_test_client, train_test_split, air_passengers_renamed_df, hyp
):
    df_train, df_test = train_test_split
    fcst_test = nixtla_test_client.forecast(df_train, h=12, **hyp)
    fcst_test.insert(2, "y", df_test["y"].values)
    fcst_test = fcst_test.sort_values(["unique_id", "ds"]).reset_index(drop=True)
    fcst_cv = nixtla_test_client.cross_validation(air_passengers_renamed_df, h=12, **hyp)
    fcst_cv = fcst_cv.sort_values(["unique_id", "ds"]).reset_index(drop=True)
    pd.testing.assert_frame_equal(
        fcst_test,
        fcst_cv.drop(columns="cutoff"),
        rtol=1e-2,
    )
def test_forecast_and_anomalies_index_vs_columns(
    nixtla_test_client, air_passengers_renamed_df, air_passengers_renamed_df_with_index
):
    fcst_inferred_df_index = nixtla_test_client.forecast(
        air_passengers_renamed_df_with_index, h=10
    )
    anom_inferred_df_index = nixtla_test_client.detect_anomalies(
        air_passengers_renamed_df_with_index
    )
    fcst_inferred_df = nixtla_test_client.forecast(
        air_passengers_renamed_df[["ds", "unique_id", "y"]], h=10
    )
    anom_inferred_df = nixtla_test_client.detect_anomalies(
        air_passengers_renamed_df[["ds", "unique_id", "y"]]
    )
    pd.testing.assert_frame_equal(
        fcst_inferred_df_index, fcst_inferred_df, atol=1e-4, rtol=1e-3
    )
    pd.testing.assert_frame_equal(
        anom_inferred_df_index, anom_inferred_df, atol=1e-4, rtol=1e-3
    )


@pytest.mark.parametrize("freq", ["Y", "W-MON", "Q-DEC", "H"])
def test_forecast_index_vs_columns_various_freq(
    nixtla_test_client, air_passengers_renamed_df_with_index, freq
):
    df_ds_index = air_passengers_renamed_df_with_index.groupby("unique_id").tail(80)
    df_ds_index.index = np.concatenate(
        df_ds_index["unique_id"].nunique()
        * [pd.date_range(end="2023-01-01", periods=80, freq=freq)]
    )
    df_ds_index.index.name = "ds"
    fcst_inferred_df_index = nixtla_test_client.forecast(df_ds_index, h=10)
    df_test = df_ds_index.reset_index()
    fcst_inferred_df = nixtla_test_client.forecast(df_test, h=10)
    pd.testing.assert_frame_equal(
        fcst_inferred_df_index, fcst_inferred_df, atol=1e-4, rtol=1e-3
    )


def test_index_as_time_col(nixtla_test_client, air_passengers_df):
    df_test = deepcopy(air_passengers_df)
    df_test["timestamp"] = pd.to_datetime(df_test["timestamp"])
    df_test.set_index(df_test["timestamp"], inplace=True)
    df_test.drop(columns="timestamp", inplace=True)
    # Using user_provided time_col and freq
    timegpt_anomalies_df_1 = nixtla_test_client.detect_anomalies(
        air_passengers_df, time_col="timestamp", target_col="value", freq="M"
    )
    # Infer time_col and freq from index
    timegpt_anomalies_df_2 = nixtla_test_client.detect_anomalies(
        df_test, time_col="timestamp", target_col="value"
    )
    pd.testing.assert_frame_equal(
        timegpt_anomalies_df_1,
        timegpt_anomalies_df_2,
        atol=1e-4,
        rtol=1e-3,
    )
nixtla_tests/nixtla_client/test_ray.py  0 → 100644
import platform
import sys

import pytest

from nixtla_tests.helpers.checks import (
    check_anomalies_dataframe,
    check_anomalies_dataframe_diff_cols,
    check_anomalies_online_dataframe,
    check_forecast_dataframe,
    check_forecast_dataframe_diff_cols,
    check_forecast_x_dataframe,
    check_forecast_x_dataframe_diff_cols,
    check_quantiles,
)

pytestmark = [
    pytest.mark.distributed_run,
    pytest.mark.ray_run,
]


def test_quantiles(nixtla_test_client, ray_df):
    check_quantiles(nixtla_test_client, ray_df, id_col="unique_id", time_col="ds")


def test_forecast(nixtla_test_client, ray_df, ray_diff_cols_df, distributed_n_series):
    check_forecast_dataframe(
        nixtla_test_client, ray_df, n_series_to_check=distributed_n_series
    )
    check_forecast_dataframe_diff_cols(nixtla_test_client, ray_diff_cols_df)


def test_anomalies(nixtla_test_client, ray_df, ray_diff_cols_df):
    check_anomalies_dataframe(nixtla_test_client, ray_df)
    check_anomalies_dataframe_diff_cols(nixtla_test_client, ray_diff_cols_df)


def test_anomalies_online(nixtla_test_client, ray_df):
    check_anomalies_online_dataframe(nixtla_test_client, ray_df)


@pytest.mark.xfail(
    reason=(
        "triad.collections.schema.SchemaError: Schema can't be empty "
        "error triggered https://github.com/Nixtla/nixtla/blob/b56a89bf6b80b137c57f3511eef3ed8857705a59/nixtla/nixtla_client.py#L1383"
    )
)
def test_forecast_x_dataframe(
    nixtla_test_client,
    ray_df_x,
    ray_future_ex_vars_df,
    ray_df_x_diff_cols,
    ray_future_ex_vars_df_diff_cols,
):
    check_forecast_x_dataframe(nixtla_test_client, ray_df_x, ray_future_ex_vars_df)
    check_forecast_x_dataframe_diff_cols(
        nixtla_test_client,
        ray_df_x_diff_cols,
        ray_future_ex_vars_df_diff_cols,
    )
nixtla_tests/nixtla_client/test_retry.py  0 → 100644
import time
from itertools import product

import httpx
import pytest

from nixtla.nixtla_client import ApiError
from nixtla_tests.helpers.checks import check_retry_behavior


def raise_api_error_with_text(*args, **kwargs):
    raise ApiError(
        status_code=503,
        body="""
<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>503 Server Error</title>
</head>
<body text=#000000 bgcolor=#ffffff>
<h1>Error: Server Error</h1>
<h2>The service you requested is not available at this time.<p>Service error -27.</h2>
<h2></h2>
</body></html>
""",
    )


def raise_api_error_with_json(*args, **kwargs):
    raise ApiError(
        status_code=422,
        body=dict(detail="Please use numbers"),
    )


def raise_read_timeout_error(*args, **kwargs):
    sleep_seconds = 5
    print(f"raising ReadTimeout error after {sleep_seconds} seconds")
    time.sleep(sleep_seconds)
    raise httpx.ReadTimeout("Timed out")


def raise_http_error(*args, **kwargs):
    print("raising HTTP error")
    raise ApiError(status_code=503, body="HTTP error")


@pytest.mark.parametrize(
    "side_effect,side_effect_exception,should_retry",
    [
        (raise_api_error_with_text, ApiError, True),
        (raise_api_error_with_json, ApiError, False),
    ],
)
def test_retry_behavior(air_passengers_df, side_effect, side_effect_exception, should_retry):
    check_retry_behavior(
        df=air_passengers_df,
        side_effect=side_effect,
        side_effect_exception=side_effect_exception,
        should_retry=should_retry,
    )


combs = [
    (2, 5, 30),
    (10, 1, 5),
]
side_effect_settings = [
    (raise_read_timeout_error, httpx.ReadTimeout),
    (raise_http_error, ApiError),
]


@pytest.mark.parametrize(
    "retry_settings,side_effect_settings", product(combs, side_effect_settings)
)
def test_retry_behavior_set2(air_passengers_df, retry_settings, side_effect_settings):
    max_retries, retry_interval, max_wait_time = retry_settings
    side_effect, side_effect_exception = side_effect_settings
    check_retry_behavior(
        df=air_passengers_df,
        side_effect=side_effect,
        side_effect_exception=side_effect_exception,
        max_retries=max_retries,
        retry_interval=retry_interval,
        max_wait_time=max_wait_time,
    )
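
For context only, a hedged sketch (not part of this commit) of how the max_retries, retry_interval and max_wait_time knobs exercised through check_retry_behavior are typically supplied when constructing the client; the constructor arguments follow the public nixtla SDK, and the API key value is a placeholder:

from nixtla import NixtlaClient

client = NixtlaClient(
    api_key="YOUR_API_KEY",  # placeholder
    max_retries=2,           # give up after two failed attempts
    retry_interval=5,        # wait 5 seconds between attempts
    max_wait_time=30,        # stop retrying after 30 seconds in total
)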
nixtla_tests/nixtla_client/test_spark.py  0 → 100644
import pytest

from nixtla_tests.helpers.checks import (
    check_anomalies_dataframe,
    check_anomalies_dataframe_diff_cols,
    check_anomalies_online_dataframe,
    check_forecast_dataframe,
    check_forecast_dataframe_diff_cols,
    check_forecast_x_dataframe,
    check_forecast_x_dataframe_diff_cols,
    check_quantiles,
)

pytestmark = [pytest.mark.distributed_run, pytest.mark.spark_run]


def test_quantiles(nixtla_test_client, spark_df):
    check_quantiles(nixtla_test_client, spark_df, id_col="unique_id", time_col="ds")


def test_forecast(nixtla_test_client, spark_df, spark_diff_cols_df, distributed_n_series):
    check_forecast_dataframe(
        nixtla_test_client, spark_df, n_series_to_check=distributed_n_series
    )
    check_forecast_dataframe_diff_cols(nixtla_test_client, spark_diff_cols_df)


def test_anomalies(nixtla_test_client, spark_df, spark_diff_cols_df):
    check_anomalies_dataframe(nixtla_test_client, spark_df)
    check_anomalies_dataframe_diff_cols(nixtla_test_client, spark_diff_cols_df)


def test_anomalies_online(nixtla_test_client, spark_df):
    check_anomalies_online_dataframe(nixtla_test_client, spark_df)


def test_forecast_x_dataframe(
    nixtla_test_client,
    spark_df_x,
    spark_future_ex_vars_df,
    spark_df_x_diff_cols,
    spark_future_ex_vars_df_diff_cols,
):
    check_forecast_x_dataframe(nixtla_test_client, spark_df_x, spark_future_ex_vars_df)
    check_forecast_x_dataframe_diff_cols(
        nixtla_test_client, spark_df_x_diff_cols, spark_future_ex_vars_df_diff_cols
    )
nixtla_tests/utils/test_utils.py  0 → 100644
import re

import pandas as pd
import pytest

from nixtla.date_features import SpecialDates
from nixtla.nixtla_client import (
    AuditDataSeverity,
    _audit_categorical_variables,
    _audit_duplicate_rows,
    _audit_leading_zeros,
    _audit_missing_dates,
    _audit_negative_values,
    _maybe_add_date_features,
    _model_in_list,
)


@pytest.mark.parametrize(
    "name, patterns, expected",
    [
        ("a", ("a", "b"), True),
        ("a", ("b", "c"), False),
        ("axb", ("x", re.compile("a.*b")), True),
        ("axb", ("x", re.compile("^a.*b$")), True),
        ("a-b", ("x", re.compile("^a-.*b$")), True),
        ("a-dfdfb", ("x", re.compile("^a-.*b$")), True),
        ("abc", ("x", re.compile("ab"), re.compile("abcd")), False),
    ],
)
def test_model_in_list(name, patterns, expected):
    assert _model_in_list(name, patterns) is expected
def test_audit_duplicate_rows_pass(df_no_duplicates):
    audit, duplicates = _audit_duplicate_rows(df_no_duplicates)
    assert audit == AuditDataSeverity.PASS
    assert len(duplicates) == 0


def test_audit_duplicate_rows_fail(df_with_duplicates):
    audit, duplicates = _audit_duplicate_rows(df_with_duplicates)
    assert audit == AuditDataSeverity.FAIL
    assert len(duplicates) == 2


def test_audit_missing_dates_complete(df_complete):
    audit, missing = _audit_missing_dates(df_complete, freq="D")
    assert audit == AuditDataSeverity.PASS
    assert len(missing) == 0


def test_audit_missing_dates_with_missing(df_missing):
    audit, missing = _audit_missing_dates(df_missing, freq="D")
    assert audit == AuditDataSeverity.FAIL
    assert len(missing) == 2  # One missing date per unique_id


# --- Audit Categorical Variables ---
def test_audit_categorical_variables_no_cat(df_no_cat):
    audit, cat_df = _audit_categorical_variables(df_no_cat)
    assert audit == AuditDataSeverity.PASS
    assert len(cat_df) == 0


def test_audit_categorical_variables_with_cat(df_with_cat):
    audit, cat_df = _audit_categorical_variables(df_with_cat)
    assert audit == AuditDataSeverity.FAIL
    assert cat_df.shape[1] == 1  # Should include only 'cat_col'


def test_audit_categorical_variables_with_cat_dtype(df_with_cat_dtype):
    audit, cat_df = _audit_categorical_variables(df_with_cat_dtype)
    assert audit == AuditDataSeverity.FAIL
    assert cat_df.shape[1] == 1  # Should include only 'cat_col'


def test_audit_leading_zeros(df_leading_zeros):
    audit, leading_zeros_df = _audit_leading_zeros(df_leading_zeros)
    assert audit == AuditDataSeverity.CASE_SPECIFIC
    assert len(leading_zeros_df) == 3


def test_audit_negative_values(df_negative_values):
    audit, negative_values_df = _audit_negative_values(df_negative_values)
    assert audit == AuditDataSeverity.CASE_SPECIFIC
    assert len(negative_values_df) == 3
@pytest.mark.parametrize(
    "date_features,freq,one_hot,expected_date_features",
    [
        (["year", "month"], "MS", False, ["year", "month"]),
        (
            [
                SpecialDates(
                    {"first_dates": ["2021-01-1"], "second_dates": ["2021-01-01"]}
                )
            ],
            "D",
            False,
            ["first_dates", "second_dates"],
        ),
        (["year", "month"], "D", ["month"], ["month_" + str(i) for i in range(1, 13)]),
    ],
)
def test_maybe_add_date_features(
    air_passengers_df, date_features, freq, one_hot, expected_date_features
):
    df_copy = air_passengers_df.copy()
    df_copy.rename(columns={"timestamp": "ds", "value": "y"}, inplace=True)
    df_copy.insert(0, "unique_id", "AirPassengers")
    df_date_features, future_df = _maybe_add_date_features(
        df=df_copy,
        X_df=None,
        h=12,
        freq=freq,
        features=date_features,
        one_hot=one_hot,
        id_col="unique_id",
        time_col="ds",
        target_col="y",
    )
    assert all(col in df_date_features for col in expected_date_features)
    assert all(col in future_df for col in expected_date_features)


@pytest.mark.parametrize(
    "date_features,one_hot,expected_date_features",
    [
        (["year", "month"], False, ["year", "month"]),
        (["month", "day"], ["month", "day"], ["month_" + str(i) for i in range(1, 13)]),
    ],
    ids=["no_one_hot", "with_one_hot"],
)
def test_add_date_features_with_exogenous_variables(
    air_passengers_df, date_features, one_hot, expected_date_features, request
):
    df_copy = air_passengers_df.copy()
    df_copy.rename(columns={"timestamp": "ds", "value": "y"}, inplace=True)
    df_copy.insert(0, "unique_id", "AirPassengers")
    df_actual_future = df_copy.tail(12)[["unique_id", "ds"]]
    df_date_features, future_df = _maybe_add_date_features(
        df=df_copy,
        X_df=df_actual_future,
        h=24,
        freq="H",
        features=date_features,
        one_hot=one_hot,
        id_col="unique_id",
        time_col="ds",
        target_col="y",
    )
    assert all(col in df_date_features for col in expected_date_features)
    assert all(col in future_df for col in expected_date_features)
    pd.testing.assert_frame_equal(
        df_date_features[df_copy.columns],
        df_copy,
    )
    if request.node.callspec.id == "no_one_hot":
        expected_df_actual_future = df_actual_future.copy()
    elif request.node.callspec.id == "with_one_hot":
        expected_df_actual_future = df_actual_future.reset_index(drop=True)
    pd.testing.assert_frame_equal(
        future_df[df_actual_future.columns],
        expected_df_actual_future,
    )
pyproject.toml  0 → 100644
[build-system]
requires = ["setuptools>=36.2", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "nixtla"
dynamic = ["version"]
description = "Python SDK for Nixtla API (TimeGPT)"
authors = [{name = "Nixtla", email = "business@nixtla.io"}]
license = {text = "Apache Software License 2.0"}
readme = "README.md"
requires-python = ">=3.9"
keywords = ["time-series", "forecasting", "gpt"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Natural Language :: English",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
dependencies = [
    "annotated-types",
    "httpx[zstd]",
    "orjson",
    "pandas",
    "pydantic>=1.10",
    "tenacity",
    "tqdm",
    "utilsforecast>=0.2.8",
]

[tool.setuptools.dynamic]
version = {attr = "nixtla.__version__"}

[project.optional-dependencies]
dev = [
    "black",
    "datasetsforecast",
    "fire",
    "hierarchicalforecast",
    "ipython<=8.32.0",
    "ipywidgets",
    "jupyterlab",
    "neuralforecast",
    "numpy<2",
    "plotly",
    "polars",
    "pre-commit",
    "pyreadr<0.5.3",
    "python-dotenv",
    "pyyaml",
    "setuptools<70",
    "statsforecast",
    "tabulate",
    "shap",
    "pytest",
    "pytest-cov",
    "pytest-rerunfailures",
    "pyarrow<21.0.0",
    "mlforecast",
    "lightgbm",
    "utilsforecast[plotting]",
    "holidays",
    "pandas_market_calendars",
    "pip-licenses",
]
distributed = [
    "fugue[dask,ray,spark]>=0.8.7",
    "dask<=2024.12.1",
    "pandas<2.2",
    "ray<=2.20.0",
]
plotting = [
    "utilsforecast[plotting]",
]
date_extras = [
    "holidays",
    "pandas_market_calendars",
]

[project.urls]
Homepage = "https://github.com/Nixtla/nixtla/"
Documentation = "https://nixtlaverse.nixtla.io/"
Repository = "https://github.com/Nixtla/nixtla/"

[tool.setuptools]
include-package-data = true

[tool.setuptools.packages.find]
exclude = ["action_files*"]

[tool.ruff.lint]
select = [
    "F",  # pyflakes
]
[tool.pytest.ini_options]
markers = [
    "distributed_run: mark test as requiring distributed run, such as those depending on Ray, Spark frameworks",
    "spark_run: mark test execution related to Spark framework",
    "ray_run: mark test execution related to Ray framework",
]
testpaths = ["nixtla_tests"]
addopts = [
    "--cov=nixtla",
    "--cov-report=term-missing",
    "--cov-report=html",
    "--cov-fail-under=80",
]
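
For illustration only (not part of this commit), a sketch of deselecting the distributed tests via the markers declared above, assuming pytest is installed as listed in the dev extras:

import pytest

# Run the suite locally while skipping Ray/Spark-dependent tests.
raise SystemExit(pytest.main(["-m", "not distributed_run", "nixtla_tests"]))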
scripts/filter_licenses.py  0 → 100644
import pandas as pd

df = pd.read_csv('third_party_licenses.csv')
df = df[df['License'].str.contains('GPL|AGPL|LGPL|MPL', na=False)]
# if the license has a long agreement, only capture the title and skip the rest
df['License'] = df['License'].apply(lambda x: x.split('\n')[0])
df = df[~df['Name'].str.contains('quadprog')]  # ignore quadprog
df.to_markdown('THIRD_PARTY_LICENSES.md', index=False)