Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Builds the Sphinx documentation with poetry and publishes the HTML output
# to the gh-pages branch. Runs only when triggered manually.
name: Publish Documentation
on: workflow_dispatch
env:
  PYTHON_VERSION: "3.12"
jobs:
  docs:
    runs-on: ubuntu-latest
    # The publish step pushes to gh-pages using GITHUB_TOKEN, which needs
    # write access to the repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "${{ env.PYTHON_VERSION }}"
      - name: Install poetry
        shell: bash
        # -y is required: the runner has no terminal to answer apt's
        # confirmation prompt. Refresh the index first so package URLs
        # are not stale.
        run: |
          sudo apt-get update
          sudo apt-get install -y python3-poetry
      - name: Configure poetry
        shell: bash
        run: |
          poetry config virtualenvs.in-project true
      - name: Install dependencies
        shell: bash
        run: |
          poetry install --all-extras
      - name: Build documentation
        shell: bash
        working-directory: ./docs
        run: |
          poetry run make html
      - name: Publish docs on github Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          publish_branch: gh-pages
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: docs/build/html
          # Keep only the latest commit on the publish branch.
          force_orphan: true
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ datafaker/config.ini

# sphinx
docs/build/*
docs/esbonio-build/*
docs/temp/*

# vim swap files
Expand Down
8 changes: 4 additions & 4 deletions datafaker/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,11 @@ def create_db_data_into(
Populate the database.

:param sorted_tables: The table names to populate, sorted so that foreign
keys' targets are populated before the foreign keys themselves.
keys' targets are populated before the foreign keys themselves.
:param table_generator_dict: A mapping of table names to the generators
used to make data for them.
used to make data for them.
:param story_generator_list: A list of story generators to be run after the
table generators on each pass.
table generators on each pass.
:param num_passes: Number of passes to perform.
:param db_dsn: Connection string for the destination database.
:param schema_name: Destination schema name.
Expand Down Expand Up @@ -196,7 +196,7 @@ def table_name(self) -> str | None:
Get the name of the current table.

:return: The table name, or None if there are no more stories
to process.
to process.
"""
return self._table_name

Expand Down
4 changes: 2 additions & 2 deletions datafaker/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ def load_metadata_config(

:param orm_file_name: The name of the file to load.
:param config: The ``config.yaml`` file object. Ignored tables will be
excluded from the output.
excluded from the output.
:return: A dict representing the ``orm.yaml`` file, with the tables
the ``config`` says to ignore removed.
the ``config`` says to ignore removed.
"""
with open(orm_file_name, encoding="utf-8") as orm_fh:
meta_dict = yaml.load(orm_fh, yaml.Loader)
Expand Down
40 changes: 18 additions & 22 deletions datafaker/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from datetime import datetime
from pathlib import Path
from types import TracebackType
from typing import Any, Callable, Final, Mapping, Optional, Sequence, Tuple, Type
from typing import Any, Callable, Final, Mapping, Optional, Sequence, Tuple, Type, Union

import pandas as pd
import snsql
Expand Down Expand Up @@ -90,8 +90,8 @@ def make_column_choices(

:param table_config: The ``tables`` part of ``config.yaml``.
:return: A list of ``ColumnChoice`` objects; that is, descriptions of
functions and their arguments to call to reveal a list of columns that
should have values generated for them.
functions and their arguments to call to reveal a list of columns that
should have values generated for them.
"""
return [
ColumnChoice(
Expand Down Expand Up @@ -125,7 +125,7 @@ class TableGeneratorInfo:
column_choices: list[ColumnChoice]
rows_per_pass: int
row_gens: list[RowGeneratorInfo] = field(default_factory=list)
unique_constraints: Sequence[UniqueConstraint | _PrimaryConstraint] = field(
unique_constraints: Sequence[Union[UniqueConstraint, _PrimaryConstraint]] = field(
default_factory=list
)

Expand Down Expand Up @@ -286,7 +286,7 @@ def _integer_generator(column: Column) -> tuple[str, dict[str, str]]:

:param column: The column to get the generator for.
:return: A pair consisting of the name of a generator and its
arguments.
arguments.
"""
if not column.primary_key:
return ("generic.numeric.integer_number", {})
Expand Down Expand Up @@ -423,7 +423,7 @@ def _get_generator_and_arguments(column: Column) -> tuple[str | None, dict[str,
Get the generator and its arguments from the column type.

:return: A tuple of a string representing the generator callable and a dict of
keyword arguments to supply to it.
keyword arguments to supply to it.
"""
generator_function = _get_generator_for_column(type(column.type))

Expand All @@ -437,12 +437,10 @@ def _get_provider_for_column(column: Column) -> Tuple[list[str], str, dict[str,
"""
Get a default Mimesis provider and its arguments for a SQL column type.

Args:
column: SQLAlchemy column object
:param column: SQLAlchemy column object

Returns:
Tuple[str, str, list[str]]: Tuple containing the variable names to assign to,
generator function and any generator arguments.
:return: Tuple containing the variable names to assign to, the generator
function and any generator arguments.
"""
variable_names: list[str] = [column.name]

Expand Down Expand Up @@ -589,19 +587,17 @@ def make_table_generators( # pylint: disable=too-many-locals
The orm and vocabulary YAML files must already have been
generated (by make-tables and make-vocab).

Args:
metadata: database ORM
config: Configuration to control the generator creation.
orm_filename: "orm.yaml" file path so that the generator
file can load the MetaData object
config_filename: "config.yaml" file path so that the generator
file can load the MetaData object
src_stats_filename: A filename for where to read src stats from.
:param metadata: database ORM
:param config: Configuration to control the generator creation.
:param orm_filename: "orm.yaml" file path so that the generator
file can load the MetaData object
:param config_filename: "config.yaml" file path so that the generator
file can load the MetaData object
:param src_stats_filename: A filename for where to read src stats from.
Optional, if `None` this feature will be skipped
overwrite_files: Whether to overwrite pre-existing vocabulary files
:param overwrite_files: Whether to overwrite pre-existing vocabulary files

Returns:
A string that is a valid Python module, once written to file.
:return: A string that is a valid Python module, once written to file.
"""
row_generator_module_name: str = config.get("row_generators_module", None)
story_generator_module_name = config.get("story_generators_module", None)
Expand Down
66 changes: 33 additions & 33 deletions datafaker/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,12 +249,12 @@ def merge_with_constants(
Merge a list of items with other items that must be placed at certain indices.

:param constants_at: A map of indices to objects that must be placed at
those indices.
those indices.
:param xs: Items that fill in the gaps left by ``constants_at``.
:return: ``xs`` with ``constants_at`` inserted at the appropriate
points. If there are not enough elements in ``xs`` to fill in the gaps
in ``constants_at``, the elements of ``constants_at`` after the gap
are dropped.
points. If there are not enough elements in ``xs`` to fill in the gaps
in ``constants_at``, the elements of ``constants_at`` after the gap
are dropped.
"""
outi = 0
xi = 0
Expand Down Expand Up @@ -344,7 +344,7 @@ def choice(self, a: list[Mapping[str, T]]) -> T | None:
Choose a value with equal probability.

:param a: The list of values to output. Each element is a mapping with
a key ``value`` and the key is the value to return.
a key ``value`` whose entry is the value to return.
:return: The chosen value.
"""
return self.choice_direct(a).get("value", None)
Expand All @@ -371,8 +371,8 @@ def zipf_choice(self, a: list[Mapping[str, T]], n: int | None = None) -> T | Non
1/n times as frequently as the first value is chosen.

:param a: The list of rows to choose between, most frequent first.
Each element is a mapping with a key ``value`` and the key is the
value to return.
Each element is a mapping with a key ``value`` whose entry is the
value to return.
:return: The chosen value.
"""
c = self.zipf_choice_direct(a, n)
Expand All @@ -383,8 +383,8 @@ def weighted_choice(self, a: list[dict[str, Any]]) -> Any:
Choice weighted by the count in the original dataset.

:param a: a list of dicts, each with a ``value`` key
holding the value to be returned and a ``count`` key holding the
number of that value found in the original dataset
holding the value to be returned and a ``count`` key holding the
number of that value found in the original dataset
:return: The chosen ``value``.
"""
vs = []
Expand All @@ -402,13 +402,13 @@ def constant(self, value: T) -> T:
return value

def multivariate_normal_np(self, cov: dict[str, Any]) -> np.typing.NDArray:
"""
r"""
Return an array of values chosen from the given covariates.

:param cov: Keys are ``rank``: The number of values to output;
``mN``: The mean of variable ``N`` (where ``N`` is between 0 and
one less than ``rank``). ``cN_M`` (where 0 < ``N`` <= ``M`` < ``rank``):
the covariance between the ``N``th and the ``M``th variables.
``mN``: The mean of variable ``N`` (where ``N`` is between 0 and
one less than ``rank``). ``cN_M`` (where 0 <= ``N`` <= ``M`` < ``rank``):
the covariance between the ``N``\th and the ``M``\th variables.
:return: A numpy array of results.
"""
rank = int(cov["rank"])
Expand Down Expand Up @@ -469,29 +469,29 @@ def _find_constants(self, result: dict[str, Any]) -> dict[int, Any]:
}

def multivariate_normal(self, cov: dict[str, Any]) -> list[float]:
"""
r"""
Produce a list of values pulled from a multivariate distribution.

:param cov: A dict with various keys: ``rank`` is the number of
output values, ``m0``, ``m1``, ... are the means of the
distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ...
are the covariates, ``cN_M`` is the covariate of the ``N``th and
``M``th varaibles, with 0 <= ``N`` <= ``M`` < ``rank``.
output values, ``m0``, ``m1``, ... are the means of the
distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ...
are the covariances; ``cN_M`` is the covariance of the ``N``\th and
``M``\th variables, with 0 <= ``N`` <= ``M`` < ``rank``.
:return: list of ``rank`` floating point values
"""
out: list[float] = self.multivariate_normal_np(cov).tolist()
return out

def multivariate_lognormal(self, cov: dict[str, Any]) -> list[float]:
"""
r"""
Produce a list of values pulled from a multivariate distribution.

:param cov: A dict with various keys: ``rank`` is the number of
output values, ``m0``, ``m1``, ... are the means of the
distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ...
are the covariates, ``cN_M`` is the covariate of the ``N``th and
``M``th varaibles, with 0 <= ``N`` <= ``M`` < ``rank``. These
are all the means and covariants of the logs of the data.
output values, ``m0``, ``m1``, ... are the means of the
distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ...
are the covariances; ``cN_M`` is the covariance of the ``N``\th and
``M``\th variables, with 0 <= ``N`` <= ``M`` < ``rank``. These
are all the means and covariances of the logs of the data.
:return: list of ``rank`` floating point values
"""
out: list[Any] = np.exp(self.multivariate_normal_np(cov)).tolist()
Expand Down Expand Up @@ -528,13 +528,13 @@ def alternatives(
Pick between other generators.

:param alternative_configs: List of alternative generators.
Each alternative has the following keys: "count" -- a weight for
how often to use this alternative; "name" -- which generator
for this partition, for example "composite"; "params" -- the
parameters for this alternative.
Each alternative has the following keys: "count" -- a weight for
how often to use this alternative; "name" -- which generator
for this partition, for example "composite"; "params" -- the
parameters for this alternative.
:param counts: A list of weights for each alternative. If None, the
"count" value of each alternative is used. Each count is a dict
with a "count" key.
"count" value of each alternative is used. Each count is a dict
with a "count" key.
:return: list of values
"""
if counts is not None:
Expand All @@ -560,12 +560,12 @@ def with_constants_at(
Insert constants into the results of a different generator.

:param constants_at: A dictionary of positions and objects to insert
into the return list at those positions.
into the return list at those positions.
:param subgen: The name of the function to call to get the results
that will have the constants inserted into.
that will have the constants inserted into.
:param params: Keyword arguments to the ``subgen`` function.
:return: A list of results from calling ``subgen(**params)``
with ``constants_at`` inserted in at the appropriate indices.
with ``constants_at`` inserted in at the appropriate indices.
"""
if subgen not in self.PERMITTED_SUBGENS:
logger.error(
Expand Down
4 changes: 2 additions & 2 deletions datafaker/remove.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Functions and classes to undo the operations in create.py."""
from typing import Any, Mapping
from typing import Any, Mapping, Optional

from sqlalchemy import MetaData, delete

Expand Down Expand Up @@ -56,7 +56,7 @@ def remove_db_vocab(
reinstate_vocab_foreign_key_constraints(metadata, meta_dict, config, dst_conn)


def remove_db_tables(metadata: MetaData | None) -> None:
def remove_db_tables(metadata: Optional[MetaData]) -> None:
"""Drop the tables in the destination schema."""
settings = get_settings()
assert settings.dst_dsn, "Missing destination database settings"
Expand Down
4 changes: 2 additions & 2 deletions datafaker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ def make_foreign_key_name(table_name: str, col_name: str) -> str:
def remove_vocab_foreign_key_constraints(
metadata: MetaData,
config: Mapping[str, Any],
dst_engine: Connection | Engine,
dst_engine: Union[Connection, Engine],
) -> None:
"""
Remove the foreign key constraints from vocabulary tables.
Expand Down Expand Up @@ -532,7 +532,7 @@ def reinstate_vocab_foreign_key_constraints(
metadata: MetaData,
meta_dict: Mapping[str, Any],
config: Mapping[str, Any],
dst_engine: Connection | Engine,
dst_engine: Union[Connection, Engine],
) -> None:
"""
Put the removed foreign keys back into the destination database.
Expand Down
5 changes: 5 additions & 0 deletions docs/source/_static/custom.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/*
 * Fix for sphinx_rtd_theme's tables, which do not allow line breaks by default:
 * reset white-space on header and data cells so their text can wrap.
 */
.wy-table-responsive table td,
.wy-table-responsive table th {
white-space: normal;
}
8 changes: 8 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import pathlib
import sys

from sphinx.application import Sphinx

sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix())
sys.path.insert(0, os.path.abspath("../.."))

Expand All @@ -29,6 +31,7 @@
"sphinx.ext.autosummary",
"sphinx_rtd_theme",
"sphinx.ext.napoleon",
"sphinxcontrib.mermaid",
]

autodoc_mock_imports: list[str] = ["typer", "pydantic", "sqlalchemy"]
Expand All @@ -41,3 +44,8 @@

html_theme = "sphinx_rtd_theme" # pylint: disable=C0103
html_static_path = ["_static"]


def setup(app: Sphinx) -> None:
"""Include our own CSS in rendered pages."""
app.add_css_file("custom.css")
2 changes: 2 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Contents:
:glob:
:maxdepth: 2

overview
installation
docker
quickstart
Expand All @@ -29,6 +30,7 @@ Contents:
loan_data
health_data
configuration
custom_generators
api
faq
glossary
Expand Down
Loading