diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 7a12d3c07d..0000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,115 +0,0 @@ -version: 2.1 - -setup: true - -on_main_or_tag_filter: &on_main_or_tag_filter - filters: - branches: - only: main - tags: - only: /^v\d+\.\d+\.\d+/ - -on_tag_filter: &on_tag_filter - filters: - branches: - ignore: /.*/ - tags: - only: /^v\d+\.\d+\.\d+/ - -orbs: - path-filtering: circleci/path-filtering@1.2.0 - -jobs: - publish: - docker: - - image: cimg/python:3.10 - resource_class: small - steps: - - checkout - - attach_workspace: - at: web/client - - run: - name: Publish Python package - command: make publish - - run: - name: Update pypirc - command: ./.circleci/update-pypirc.sh - - run: - name: Publish Python Tests package - command: unset TWINE_USERNAME TWINE_PASSWORD && make publish-tests - gh-release: - docker: - - image: cimg/node:20.19.0 - resource_class: small - steps: - - run: - name: Create release on GitHub - command: | - GITHUB_TOKEN="$GITHUB_TOKEN" \ - TARGET_TAG="$CIRCLE_TAG" \ - REPO_OWNER="$CIRCLE_PROJECT_USERNAME" \ - REPO_NAME="$CIRCLE_PROJECT_REPONAME" \ - CONTINUE_ON_ERROR="false" \ - npx https://github.com/TobikoData/circleci-gh-conventional-release - - ui-build: - docker: - - image: cimg/node:20.19.0 - resource_class: medium - steps: - - checkout - - run: - name: Install Dependencies - command: | - pnpm install - - run: - name: Build UI - command: pnpm --prefix web/client run build - - persist_to_workspace: - root: web/client - paths: - - dist - trigger_private_renovate: - docker: - - image: cimg/base:2021.11 - resource_class: small - steps: - - run: - name: Trigger private renovate - command: | - curl --request POST \ - --url $TOBIKO_PRIVATE_CIRCLECI_URL \ - --header "Circle-Token: $TOBIKO_PRIVATE_CIRCLECI_KEY" \ - --header "content-type: application/json" \ - --data '{ - "branch":"main", - "parameters":{ - "run_main_pr":false, - "run_sqlmesh_commit":false, - 
"run_renovate":true - } - }' - -workflows: - setup-workflow: - jobs: - - path-filtering/filter: - mapping: | - web/client/.* client true - (sqlmesh|tests|examples|web/server)/.* python true - pytest.ini|setup.cfg|setup.py|pyproject.toml python true - \.circleci/.*|Makefile|\.pre-commit-config\.yaml common true - vscode/extensions/.* vscode true - tag: "3.9" - - gh-release: - <<: *on_tag_filter - - ui-build: - <<: *on_main_or_tag_filter - - publish: - <<: *on_main_or_tag_filter - requires: - - ui-build - - trigger_private_renovate: - <<: *on_tag_filter - requires: - - publish diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml deleted file mode 100644 index e21f3d869b..0000000000 --- a/.circleci/continue_config.yml +++ /dev/null @@ -1,320 +0,0 @@ -version: 2.1 - -parameters: - client: - type: boolean - default: false - common: - type: boolean - default: false - python: - type: boolean - default: false - -orbs: - windows: circleci/windows@5.0 - -commands: - halt_unless_core: - steps: - - unless: - condition: - or: - - << pipeline.parameters.common >> - - << pipeline.parameters.python >> - - equal: [main, << pipeline.git.branch >>] - steps: - - run: circleci-agent step halt - halt_unless_client: - steps: - - unless: - condition: - or: - - << pipeline.parameters.common >> - - << pipeline.parameters.client >> - - equal: [main, << pipeline.git.branch >>] - steps: - - run: circleci-agent step halt - -jobs: - vscode_test: - docker: - - image: cimg/node:20.19.1-browsers - resource_class: small - steps: - - checkout - - run: - name: Install Dependencies - command: | - pnpm install - - run: - name: Run VSCode extension CI - command: | - cd vscode/extension - pnpm run ci - doc_tests: - docker: - - image: cimg/python:3.10 - resource_class: small - steps: - - halt_unless_core - - checkout - - run: - name: Install dependencies - command: make install-dev install-doc - - run: - name: Run doc tests - command: make doc-test - - style_and_cicd_tests: - 
parameters: - python_version: - type: string - docker: - - image: cimg/python:<< parameters.python_version >> - resource_class: large - environment: - PYTEST_XDIST_AUTO_NUM_WORKERS: 8 - steps: - - halt_unless_core - - checkout - - run: - name: Install OpenJDK - command: sudo apt-get update && sudo apt-get install default-jdk - - run: - name: Install ODBC - command: sudo apt-get install unixodbc-dev - - run: - name: Install SQLMesh dev dependencies - command: make install-dev - - run: - name: Fix Git URL override - command: git config --global --unset url."ssh://git@github.com".insteadOf - - run: - name: Run linters and code style checks - command: make py-style - - run: - name: Exercise the benchmarks - command: make benchmark-ci - - run: - name: Run cicd tests - command: make cicd-test - - store_test_results: - path: test-results - - cicd_tests_windows: - executor: - name: windows/default - size: large - steps: - - halt_unless_core - - run: - name: Enable symlinks in git config - command: git config --global core.symlinks true - - checkout - - run: - name: Install System Dependencies - command: | - choco install make which -y - refreshenv - - run: - name: Install SQLMesh dev dependencies - command: | - python -m venv venv - . ./venv/Scripts/activate - python.exe -m pip install --upgrade pip - make install-dev - - run: - name: Run fast unit tests - command: | - . 
./venv/Scripts/activate - which python - python --version - make fast-test - - store_test_results: - path: test-results - - migration_test: - docker: - - image: cimg/python:3.10 - resource_class: small - environment: - SQLMESH__DISABLE_ANONYMIZED_ANALYTICS: "1" - steps: - - halt_unless_core - - checkout - - run: - name: Run the migration test - command: ./.circleci/test_migration.sh - - ui_style: - docker: - - image: cimg/node:20.19.0 - resource_class: small - steps: - - checkout - - restore_cache: - name: Restore pnpm Package Cache - keys: - - pnpm-packages-{{ checksum "pnpm-lock.yaml" }} - - run: - name: Install Dependencies - command: | - pnpm install - - save_cache: - name: Save pnpm Package Cache - key: pnpm-packages-{{ checksum "pnpm-lock.yaml" }} - paths: - - .pnpm-store - - run: - name: Run linters and code style checks - command: pnpm run lint - - ui_test: - docker: - - image: mcr.microsoft.com/playwright:v1.54.1-jammy - resource_class: medium - steps: - - halt_unless_client - - checkout - - restore_cache: - name: Restore pnpm Package Cache - keys: - - pnpm-packages-{{ checksum "pnpm-lock.yaml" }} - - run: - name: Install pnpm package manager - command: | - npm install --global corepack@latest - corepack enable - corepack prepare pnpm@latest-10 --activate - pnpm config set store-dir .pnpm-store - - run: - name: Install Dependencies - command: | - pnpm install - - save_cache: - name: Save pnpm Package Cache - key: pnpm-packages-{{ checksum "pnpm-lock.yaml" }} - paths: - - .pnpm-store - - run: - name: Run tests - command: npm --prefix web/client run test - - engine_tests_docker: - parameters: - engine: - type: string - machine: - image: ubuntu-2404:2024.05.1 - docker_layer_caching: true - resource_class: large - environment: - SQLMESH__DISABLE_ANONYMIZED_ANALYTICS: "1" - steps: - - halt_unless_core - - checkout - - run: - name: Install OS-level dependencies - command: ./.circleci/install-prerequisites.sh "<< parameters.engine >>" - - run: - name: Run tests - 
command: make << parameters.engine >>-test - no_output_timeout: 20m - - store_test_results: - path: test-results - - engine_tests_cloud: - parameters: - engine: - type: string - docker: - - image: cimg/python:3.12 - resource_class: medium - environment: - PYTEST_XDIST_AUTO_NUM_WORKERS: 4 - SQLMESH__DISABLE_ANONYMIZED_ANALYTICS: "1" - steps: - - halt_unless_core - - checkout - - run: - name: Install OS-level dependencies - command: ./.circleci/install-prerequisites.sh "<< parameters.engine >>" - - run: - name: Generate database name - command: | - UUID=`cat /proc/sys/kernel/random/uuid` - TEST_DB_NAME="circleci_${UUID:0:8}" - echo "export TEST_DB_NAME='$TEST_DB_NAME'" >> "$BASH_ENV" - echo "export SNOWFLAKE_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" - echo "export DATABRICKS_CATALOG='$TEST_DB_NAME'" >> "$BASH_ENV" - echo "export REDSHIFT_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" - echo "export GCP_POSTGRES_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" - echo "export FABRIC_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" - - run: - name: Create test database - command: ./.circleci/manage-test-db.sh << parameters.engine >> "$TEST_DB_NAME" up - - run: - name: Run tests - command: | - make << parameters.engine >>-test - no_output_timeout: 20m - - run: - name: Tear down test database - command: ./.circleci/manage-test-db.sh << parameters.engine >> "$TEST_DB_NAME" down - when: always - - store_test_results: - path: test-results - -workflows: - main_pr: - jobs: - - doc_tests - - style_and_cicd_tests: - matrix: - parameters: - python_version: - - "3.9" - - "3.10" - - "3.11" - - "3.12" - - "3.13" - - cicd_tests_windows - - engine_tests_docker: - name: engine_<< matrix.engine >> - matrix: - parameters: - engine: - - duckdb - - postgres - - mysql - - mssql - - trino - - spark - - clickhouse - - risingwave - - engine_tests_cloud: - name: cloud_engine_<< matrix.engine >> - context: - - sqlmesh_cloud_database_integration - requires: - - engine_tests_docker - matrix: - parameters: - engine: - - 
snowflake - - databricks - - redshift - - bigquery - - clickhouse-cloud - - athena - - fabric - - gcp-postgres - filters: - branches: - only: - - main - - ui_style - - ui_test - - vscode_test - - migration_test diff --git a/.circleci/test_migration.sh b/.circleci/test_migration.sh deleted file mode 100755 index a85d933bd3..0000000000 --- a/.circleci/test_migration.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -set -ex - -GATEWAY_NAME="duckdb_persistent" -TMP_DIR=$(mktemp -d) -SUSHI_DIR="$TMP_DIR/sushi" - - -if [[ -z $(git tag --points-at HEAD) ]]; then - # If the current commit is not tagged, we need to find the last tag - LAST_TAG=$(git describe --tags --abbrev=0) -else - # If the current commit is tagged, we need to find the previous tag - LAST_TAG=$(git tag --sort=-creatordate | head -n 2 | tail -n 1) -fi - -git checkout $LAST_TAG - -# Install dependencies from the previous release. -make install-dev - -cp -r ./examples/sushi $TMP_DIR - -# Run initial plan -pushd $SUSHI_DIR -rm -rf ./data/* -sqlmesh --gateway $GATEWAY_NAME plan --no-prompts --auto-apply -rm -rf .cache -popd - -# Switch back to the starting state of the repository -git checkout - - -# Install updated dependencies. 
-make install-dev - -# Migrate and make sure the diff is empty -pushd $SUSHI_DIR -sqlmesh --gateway $GATEWAY_NAME migrate -sqlmesh --gateway $GATEWAY_NAME diff prod -popd diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..7585f0ce10 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,16 @@ +## Description + + + +## Test Plan + + + +## Checklist + +- [ ] I have run `make style` and fixed any issues +- [ ] I have added tests for my changes (if applicable) +- [ ] All existing tests pass (`make fast-test`) +- [ ] My commits are signed off (`git commit -s`) per the [DCO](DCO) + + diff --git a/.circleci/install-prerequisites.sh b/.github/scripts/install-prerequisites.sh similarity index 89% rename from .circleci/install-prerequisites.sh rename to .github/scripts/install-prerequisites.sh index 446221dba6..6ab602fc37 100755 --- a/.circleci/install-prerequisites.sh +++ b/.github/scripts/install-prerequisites.sh @@ -1,6 +1,6 @@ #!/bin/bash -# This script is intended to be run by an Ubuntu build agent on CircleCI +# This script is intended to be run by an Ubuntu CI build agent # The goal is to install OS-level dependencies that are required before trying to install Python dependencies set -e @@ -25,7 +25,7 @@ elif [ "$ENGINE" == "fabric" ]; then sudo dpkg -i packages-microsoft-prod.deb rm packages-microsoft-prod.deb - ENGINE_DEPENDENCIES="msodbcsql18" + ENGINE_DEPENDENCIES="msodbcsql18" fi ALL_DEPENDENCIES="$COMMON_DEPENDENCIES $ENGINE_DEPENDENCIES" @@ -39,4 +39,4 @@ if [ "$ENGINE" == "spark" ]; then java -version fi -echo "All done" \ No newline at end of file +echo "All done" diff --git a/.circleci/manage-test-db.sh b/.github/scripts/manage-test-db.sh similarity index 77% rename from .circleci/manage-test-db.sh rename to .github/scripts/manage-test-db.sh index f90b567ce8..29d11afcc0 100755 --- a/.circleci/manage-test-db.sh +++ b/.github/scripts/manage-test-db.sh @@ -25,7 +25,7 @@ 
function_exists() { # Snowflake snowflake_init() { echo "Installing Snowflake CLI" - pip install "snowflake-cli-labs<3.8.0" + pip install "snowflake-cli" } snowflake_up() { @@ -40,20 +40,6 @@ snowflake_down() { databricks_init() { echo "Installing Databricks CLI" curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh || true - - echo "Writing out Databricks CLI config file" - echo -e "[DEFAULT]\nhost = $DATABRICKS_SERVER_HOSTNAME\ntoken = $DATABRICKS_ACCESS_TOKEN" > ~/.databrickscfg - - # this takes a path like 'sql/protocolv1/o/2934659247569/0723-005339-foobar' and extracts '0723-005339-foobar' from it - CLUSTER_ID=${DATABRICKS_HTTP_PATH##*/} - - echo "Extracted cluster id: $CLUSTER_ID from '$DATABRICKS_HTTP_PATH'" - - # Note: the cluster doesnt need to be running to create / drop catalogs, but it does need to be running to run the integration tests - echo "Ensuring cluster is running" - # the || true is to prevent the following error from causing an abort: - # > Error: is in unexpected state Running. - databricks clusters start $CLUSTER_ID || true } databricks_up() { @@ -82,10 +68,10 @@ redshift_down() { EXIT_CODE=1 ATTEMPTS=0 while [ $EXIT_CODE -ne 0 ] && [ $ATTEMPTS -lt 5 ]; do - # note: sometimes this pg_terminate_backend() call can randomly fail with: ERROR: Insufficient privileges + # note: sometimes this pg_terminate_backend() call can randomly fail with: ERROR: Insufficient privileges # if it does, let's proceed with the drop anyway rather than aborting and never attempting the drop redshift_exec "select pg_terminate_backend(procpid) from pg_stat_activity where datname = '$1'" || true - + # perform drop redshift_exec "drop database $1;" && EXIT_CODE=$? || EXIT_CODE=$? 
if [ $EXIT_CODE -ne 0 ]; then @@ -117,14 +103,16 @@ clickhouse-cloud_init() { # GCP Postgres gcp-postgres_init() { - # Download and start Cloud SQL Proxy - curl -fsSL -o cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.18.0/cloud-sql-proxy.linux.amd64 - chmod +x cloud-sql-proxy + # Download Cloud SQL Proxy if not already present + if [ ! -f cloud-sql-proxy ]; then + curl -fsSL -o cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.18.0/cloud-sql-proxy.linux.amd64 + chmod +x cloud-sql-proxy + fi echo "$GCP_POSTGRES_KEYFILE_JSON" > /tmp/keyfile.json - ./cloud-sql-proxy --credentials-file /tmp/keyfile.json $GCP_POSTGRES_INSTANCE_CONNECTION_STRING & - - # Wait for proxy to start - sleep 5 + if ! pgrep -x cloud-sql-proxy > /dev/null; then + ./cloud-sql-proxy --credentials-file /tmp/keyfile.json $GCP_POSTGRES_INSTANCE_CONNECTION_STRING & + sleep 5 + fi } gcp-postgres_exec() { @@ -140,13 +128,13 @@ gcp-postgres_down() { } # Fabric -fabric_init() { +fabric_init() { python --version #note: as at 2025-08-20, ms-fabric-cli is pinned to Python >= 3.10, <3.13 pip install ms-fabric-cli - + # to prevent the '[EncryptionFailed] An error occurred with the encrypted cache.' 
error # ref: https://microsoft.github.io/fabric-cli/#switch-to-interactive-mode-optional - fab config set encryption_fallback_enabled true + fab config set encryption_fallback_enabled true echo "Logging in to Fabric" fab auth login -u $FABRIC_CLIENT_ID -p $FABRIC_CLIENT_SECRET --tenant $FABRIC_TENANT_ID diff --git a/.github/scripts/test_migration.sh b/.github/scripts/test_migration.sh new file mode 100755 index 0000000000..ec45772c73 --- /dev/null +++ b/.github/scripts/test_migration.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -ex + +if [[ -z $(git tag --points-at HEAD) ]]; then + # If the current commit is not tagged, we need to find the last tag + LAST_TAG=$(git describe --tags --abbrev=0) +else + # If the current commit is tagged, we need to find the previous tag + LAST_TAG=$(git tag --sort=-creatordate | head -n 2 | tail -n 1) +fi + +if [ "$1" == "" ]; then + echo "Usage: $0 " + echo "eg $0 sushi '--gateway duckdb_persistent'" + exit 1 +fi + + +TMP_DIR=$(mktemp -d) +EXAMPLE_NAME="$1" +SQLMESH_OPTS="$2" +EXAMPLE_DIR="./examples/$EXAMPLE_NAME" +TEST_DIR="$TMP_DIR/$EXAMPLE_NAME" + +echo "Running migration test for '$EXAMPLE_NAME' in '$TEST_DIR' for example project '$EXAMPLE_DIR' using options '$SQLMESH_OPTS'" + +# Copy the example project from the *current* checkout so it's stable across old/new SQLMesh versions +cp -r "$EXAMPLE_DIR" "$TEST_DIR" + +git checkout $LAST_TAG + +# Install dependencies from the previous release. 
+uv venv .venv --clear +source .venv/bin/activate +make install-dev + +# this is only needed temporarily until the released tag for $LAST_TAG includes this config +if [ "$EXAMPLE_NAME" == "sushi_dbt" ]; then + echo 'migration_test_config = sqlmesh_config(Path(__file__).parent, dbt_target_name="duckdb")' >> $TEST_DIR/config.py +fi + +# Run initial plan +pushd $TEST_DIR +rm -rf ./data/* +sqlmesh $SQLMESH_OPTS plan --no-prompts --auto-apply +rm -rf .cache +popd + +# Switch back to the starting state of the repository +git checkout - + +# Install updated dependencies. +uv venv .venv --clear +source .venv/bin/activate +make install-dev + +# Migrate and make sure the diff is empty +pushd $TEST_DIR +sqlmesh $SQLMESH_OPTS migrate +sqlmesh $SQLMESH_OPTS diff prod +popd diff --git a/.circleci/update-pypirc.sh b/.github/scripts/update-pypirc.sh similarity index 100% rename from .circleci/update-pypirc.sh rename to .github/scripts/update-pypirc.sh diff --git a/.circleci/wait-for-db.sh b/.github/scripts/wait-for-db.sh similarity index 98% rename from .circleci/wait-for-db.sh rename to .github/scripts/wait-for-db.sh index a313320279..07502e3898 100755 --- a/.circleci/wait-for-db.sh +++ b/.github/scripts/wait-for-db.sh @@ -80,4 +80,4 @@ while [ $EXIT_CODE -ne 0 ]; do fi done -echo "$ENGINE is ready!" \ No newline at end of file +echo "$ENGINE is ready!" 
diff --git a/.github/workflows/dco.yml b/.github/workflows/dco.yml new file mode 100644 index 0000000000..a1c4e07300 --- /dev/null +++ b/.github/workflows/dco.yml @@ -0,0 +1,17 @@ +name: Sanity check +on: [pull_request] + +jobs: + commits_check_job: + runs-on: ubuntu-latest + name: Commits Check + steps: + - name: Get PR Commits + id: 'get-pr-commits' + uses: tim-actions/get-pr-commits@master + with: + token: ${{ secrets.GITHUB_TOKEN }} + - name: DCO Check + uses: tim-actions/dco@master + with: + commits: ${{ steps.get-pr-commits.outputs.commits }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2e94855d3c..4395c56313 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -6,18 +6,399 @@ on: branches: - main concurrency: - group: 'pr-${{ github.event.pull_request.number }}' + group: pr-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true permissions: contents: read jobs: + changes: + runs-on: ubuntu-latest + outputs: + python: ${{ steps.filter.outputs.python }} + client: ${{ steps.filter.outputs.client }} + ci: ${{ steps.filter.outputs.ci }} + steps: + - uses: actions/checkout@v5 + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + python: + - 'sqlmesh/**' + - 'tests/**' + - 'examples/**' + - 'web/server/**' + - 'pytest.ini' + - 'setup.cfg' + - 'setup.py' + - 'pyproject.toml' + client: + - 'web/client/**' + ci: + - '.github/**' + - 'Makefile' + - '.pre-commit-config.yaml' + + doc-tests: + needs: changes + if: + needs.changes.outputs.python == 'true' || needs.changes.outputs.ci == + 'true' || github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + env: + UV: '1' + steps: + - uses: actions/checkout@v5 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.10' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install dependencies + run: | + uv venv .venv + source .venv/bin/activate + make install-dev install-doc + - name: Run doc tests + 
run: | + source .venv/bin/activate + make doc-test + + style-and-cicd-tests: + needs: changes + if: + needs.changes.outputs.python == 'true' || needs.changes.outputs.ci == + 'true' || github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] + env: + PYTEST_XDIST_AUTO_NUM_WORKERS: 2 + UV: '1' + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install OpenJDK and ODBC + run: + sudo apt-get update && sudo apt-get install -y default-jdk + unixodbc-dev + - name: Install SQLMesh dev dependencies + run: | + uv venv .venv + source .venv/bin/activate + make install-dev + - name: Fix Git URL override + run: + git config --global --unset url."ssh://git@github.com".insteadOf || + true + - name: Run linters and code style checks + run: | + source .venv/bin/activate + make py-style + - name: Exercise the benchmarks + if: matrix.python-version != '3.9' + run: | + source .venv/bin/activate + make benchmark-ci + - name: Run cicd tests + run: | + source .venv/bin/activate + make cicd-test + - name: Upload test results + uses: actions/upload-artifact@v5 + if: ${{ !cancelled() }} + with: + name: test-results-style-cicd-${{ matrix.python-version }} + path: test-results/ + retention-days: 7 + + cicd-tests-windows: + needs: changes + if: + needs.changes.outputs.python == 'true' || needs.changes.outputs.ci == + 'true' || github.ref == 'refs/heads/main' + runs-on: windows-latest + steps: + - name: Enable symlinks in git config + run: git config --global core.symlinks true + - uses: actions/checkout@v5 + - name: Install make + run: choco install make which -y + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + - name: Install SQLMesh dev dependencies + run: | + python -m 
venv venv + . ./venv/Scripts/activate + python.exe -m pip install --upgrade pip + make install-dev + - name: Run fast unit tests + run: | + . ./venv/Scripts/activate + which python + python --version + make fast-test + - name: Upload test results + uses: actions/upload-artifact@v5 + if: ${{ !cancelled() }} + with: + name: test-results-windows + path: test-results/ + retention-days: 7 + + migration-test: + needs: changes + if: + needs.changes.outputs.python == 'true' || needs.changes.outputs.ci == + 'true' || github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + env: + SQLMESH__DISABLE_ANONYMIZED_ANALYTICS: '1' + UV: '1' + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.10' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Run migration test - sushi + run: + ./.github/scripts/test_migration.sh sushi "--gateway + duckdb_persistent" + - name: Run migration test - sushi_dbt + run: + ./.github/scripts/test_migration.sh sushi_dbt "--config + migration_test_config" + + ui-style: + needs: [changes] + if: + needs.changes.outputs.client == 'true' || needs.changes.outputs.ci == + 'true' || github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-node@v6 + with: + node-version: '20' + - uses: pnpm/action-setup@v4 + with: + version: latest + - name: Get pnpm store directory + id: pnpm-cache + run: echo "store=$(pnpm store path)" >> $GITHUB_OUTPUT + - uses: actions/cache@v4 + with: + path: ${{ steps.pnpm-cache.outputs.store }} + key: pnpm-store-${{ hashFiles('pnpm-lock.yaml') }} + restore-keys: pnpm-store- + - name: Install dependencies + run: pnpm install + - name: Run linters and code style checks + run: pnpm run lint + + ui-test: + needs: changes + if: + needs.changes.outputs.client == 'true' || needs.changes.outputs.ci == + 'true' || github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + 
container: + image: mcr.microsoft.com/playwright:v1.54.1-jammy + steps: + - uses: actions/checkout@v5 + - name: Install pnpm via corepack + run: | + npm install --global corepack@latest + corepack enable + corepack prepare pnpm@latest-10 --activate + pnpm config set store-dir .pnpm-store + - name: Install dependencies + run: pnpm install + - name: Build UI + run: npm --prefix web/client run build + - name: Run unit tests + run: npm --prefix web/client run test:unit + - name: Run e2e tests + run: npm --prefix web/client run test:e2e + env: + PLAYWRIGHT_SKIP_BUILD: '1' + HOME: /root + + engine-tests-docker: + needs: changes + if: + needs.changes.outputs.python == 'true' || needs.changes.outputs.ci == + 'true' || github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + timeout-minutes: 25 + strategy: + fail-fast: false + matrix: + engine: + [duckdb, postgres, mysql, mssql, trino, spark, clickhouse, risingwave] + env: + PYTEST_XDIST_AUTO_NUM_WORKERS: 2 + SQLMESH__DISABLE_ANONYMIZED_ANALYTICS: '1' + UV: '1' + steps: + - uses: actions/checkout@v5 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install SQLMesh dev dependencies + run: | + uv venv .venv + source .venv/bin/activate + make install-dev + - name: Install OS-level dependencies + run: ./.github/scripts/install-prerequisites.sh "${{ matrix.engine }}" + - name: Run tests + run: | + source .venv/bin/activate + make ${{ matrix.engine }}-test + - name: Upload test results + uses: actions/upload-artifact@v5 + if: ${{ !cancelled() }} + with: + name: test-results-docker-${{ matrix.engine }} + path: test-results/ + retention-days: 7 + + engine-tests-cloud: + needs: engine-tests-docker + if: github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + timeout-minutes: 25 + strategy: + fail-fast: false + matrix: + engine: + [ + snowflake, + databricks, + redshift, + bigquery, + clickhouse-cloud, + athena, + fabric, + 
gcp-postgres, + ] + env: + PYTEST_XDIST_AUTO_NUM_WORKERS: 4 + SQLMESH__DISABLE_ANONYMIZED_ANALYTICS: '1' + UV: '1' + SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} + SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} + SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }} + SNOWFLAKE_AUTHENTICATOR: SNOWFLAKE_JWT + DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_SERVER_HOSTNAME }} + DATABRICKS_HOST: ${{ secrets.DATABRICKS_SERVER_HOSTNAME }} + DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }} + DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }} + DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }} + DATABRICKS_CONNECT_VERSION: ${{ secrets.DATABRICKS_CONNECT_VERSION }} + REDSHIFT_HOST: ${{ secrets.REDSHIFT_HOST }} + REDSHIFT_PORT: ${{ secrets.REDSHIFT_PORT }} + REDSHIFT_USER: ${{ secrets.REDSHIFT_USER }} + REDSHIFT_PASSWORD: ${{ secrets.REDSHIFT_PASSWORD }} + BIGQUERY_KEYFILE: ${{ secrets.BIGQUERY_KEYFILE }} + BIGQUERY_KEYFILE_CONTENTS: ${{ secrets.BIGQUERY_KEYFILE_CONTENTS }} + CLICKHOUSE_CLOUD_HOST: ${{ secrets.CLICKHOUSE_CLOUD_HOST }} + CLICKHOUSE_CLOUD_USERNAME: ${{ secrets.CLICKHOUSE_CLOUD_USERNAME }} + CLICKHOUSE_CLOUD_PASSWORD: ${{ secrets.CLICKHOUSE_CLOUD_PASSWORD }} + GCP_POSTGRES_KEYFILE_JSON: ${{ secrets.GCP_POSTGRES_KEYFILE_JSON }} + GCP_POSTGRES_INSTANCE_CONNECTION_STRING: + ${{ secrets.GCP_POSTGRES_INSTANCE_CONNECTION_STRING }} + GCP_POSTGRES_USER: ${{ secrets.GCP_POSTGRES_USER }} + GCP_POSTGRES_PASSWORD: ${{ secrets.GCP_POSTGRES_PASSWORD }} + ATHENA_S3_WAREHOUSE_LOCATION: ${{ secrets.ATHENA_S3_WAREHOUSE_LOCATION }} + ATHENA_WORK_GROUP: ${{ secrets.ATHENA_WORK_GROUP }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.AWS_REGION }} + FABRIC_HOST: ${{ secrets.FABRIC_HOST }} + FABRIC_CLIENT_ID: ${{ secrets.FABRIC_CLIENT_ID }} + FABRIC_CLIENT_SECRET: ${{ secrets.FABRIC_CLIENT_SECRET }} + FABRIC_TENANT_ID: ${{ secrets.FABRIC_TENANT_ID 
}} + FABRIC_WORKSPACE_ID: ${{ secrets.FABRIC_WORKSPACE_ID }} + steps: + - uses: actions/checkout@v5 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install OS-level dependencies + run: ./.github/scripts/install-prerequisites.sh "${{ matrix.engine }}" + - name: Install SQLMesh dev dependencies + run: | + uv venv .venv + source .venv/bin/activate + make install-dev + - name: Generate database name and setup credentials + run: | + UUID=$(cat /proc/sys/kernel/random/uuid) + TEST_DB_NAME="ci_${UUID:0:8}" + echo "TEST_DB_NAME=$TEST_DB_NAME" >> $GITHUB_ENV + echo "SNOWFLAKE_DATABASE=$TEST_DB_NAME" >> $GITHUB_ENV + echo "DATABRICKS_CATALOG=$TEST_DB_NAME" >> $GITHUB_ENV + echo "REDSHIFT_DATABASE=$TEST_DB_NAME" >> $GITHUB_ENV + echo "GCP_POSTGRES_DATABASE=$TEST_DB_NAME" >> $GITHUB_ENV + echo "FABRIC_DATABASE=$TEST_DB_NAME" >> $GITHUB_ENV + + echo "$SNOWFLAKE_PRIVATE_KEY_RAW" | base64 -d > /tmp/snowflake-keyfile.p8 + echo "SNOWFLAKE_PRIVATE_KEY_FILE=/tmp/snowflake-keyfile.p8" >> $GITHUB_ENV + env: + SNOWFLAKE_PRIVATE_KEY_RAW: ${{ secrets.SNOWFLAKE_PRIVATE_KEY_RAW }} + - name: Create test database + run: + ./.github/scripts/manage-test-db.sh "${{ matrix.engine }}" + "$TEST_DB_NAME" up + - name: Run tests + run: | + source .venv/bin/activate + make ${{ matrix.engine }}-test + - name: Tear down test database + if: always() + run: + ./.github/scripts/manage-test-db.sh "${{ matrix.engine }}" + "$TEST_DB_NAME" down + - name: Upload test results + uses: actions/upload-artifact@v5 + if: ${{ !cancelled() }} + with: + name: test-results-cloud-${{ matrix.engine }} + path: test-results/ + retention-days: 7 + test-vscode: env: PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 1 runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v6 with: node-version: '22' - uses: pnpm/action-setup@v4 @@ -30,9 +411,11 @@ jobs: test-vscode-e2e: runs-on: 
labels: [ubuntu-2204-8] + # As at 2026-01-12 this job flakes 100% of the time. It needs investigation + if: false steps: - uses: actions/checkout@v5 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v6 with: node-version: '22' - uses: pnpm/action-setup@v4 @@ -41,11 +424,11 @@ jobs: - name: Install dependencies run: pnpm install - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install uv - uses: astral-sh/setup-uv@v6 + uses: astral-sh/setup-uv@v7 - name: Install python dependencies run: | python -m venv .venv @@ -62,7 +445,7 @@ jobs: run: | source ../../.venv/bin/activate pnpm run test:e2e - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v5 if: ${{ !cancelled() }} with: name: playwright-report @@ -77,11 +460,11 @@ jobs: steps: - uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.10' - name: Install uv - uses: astral-sh/setup-uv@v6 + uses: astral-sh/setup-uv@v7 - name: Install SQLMesh dev dependencies run: | uv venv .venv @@ -98,30 +481,30 @@ jobs: if [[ "${{ matrix.dbt-version }}" == "1.3" ]] || \ [[ "${{ matrix.dbt-version }}" == "1.4" ]] || \ [[ "${{ matrix.dbt-version }}" == "1.5" ]]; then - + echo "DBT version is ${{ matrix.dbt-version }} (< 1.6.0), removing semantic_models and metrics sections..." - + schema_file="tests/fixtures/dbt/sushi_test/models/schema.yml" if [[ -f "$schema_file" ]]; then echo "Modifying $schema_file..." 
- + # Create a temporary file temp_file=$(mktemp) - + # Use awk to remove semantic_models and metrics sections awk ' /^semantic_models:/ { in_semantic=1; next } /^metrics:/ { in_metrics=1; next } - /^[^ ]/ && (in_semantic || in_metrics) { - in_semantic=0; - in_metrics=0 + /^[^ ]/ && (in_semantic || in_metrics) { + in_semantic=0; + in_metrics=0 } !in_semantic && !in_metrics { print } ' "$schema_file" > "$temp_file" - + # Move the temp file back mv "$temp_file" "$schema_file" - + echo "Successfully removed semantic_models and metrics sections" else echo "Schema file not found at $schema_file, skipping..." diff --git a/.github/workflows/private-repo-test.yaml b/.github/workflows/private-repo-test.yaml deleted file mode 100644 index 07253f1a00..0000000000 --- a/.github/workflows/private-repo-test.yaml +++ /dev/null @@ -1,97 +0,0 @@ -name: Private Repo Testing - -on: - pull_request_target: - branches: - - main - -concurrency: - group: 'private-test-${{ github.event.pull_request.number }}' - cancel-in-progress: true - -permissions: - contents: read - -jobs: - trigger-private-test: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v5 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.sha || github.ref }} - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - name: Install uv - uses: astral-sh/setup-uv@v6 - - name: Set up Node.js for UI build - uses: actions/setup-node@v4 - with: - node-version: '20' - - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - version: latest - - name: Install UI dependencies - run: pnpm install - - name: Build UI - run: pnpm --prefix web/client run build - - name: Install Python dependencies - run: | - python -m venv .venv - source .venv/bin/activate - pip install build twine setuptools_scm - - name: Generate development version - id: version - run: | - source .venv/bin/activate - # Generate a PEP 440 compliant unique version including run attempt - 
BASE_VERSION=$(python .github/scripts/get_scm_version.py) - COMMIT_SHA=$(git rev-parse --short HEAD) - # Use PEP 440 compliant format: base.devN+pr.sha.attempt - UNIQUE_VERSION="${BASE_VERSION}+pr${{ github.event.pull_request.number }}.${COMMIT_SHA}.run${{ github.run_attempt }}" - echo "version=$UNIQUE_VERSION" >> $GITHUB_OUTPUT - echo "Generated unique version with run attempt: $UNIQUE_VERSION" - - name: Build package - env: - SETUPTOOLS_SCM_PRETEND_VERSION: ${{ steps.version.outputs.version }} - run: | - source .venv/bin/activate - python -m build - - name: Configure PyPI for private repository - env: - TOBIKO_PRIVATE_PYPI_URL: ${{ secrets.TOBIKO_PRIVATE_PYPI_URL }} - TOBIKO_PRIVATE_PYPI_KEY: ${{ secrets.TOBIKO_PRIVATE_PYPI_KEY }} - run: ./.circleci/update-pypirc.sh - - name: Publish to private PyPI - run: | - source .venv/bin/activate - python -m twine upload -r tobiko-private dist/* - - name: Publish Python Tests package - env: - SETUPTOOLS_SCM_PRETEND_VERSION: ${{ steps.version.outputs.version }} - run: | - source .venv/bin/activate - unset TWINE_USERNAME TWINE_PASSWORD && make publish-tests - - name: Get GitHub App token - id: get_token - uses: actions/create-github-app-token@v2 - with: - private-key: ${{ secrets.TOBIKO_RENOVATE_BOT_PRIVATE_KEY }} - app-id: ${{ secrets.TOBIKO_RENOVATE_BOT_APP_ID }} - owner: ${{ secrets.PRIVATE_REPO_OWNER }} - - name: Trigger private repository workflow - uses: convictional/trigger-workflow-and-wait@v1.6.5 - with: - owner: ${{ secrets.PRIVATE_REPO_OWNER }} - repo: ${{ secrets.PRIVATE_REPO_NAME }} - github_token: ${{ steps.get_token.outputs.token }} - workflow_file_name: ${{ secrets.PRIVATE_WORKFLOW_FILE }} - client_payload: | - { - "package_version": "${{ steps.version.outputs.version }}", - "pr_number": "${{ github.event.pull_request.number }}" - } diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000000..75512ffd72 --- /dev/null +++ b/.github/workflows/release.yaml @@ 
-0,0 +1,71 @@ +name: Release +on: + push: + tags: + - 'v*.*.*' +permissions: + contents: write +jobs: + ui-build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-node@v6 + with: + node-version: '20' + - uses: pnpm/action-setup@v4 + with: + version: latest + - name: Install dependencies + run: pnpm install + - name: Build UI + run: pnpm --prefix web/client run build + - name: Upload UI build artifact + uses: actions/upload-artifact@v5 + with: + name: ui-dist + path: web/client/dist/ + retention-days: 1 + + publish: + needs: ui-build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - name: Download UI build artifact + uses: actions/download-artifact@v5 + with: + name: ui-dist + path: web/client/dist/ + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.10' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install build dependencies + run: pip install build twine setuptools_scm + - name: Publish Python package + run: make publish + env: + TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} + TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} + - name: Update pypirc for private repository + run: ./.github/scripts/update-pypirc.sh + env: + TOBIKO_PRIVATE_PYPI_URL: ${{ secrets.TOBIKO_PRIVATE_PYPI_URL }} + TOBIKO_PRIVATE_PYPI_KEY: ${{ secrets.TOBIKO_PRIVATE_PYPI_KEY }} + - name: Publish Python Tests package + run: unset TWINE_USERNAME TWINE_PASSWORD && make publish-tests + + gh-release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + - name: Create release on GitHub + uses: softprops/action-gh-release@v2 + with: + generate_release_notes: true + tag_name: ${{ github.ref_name }} diff --git a/.github/workflows/release_extension.yaml b/.github/workflows/release_extension.yaml index bb52c32966..ed46d40d47 100644 --- a/.github/workflows/release_extension.yaml +++ b/.github/workflows/release_extension.yaml @@ -28,7 +28,7 @@ jobs: fi echo "Version 
format is valid: $version" - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: '20' - name: Install pnpm diff --git a/.github/workflows/release_shared_js.yaml b/.github/workflows/release_shared_js.yaml index 96992ae637..eb68163739 100644 --- a/.github/workflows/release_shared_js.yaml +++ b/.github/workflows/release_shared_js.yaml @@ -31,7 +31,7 @@ jobs: fi echo "Version format is valid: $version" - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: '20' registry-url: 'https://registry.npmjs.org' diff --git a/.gitignore b/.gitignore index 72b41b5ce1..16593984dd 100644 --- a/.gitignore +++ b/.gitignore @@ -138,6 +138,12 @@ dmypy.json *~ *# +# Vim +*.swp +*.swo +.null-ls* + + *.duckdb *.duckdb.wal @@ -158,3 +164,4 @@ spark-warehouse/ # claude .claude/ + diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..287a87dab5 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,5 @@ +# Code of Conduct + +SQLMesh follows the [LF Projects Code of Conduct](https://lfprojects.org/policies/code-of-conduct/). All participants in the project are expected to abide by it. + +If you believe someone is violating the code of conduct, please report it by following the instructions in the [LF Projects Code of Conduct](https://lfprojects.org/policies/code-of-conduct/). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..0e1d8e1c6e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,90 @@ +# Contributing to SQLMesh + +## Welcome + +SQLMesh is a project of the Linux Foundation. We welcome contributions from anyone — whether you're fixing a bug, improving documentation, or proposing a new feature. + +## Technical Steering Committee (TSC) + +The TSC is responsible for technical oversight of the SQLMesh project, including coordinating technical direction, approving contribution policies, and maintaining community norms. 
+ +Initial TSC voting members are the project's Maintainers: + +| Name | GitHub Handle | Affiliation | Role | +|---------------------|---------------|----------------|------------| +| Alexander Butler | z3z1ma | Harness | TSC Member | +| Alexander Filipchik | afilipchik | Cloud Kitchens | TSC Member | +| Reid Hooper | rhooper9711 | Benzinga | TSC Member | +| Yuki Kakegawa | StuffbyYuki | Jump.ai | TSC Member | +| Toby Mao | tobymao | Fivetran | TSC Chair | +| Alex Wilde | alexminerv | Minerva | TSC Member | + + +## Roles + +**Contributors**: Anyone who contributes code, documentation, or other technical artifacts to the project. + +**Maintainers**: Contributors who have earned the ability to modify source code, documentation, or other technical artifacts. A Contributor may become a Maintainer by majority approval of the TSC. A Maintainer may be removed by majority approval of the TSC. + +## How to Contribute + +1. Fork the repository on GitHub +2. Create a branch for your changes +3. Make your changes and commit them with a sign-off (see DCO section below) +4. Submit a pull request against the `main` branch + +File issues at [github.com/sqlmesh/sqlmesh/issues](https://github.com/sqlmesh/sqlmesh/issues). + +## Developer Certificate of Origin (DCO) + +All contributions must include a `Signed-off-by` line in the commit message per the [Developer Certificate of Origin](DCO). This certifies that you wrote the contribution or have the right to submit it under the project's open source license. + +Use `git commit -s` to add the sign-off automatically: + +```bash +git commit -s -m "Your commit message" +``` + +To fix a commit that is missing the sign-off: + +```bash +git commit --amend -s +``` + +To add a sign-off to multiple commits: + +```bash +git rebase HEAD~N --signoff +``` + +## Development Setup + +See [docs/development.md](docs/development.md) for full setup instructions. 
Key commands: + +```bash +python -m venv .venv +source .venv/bin/activate +make install-dev +make style # Run before submitting +make fast-test # Quick test suite +``` + +## Coding Standards + +- Run `make style` before submitting a pull request +- Follow existing code patterns and conventions in the codebase +- New files should include an SPDX license header: + ```python + # SPDX-License-Identifier: Apache-2.0 + ``` + +## Pull Request Process + +- Describe your changes clearly in the pull request description +- Ensure all CI checks pass +- Include a DCO sign-off on all commits (`git commit -s`) +- Be responsive to review feedback from maintainers + +## Licensing + +Code contributions are licensed under the [Apache License 2.0](LICENSE). Documentation contributions are licensed under [Creative Commons Attribution 4.0 International (CC-BY-4.0)](https://creativecommons.org/licenses/by/4.0/). See the LICENSE file and the [technical charter](sqlmesh-technical-charter.pdf) for details. diff --git a/DCO b/DCO new file mode 100644 index 0000000000..49b8cb0549 --- /dev/null +++ b/DCO @@ -0,0 +1,34 @@ +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. 
+ + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. diff --git a/GOVERNANCE.md b/GOVERNANCE.md new file mode 100644 index 0000000000..44b6bc9947 --- /dev/null +++ b/GOVERNANCE.md @@ -0,0 +1,62 @@ +# SQLMesh Project Governance + +## Overview + +SQLMesh is a Series of LF Projects, LLC. The project is governed by its [Technical Charter](sqlmesh-technical-charter.pdf) and overseen by the Technical Steering Committee (TSC). SQLMesh is a project of the [Linux Foundation](https://www.linuxfoundation.org/). 
+ +## Technical Steering Committee + +The TSC is responsible for all technical oversight of the project, including: + +- Coordinating the technical direction of the project +- Approving project or system proposals +- Organizing sub-projects and removing sub-projects +- Creating sub-committees or working groups to focus on cross-project technical issues +- Appointing representatives to work with other open source or open standards communities +- Establishing community norms, workflows, issuing releases, and security vulnerability reports +- Approving and implementing policies for contribution requirements +- Coordinating any marketing, events, or communications regarding the project + +## TSC Composition + +TSC voting members are initially the project's Maintainers as listed in [CONTRIBUTING.md](CONTRIBUTING.md). The TSC may elect a Chair from among its voting members. The Chair presides over TSC meetings and serves as the primary point of contact with the Linux Foundation. + +## Decision Making + +The project operates as a consensus-based community. When a formal vote is required: + +- Each voting TSC member receives one vote +- A quorum of 50% of voting members is required to conduct a vote +- Decisions are made by a majority of those present when quorum is met +- Electronic votes (e.g., via GitHub issues or mailing list) require a majority of all voting members to pass +- Votes that do not meet quorum or remain unresolved may be referred to the Series Manager for resolution + +## Charter Amendments + +The technical charter may be amended by a two-thirds vote of the entire TSC, subject to approval by LF Projects, LLC. + +## Reference + +The full technical charter is available at [sqlmesh-technical-charter.pdf](sqlmesh-technical-charter.pdf). 
+ +# TSC Meeting Minutes + +## 2026-03-10 — Initial TSC Meeting + +**Members present:** Toby Mao (tobymao) + +### Vote 1: Elect Toby Mao as TSC Chair +- **Motion by:** Toby Mao +- **Votes:** Toby Mao: Yes +- **Result:** Approved (1-0-0, yes-no-abstain) + +### Vote 2: Elect TSC founding members +- **Question:** Shall the following members be added to the TSC? + - Alexander Butler (z3z1ma) + - Alexander Filipchik (afilipchik) + - Reid Hooper (rhooper9711) + - Yuki Kakegawa (StuffbyYuki) + - Alex Wilde (alexminerv) +- **Motion by:** Toby Mao +- **Votes:** Toby Mao: Yes +- **Result:** Approved (1-0-0, yes-no-abstain) diff --git a/LICENSE b/LICENSE index eabfad022a..7e95724816 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2024 Tobiko Data Inc. + Copyright Contributors to the SQLMesh project Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/Makefile b/Makefile index 96305c4bfb..843beb0624 100644 --- a/Makefile +++ b/Makefile @@ -36,11 +36,16 @@ install-dev-dbt-%: if [ "$$version" = "1.10.0" ]; then \ echo "Applying special handling for dbt 1.10.0"; \ $(SED_INPLACE) -E 's/"(dbt-core)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \ - $(SED_INPLACE) -E 's/"(dbt-(bigquery|duckdb|snowflake|athena-community|clickhouse|databricks|redshift|trino))[^"]*"/"\1"/g' pyproject.toml; \ + $(SED_INPLACE) -E 's/"(dbt-(bigquery|duckdb|snowflake|athena-community|clickhouse|redshift|trino))[^"]*"/"\1"/g' pyproject.toml; \ + $(SED_INPLACE) -E 's/"(dbt-databricks)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \ else \ echo "Applying version $$version to all dbt packages"; \ $(SED_INPLACE) -E 's/"(dbt-[^"><=~!]+)[^"]*"/"\1~='"$$version"'"/g' pyproject.toml; \ fi; \ + if printf '%s\n' "$$version" | awk -F. 
'{ if ($$1 == 1 && (($$2 >= 3 && $$2 <= 5) || $$2 == 10)) exit 0; exit 1 }'; then \ + echo "Applying numpy<2 constraint for dbt $$version"; \ + $(SED_INPLACE) 's/"numpy"/"numpy<2"/g' pyproject.toml; \ + fi; \ $(MAKE) install-dev; \ if [ "$$version" = "1.6.0" ]; then \ echo "Applying overrides for dbt 1.6.0"; \ @@ -50,6 +55,14 @@ install-dev-dbt-%: echo "Applying overrides for dbt 1.7.0"; \ $(PIP) install 'databricks-sdk==0.28.0' --reinstall; \ fi; \ + if [ "$$version" = "1.5.0" ]; then \ + echo "Applying overrides for dbt 1.5.0"; \ + $(PIP) install 'dbt-databricks==1.5.6' 'numpy<2' --reinstall; \ + fi; \ + if [ "$$version" = "1.3.0" ]; then \ + echo "Applying overrides for dbt $$version - upgrading google-cloud-bigquery"; \ + $(PIP) install 'google-cloud-bigquery>=3.0.0' --upgrade; \ + fi; \ mv pyproject.toml.backup pyproject.toml; \ echo "Restored original pyproject.toml" @@ -98,6 +111,9 @@ ui-build: clean-build: rm -rf build/ && rm -rf dist/ && rm -rf *.egg-info +clear-caches: + find . 
-type d -name ".cache" -exec rm -rf {} + 2>/dev/null && echo "Successfully removed all .cache directories" + dev-publish: ui-build clean-build publish jupyter-example: @@ -108,13 +124,13 @@ engine-up: engine-clickhouse-up engine-mssql-up engine-mysql-up engine-postgres- engine-down: engine-clickhouse-down engine-mssql-down engine-mysql-down engine-postgres-down engine-spark-down engine-trino-down fast-test: - pytest -n auto -m "fast and not cicdonly" --junitxml=test-results/junit-fast-test.xml && pytest -m "isolated" && pytest -m "registry_isolation" + pytest -n auto -m "fast and not cicdonly" --junitxml=test-results/junit-fast-test.xml && pytest -m "isolated" && pytest -m "registry_isolation" && pytest -m "dialect_isolated" slow-test: - pytest -n auto -m "(fast or slow) and not cicdonly" && pytest -m "isolated" && pytest -m "registry_isolation" + pytest -n auto -m "(fast or slow) and not cicdonly" && pytest -m "isolated" && pytest -m "registry_isolation" && pytest -m "dialect_isolated" cicd-test: - pytest -n auto -m "fast or slow" --junitxml=test-results/junit-cicd.xml && pytest -m "isolated" && pytest -m "registry_isolation" + pytest -n auto -m "(fast or slow) and not pyspark" --junitxml=test-results/junit-cicd.xml && pytest -m "pyspark" && pytest -m "isolated" && pytest -m "registry_isolation" && pytest -m "dialect_isolated" core-fast-test: pytest -n auto -m "fast and not web and not github and not dbt and not jupyter" @@ -150,7 +166,7 @@ web-test: pytest -n auto -m "web" guard-%: - @ if [ "${${*}}" = "" ]; then \ + @ if ! 
printenv ${*} > /dev/null 2>&1; then \ echo "Environment variable $* not set"; \ exit 1; \ fi @@ -160,7 +176,7 @@ engine-%-install: engine-docker-%-up: docker compose -f ./tests/core/engine_adapter/integration/docker/compose.${*}.yaml up -d - ./.circleci/wait-for-db.sh ${*} + ./.github/scripts/wait-for-db.sh ${*} engine-%-up: engine-%-install engine-docker-%-up @echo "Engine '${*}' is up and running" @@ -200,14 +216,14 @@ risingwave-test: engine-risingwave-up # Cloud Engines # ################# -snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER guard-SNOWFLAKE_PASSWORD engine-snowflake-install +snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER engine-snowflake-install pytest -n auto -m "snowflake" --reruns 3 --junitxml=test-results/junit-snowflake.xml bigquery-test: guard-BIGQUERY_KEYFILE engine-bigquery-install $(PIP) install -e ".[bigframes]" pytest -n auto -m "bigquery" --reruns 3 --junitxml=test-results/junit-bigquery.xml -databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_ACCESS_TOKEN guard-DATABRICKS_CONNECT_VERSION engine-databricks-install +databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_CONNECT_VERSION engine-databricks-install $(PIP) install 'databricks-connect==${DATABRICKS_CONNECT_VERSION}' pytest -n auto -m "databricks" --reruns 3 --junitxml=test-results/junit-databricks.xml diff --git a/README.md b/README.md index 3215f7cceb..41f78cc138 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@

SQLMesh logo

+

SQLMesh is a project of the Linux Foundation.

SQLMesh is a next-generation data transformation framework designed to ship data quickly, efficiently, and without error. Data teams can run and deploy data transformations written in SQL or Python with visibility and control at any size. @@ -12,7 +13,7 @@ It is more than just a [dbt alternative](https://tobikodata.com/reduce_costs_wit ## Core Features -SQLMesh Plan Mode +SQLMesh Plan Mode > Get instant SQL impact and context of your changes, both in the CLI and in the [SQLMesh VSCode Extension](https://sqlmesh.readthedocs.io/en/latest/guides/vscode/?h=vs+cod) @@ -121,19 +122,19 @@ outputs: * Never build a table [more than once](https://tobikodata.com/simplicity-or-efficiency-how-dbt-makes-you-choose.html) * Track what data’s been modified and run only the necessary transformations for [incremental models](https://tobikodata.com/correctly-loading-incremental-data-at-scale.html) * Run [unit tests](https://tobikodata.com/we-need-even-greater-expectations.html) for free and configure automated audits -* Run [table diffs](https://sqlmesh.readthedocs.io/en/stable/examples/sqlmesh_cli_crash_course/?h=crash#run-data-diff-against-prod) between prod and dev based on tables/views impacted by a change +* Run [table diffs](https://sqlmesh.readthedocs.io/en/stable/examples/sqlmesh_cli_crash_course/?h=crash#run-data-diff-against-prod) between prod and dev based on tables/views impacted by a change
Level Up Your SQL Write SQL in any dialect and SQLMesh will transpile it to your target SQL dialect on the fly before sending it to the warehouse. -Transpile Example +Transpile Example
* Debug transformation errors *before* you run them in your warehouse in [10+ different SQL dialects](https://sqlmesh.readthedocs.io/en/stable/integrations/overview/#execution-engines) * Definitions using [simply SQL](https://sqlmesh.readthedocs.io/en/stable/concepts/models/sql_models/#sql-based-definition) (no need for redundant and confusing `Jinja` + `YAML`) * See impact of changes before you run them in your warehouse with column-level lineage -For more information, check out the [website](https://www.tobikodata.com/sqlmesh) and [documentation](https://sqlmesh.readthedocs.io/en/stable/). +For more information, check out the [documentation](https://sqlmesh.readthedocs.io/en/stable/). ## Getting Started Install SQLMesh through [pypi](https://pypi.org/project/sqlmesh/) by running: @@ -169,21 +170,24 @@ sqlmesh init # follow the prompts to get started (choose DuckDB) Follow the [quickstart guide](https://sqlmesh.readthedocs.io/en/stable/quickstart/cli/) to learn how to use SQLMesh. You already have a head start! -Follow the [crash course](https://sqlmesh.readthedocs.io/en/stable/examples/sqlmesh_cli_crash_course/) to learn the core movesets and use the easy to reference cheat sheet. +Follow the [crash course](https://sqlmesh.readthedocs.io/en/stable/examples/sqlmesh_cli_crash_course/) to learn the core movesets and use the easy to reference cheat sheet. Follow this [example](https://sqlmesh.readthedocs.io/en/stable/examples/incremental_time_full_walkthrough/) to learn how to use SQLMesh in a full walkthrough. ## Join Our Community -Together, we want to build data transformation without the waste. Connect with us in the following ways: +Connect with us in the following ways: * Join the [Tobiko Slack Community](https://tobikodata.com/slack) to ask questions, or just to say hi! 
-* File an issue on our [GitHub](https://github.com/TobikoData/sqlmesh/issues/new) +* File an issue on our [GitHub](https://github.com/SQLMesh/sqlmesh/issues/new) * Send us an email at [hello@tobikodata.com](mailto:hello@tobikodata.com) with your questions or feedback * Read our [blog](https://tobikodata.com/blog) -## Contribution -Contributions in the form of issues or pull requests (from fork) are greatly appreciated. +## Contributing +We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to contribute, including our DCO sign-off requirement. -[Read more](https://sqlmesh.readthedocs.io/en/stable/development/) on how to contribute to SQLMesh open source. +Please review our [Code of Conduct](CODE_OF_CONDUCT.md) and [Governance](GOVERNANCE.md) documents. -[Watch this video walkthrough](https://www.loom.com/share/2abd0d661c12459693fa155490633126?sid=b65c1c0f-8ef7-4036-ad19-3f85a3b87ff2) to see how our team contributes a feature to SQLMesh. +[Read more](https://sqlmesh.readthedocs.io/en/stable/development/) on how to set up your development environment. + +## License +This project is licensed under the [Apache License 2.0](LICENSE). Documentation is licensed under [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..2ffffacea3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +## Reporting a Vulnerability + +If you discover a security vulnerability in SQLMesh, please report it through [GitHub Security Advisories](https://github.com/sqlmesh/sqlmesh/security/advisories/new). Do not file a public issue for security vulnerabilities. + +## Response + +We will acknowledge receipt of your report within 72 hours and aim to provide an initial assessment within one week. + +## Disclosure + +We follow a coordinated disclosure process. We will work with you to understand and address the issue before any public disclosure. 
+ +## Supported Versions + +Security fixes are generally applied to the latest release. Critical vulnerabilities may be backported to recent prior releases at the discretion of the maintainers. diff --git a/docs/HOWTO.md b/docs/HOWTO.md index 9ccefff077..edd7c9833f 100644 --- a/docs/HOWTO.md +++ b/docs/HOWTO.md @@ -92,7 +92,7 @@ You will work on the docs in a local copy of the sqlmesh git repository. If you don't have a copy of the repo on your machine, open a terminal and clone it into a `sqlmesh` directory by executing: ``` bash -git clone https://github.com/TobikoData/sqlmesh.git +git clone https://github.com/SQLMesh/sqlmesh.git ``` And navigate to the directory: diff --git a/docs/cloud/features/scheduler/hybrid_executors_docker_compose.md b/docs/cloud/features/scheduler/hybrid_executors_docker_compose.md index e3bd072752..8f8f323139 100644 --- a/docs/cloud/features/scheduler/hybrid_executors_docker_compose.md +++ b/docs/cloud/features/scheduler/hybrid_executors_docker_compose.md @@ -25,7 +25,7 @@ Both executors must be properly configured with environment variables to connect 1. **Get docker-compose file**: - Download the [docker-compose.yml](https://raw.githubusercontent.com/TobikoData/sqlmesh/refs/heads/main/docs/cloud/features/scheduler/scheduler/docker-compose.yml) and [.env.example](https://raw.githubusercontent.com/TobikoData/sqlmesh/refs/heads/main/docs/cloud/features/scheduler/scheduler/.env.example) files to a local directory. + Download the [docker-compose.yml](https://raw.githubusercontent.com/SQLMesh/sqlmesh/refs/heads/main/docs/cloud/features/scheduler/scheduler/docker-compose.yml) and [.env.example](https://raw.githubusercontent.com/SQLMesh/sqlmesh/refs/heads/main/docs/cloud/features/scheduler/scheduler/.env.example) files to a local directory. 2. 
**Create your environment file**: diff --git a/docs/concepts/macros/sqlmesh_macros.md b/docs/concepts/macros/sqlmesh_macros.md index f28e77e203..c7d967b12c 100644 --- a/docs/concepts/macros/sqlmesh_macros.md +++ b/docs/concepts/macros/sqlmesh_macros.md @@ -2111,7 +2111,7 @@ FROM some_table; Generics can be nested and are resolved recursively allowing for fairly robust type hinting. -See examples of the coercion function in action in the test suite [here](https://github.com/TobikoData/sqlmesh/blob/main/tests/core/test_macros.py). +See examples of the coercion function in action in the test suite [here](https://github.com/SQLMesh/sqlmesh/blob/main/tests/core/test_macros.py). #### Conclusion diff --git a/docs/concepts/models/sql_models.md b/docs/concepts/models/sql_models.md index 28bf0fbe78..217cd7a6a2 100644 --- a/docs/concepts/models/sql_models.md +++ b/docs/concepts/models/sql_models.md @@ -149,7 +149,8 @@ MODEL ( SELECT @field_a, - @{field_b} AS field_b + @{field_b} AS field_b, + @'prefix_@{field_a}_suffix' AS literal_example FROM @customer.some_source ``` @@ -163,8 +164,9 @@ MODEL ( ); SELECT - 'x', - y AS field_b + x, + y AS field_b, + 'prefix_x_suffix' AS literal_example FROM customer1.some_source -- This uses the second variable mapping @@ -174,14 +176,13 @@ MODEL ( ); SELECT - 'z', - w AS field_b + z, + w AS field_b, + 'prefix_z_suffix' AS literal_example FROM customer2.some_source ``` -Note the use of curly brace syntax `@{field_b} AS field_b` in the model query above. It is used to tell SQLMesh that the rendered variable value should be treated as a SQL identifier instead of a string literal. - -You can see the different behavior in the first rendered model. `@field_a` is resolved to the string literal `'x'` (with single quotes) and `@{field_b}` is resolved to the identifier `y` (without quotes). Learn more about the curly brace syntax [here](../../concepts/macros/sqlmesh_macros.md#embedding-variables-in-strings). 
+Both `@field_a` and `@{field_b}` resolve blueprint variable values as SQL identifiers. The curly brace syntax is useful when embedding a variable within a larger string where the variable boundary would otherwise be ambiguous (e.g. `@{customer}_suffix`). To produce a string literal with interpolated variables, use the `@'...@{var}...'` syntax as shown with `literal_example` above. Learn more about the curly brace syntax [here](../../concepts/macros/sqlmesh_macros.md#embedding-variables-in-strings). Blueprint variable mappings can also be constructed dynamically, e.g., by using a macro: `blueprints @gen_blueprints()`. This is useful in cases where the `blueprints` list needs to be sourced from external sources, such as CSV files. diff --git a/docs/development.md b/docs/development.md index 662ad17d6c..ff8b250d87 100644 --- a/docs/development.md +++ b/docs/development.md @@ -1,6 +1,6 @@ # Contribute to development -SQLMesh is licensed under [Apache 2.0](https://github.com/TobikoData/sqlmesh/blob/main/LICENSE). We encourage community contribution and would love for you to get involved. The following document outlines the process to contribute to SQLMesh. +SQLMesh is licensed under [Apache 2.0](https://github.com/SQLMesh/sqlmesh/blob/main/LICENSE). We encourage community contribution and would love for you to get involved. The following document outlines the process to contribute to SQLMesh. ## Prerequisites diff --git a/docs/examples/incremental_time_full_walkthrough.md b/docs/examples/incremental_time_full_walkthrough.md index 4e1d577d2c..ffa9def911 100644 --- a/docs/examples/incremental_time_full_walkthrough.md +++ b/docs/examples/incremental_time_full_walkthrough.md @@ -689,7 +689,7 @@ In the terminal output, I can see the change displayed like before, but I see so I leave the [effective date](../concepts/plans.md#effective-date) prompt blank because I do not want to reprocess historical data in `prod` - I only want to apply this new business logic going forward. 
-However, I do want to preview the new business logic in my `dev` environment before pushing to `prod`. Because I have [configured SQLMesh to create previews](https://github.com/TobikoData/sqlmesh-demos/blob/e0e3899e173cf7b8447ae707402a9df59911d1c0/config.yaml#L42) for forward-only models in my `config.yaml` file, SQLMesh has created a temporary copy of the `prod` table in my `dev` environment, so I can test the new logic on historical data. +However, I do want to preview the new business logic in my `dev` environment before pushing to `prod`. Because I have [configured SQLMesh to create previews](https://github.com/SQLMesh/sqlmesh-demos/blob/e0e3899e173cf7b8447ae707402a9df59911d1c0/config.yaml#L42) for forward-only models in my `config.yaml` file, SQLMesh has created a temporary copy of the `prod` table in my `dev` environment, so I can test the new logic on historical data. I specify the beginning of the preview's historical data window as `2024-10-27` in the preview start date prompt, and I specify the end of the window as now by leaving the preview end date prompt blank. diff --git a/docs/examples/overview.md b/docs/examples/overview.md index a252b3f9c2..e7dbc1916d 100644 --- a/docs/examples/overview.md +++ b/docs/examples/overview.md @@ -27,16 +27,16 @@ Walkthroughs are easy to follow and provide lots of information in a self-contai ## Projects -SQLMesh example projects are stored in the [sqlmesh-examples Github repository](https://github.com/TobikoData/sqlmesh-examples). The repository's front page includes additional information about how to download the files and set up the projects. +SQLMesh example projects are stored in the [sqlmesh-examples Github repository](https://github.com/SQLMesh/sqlmesh-examples). The repository's front page includes additional information about how to download the files and set up the projects. The two most comprehensive example projects use the SQLMesh `sushi` data, based on a fictional sushi restaurant. 
("Tobiko" is the Japanese word for flying fish roe, commonly used in sushi.) -The `sushi` data is described in an [overview notebook](https://github.com/TobikoData/sqlmesh-examples/blob/main/001_sushi/sushi-overview.ipynb) in the repository. +The `sushi` data is described in an [overview notebook](https://github.com/SQLMesh/sqlmesh-examples/blob/main/001_sushi/sushi-overview.ipynb) in the repository. The example repository include two versions of the `sushi` project, at different levels of complexity: -- The [`simple` project](https://github.com/TobikoData/sqlmesh-examples/tree/main/001_sushi/1_simple) contains four `VIEW` and one `SEED` model +- The [`simple` project](https://github.com/SQLMesh/sqlmesh-examples/tree/main/001_sushi/1_simple) contains four `VIEW` and one `SEED` model - The `VIEW` model kind refreshes every run, making it easy to reason about SQLMesh's behavior -- The [`moderate` project](https://github.com/TobikoData/sqlmesh-examples/tree/main/001_sushi/2_moderate) contains five `INCREMENTAL_BY_TIME_RANGE`, one `FULL`, one `VIEW`, and one `SEED` model +- The [`moderate` project](https://github.com/SQLMesh/sqlmesh-examples/tree/main/001_sushi/2_moderate) contains five `INCREMENTAL_BY_TIME_RANGE`, one `FULL`, one `VIEW`, and one `SEED` model - The incremental models allow you to observe how and when new data is transformed by SQLMesh - Some models, like `customer_revenue_lifetime`, demonstrate more advanced incremental queries like customer lifetime value calculation diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index d2e294a589..d6d4f20c11 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -21,6 +21,9 @@ The sources have the following order of precedence: 2. `config.yaml` or `config.py` in the `~/.sqlmesh` folder. 3. `config.yaml` or `config.py` in a project folder. [LOWEST PRECEDENCE] +!!! 
note + To relocate the `.sqlmesh` folder, set the `SQLMESH_HOME` environment variable to your preferred directory path. + ### File type You can specify a SQLMesh configuration in either YAML or Python. diff --git a/docs/guides/custom_materializations.md b/docs/guides/custom_materializations.md index 58eb64026d..905a3d017e 100644 --- a/docs/guides/custom_materializations.md +++ b/docs/guides/custom_materializations.md @@ -24,13 +24,13 @@ A custom materialization must: - Be written in Python code - Be a Python class that inherits the SQLMesh `CustomMaterialization` base class -- Use or override the `insert` method from the SQLMesh [`MaterializableStrategy`](https://github.com/TobikoData/sqlmesh/blob/034476e7f64d261860fd630c3ac56d8a9c9f3e3a/sqlmesh/core/snapshot/evaluator.py#L1146) class/subclasses +- Use or override the `insert` method from the SQLMesh [`MaterializableStrategy`](https://github.com/SQLMesh/sqlmesh/blob/034476e7f64d261860fd630c3ac56d8a9c9f3e3a/sqlmesh/core/snapshot/evaluator.py#L1146) class/subclasses - Be loaded or imported by SQLMesh at runtime A custom materialization may: -- Use or override methods from the SQLMesh [`MaterializableStrategy`](https://github.com/TobikoData/sqlmesh/blob/034476e7f64d261860fd630c3ac56d8a9c9f3e3a/sqlmesh/core/snapshot/evaluator.py#L1146) class/subclasses -- Use or override methods from the SQLMesh [`EngineAdapter`](https://github.com/TobikoData/sqlmesh/blob/034476e7f64d261860fd630c3ac56d8a9c9f3e3a/sqlmesh/core/engine_adapter/base.py#L67) class/subclasses +- Use or override methods from the SQLMesh [`MaterializableStrategy`](https://github.com/SQLMesh/sqlmesh/blob/034476e7f64d261860fd630c3ac56d8a9c9f3e3a/sqlmesh/core/snapshot/evaluator.py#L1146) class/subclasses +- Use or override methods from the SQLMesh [`EngineAdapter`](https://github.com/SQLMesh/sqlmesh/blob/034476e7f64d261860fd630c3ac56d8a9c9f3e3a/sqlmesh/core/engine_adapter/base.py#L67) class/subclasses - Execute arbitrary SQL code and fetch results with the engine 
adapter `execute` and related methods A custom materialization may perform arbitrary Python processing with Pandas or other libraries, but in most cases that logic should reside in a [Python model](../concepts/models/python_models.md) instead of the materialization. @@ -157,7 +157,7 @@ class CustomFullMaterialization(CustomMaterialization): ) -> None: config_value = model.custom_materialization_properties["config_key"] # Proceed with implementing the insertion logic. - # Example existing materialization for look and feel: https://github.com/TobikoData/sqlmesh/blob/main/sqlmesh/core/snapshot/evaluator.py + # Example existing materialization for look and feel: https://github.com/SQLMesh/sqlmesh/blob/main/sqlmesh/core/snapshot/evaluator.py ``` ## Extending `CustomKind` @@ -292,4 +292,4 @@ setup( ) ``` -Refer to the SQLMesh Github [custom_materializations](https://github.com/TobikoData/sqlmesh/tree/main/examples/custom_materializations) example for more details on Python packaging. +Refer to the SQLMesh Github [custom_materializations](https://github.com/SQLMesh/sqlmesh/tree/main/examples/custom_materializations) example for more details on Python packaging. diff --git a/docs/guides/linter.md b/docs/guides/linter.md index 22cc5077b8..6cdac167ec 100644 --- a/docs/guides/linter.md +++ b/docs/guides/linter.md @@ -16,7 +16,7 @@ Some rules validate that a pattern is *not* present, such as not allowing `SELEC Rules are defined in Python. Each rule is an individual Python class that inherits from SQLMesh's `Rule` base class and defines the logic for validating a pattern. -We display a portion of the `Rule` base class's code below ([full source code](https://github.com/TobikoData/sqlmesh/blob/main/sqlmesh/core/linter/rule.py)). Its methods and properties illustrate the most important components of the subclassed rules you define. 
+We display a portion of the `Rule` base class's code below ([full source code](https://github.com/SQLMesh/sqlmesh/blob/main/sqlmesh/core/linter/rule.py)). Its methods and properties illustrate the most important components of the subclassed rules you define. Each rule class you create has four vital components: diff --git a/docs/guides/model_selection.md b/docs/guides/model_selection.md index 9cc0a4358a..79fd17a18c 100644 --- a/docs/guides/model_selection.md +++ b/docs/guides/model_selection.md @@ -78,7 +78,7 @@ NOTE: the `--backfill-model` argument can only be used in development environmen ## Examples -We now demonstrate the use of `--select-model` and `--backfill-model` with the SQLMesh `sushi` example project, available in the `examples/sushi` directory of the [SQLMesh Github repository](https://github.com/TobikoData/sqlmesh). +We now demonstrate the use of `--select-model` and `--backfill-model` with the SQLMesh `sushi` example project, available in the `examples/sushi` directory of the [SQLMesh Github repository](https://github.com/SQLMesh/sqlmesh). ### sushi @@ -242,8 +242,9 @@ Models: #### Select with git changes The git-based selector allows you to select models whose files have changed compared to a target branch (default: main). This includes: + - Untracked files (new files not in git) -- Uncommitted changes in working directory +- Uncommitted changes in working directory (both staged and unstaged) - Committed changes different from the target branch For example: diff --git a/docs/guides/multi_repo.md b/docs/guides/multi_repo.md index bf34c7d21a..4dae4de57e 100644 --- a/docs/guides/multi_repo.md +++ b/docs/guides/multi_repo.md @@ -5,7 +5,7 @@ SQLMesh provides native support for multiple repos and makes it easy to maintain If you are wanting to separate your systems/data and provide isolation, checkout the [isolated systems guide](https://sqlmesh.readthedocs.io/en/stable/guides/isolated_systems/?h=isolated). 
## Bootstrapping multiple projects -Setting up SQLMesh with multiple repos is quite simple. Copy the contents of this example [multi-repo project](https://github.com/TobikoData/sqlmesh/tree/main/examples/multi). +Setting up SQLMesh with multiple repos is quite simple. Copy the contents of this example [multi-repo project](https://github.com/SQLMesh/sqlmesh/tree/main/examples/multi). To bootstrap the project, you can point SQLMesh at both projects. @@ -196,7 +196,7 @@ $ sqlmesh -p examples/multi/repo_1 migrate SQLMesh also supports multiple repos for dbt projects, allowing it to correctly detect changes and orchestrate backfills even when changes span multiple dbt projects. -You can watch a [quick demo](https://www.loom.com/share/69c083428bb348da8911beb2cd4d30b2) of this setup or experiment with the [multi-repo dbt example](https://github.com/TobikoData/sqlmesh/tree/main/examples/multi_dbt) yourself. +You can watch a [quick demo](https://www.loom.com/share/69c083428bb348da8911beb2cd4d30b2) of this setup or experiment with the [multi-repo dbt example](https://github.com/SQLMesh/sqlmesh/tree/main/examples/multi_dbt) yourself. ## Multi-repo mixed projects @@ -212,4 +212,4 @@ $ sqlmesh -p examples/multi_hybrid/dbt_repo -p examples/multi_hybrid/sqlmesh_rep SQLMesh will automatically detect dependencies and lineage across both SQLMesh and dbt projects, even when models are sourcing from different project types. -For an example of this setup, refer to the [mixed SQLMesh and dbt example](https://github.com/TobikoData/sqlmesh/tree/main/examples/multi_hybrid). +For an example of this setup, refer to the [mixed SQLMesh and dbt example](https://github.com/SQLMesh/sqlmesh/tree/main/examples/multi_hybrid). 
diff --git a/docs/guides/notifications.md b/docs/guides/notifications.md index 03405b8252..749a71c842 100644 --- a/docs/guides/notifications.md +++ b/docs/guides/notifications.md @@ -256,7 +256,7 @@ This example shows an email notification target, where `sushi@example.com` email In Python configuration files, new notification targets can be configured to send custom messages. -To customize a notification, create a new notification target class as a subclass of one of the three target classes described above (`SlackWebhookNotificationTarget`, `SlackApiNotificationTarget`, or `BasicSMTPNotificationTarget`). See the definitions of these classes on Github [here](https://github.com/TobikoData/sqlmesh/blob/main/sqlmesh/core/notification_target.py). +To customize a notification, create a new notification target class as a subclass of one of the three target classes described above (`SlackWebhookNotificationTarget`, `SlackApiNotificationTarget`, or `BasicSMTPNotificationTarget`). See the definitions of these classes on Github [here](https://github.com/SQLMesh/sqlmesh/blob/main/sqlmesh/core/notification_target.py). Each of those notification target classes is a subclass of `BaseNotificationTarget`, which contains a `notify` function corresponding to each event type. This table lists the notification functions, along with the contextual information available to them at calling time (e.g., the environment name for start/end events): diff --git a/docs/guides/vscode.md b/docs/guides/vscode.md index 151e630f27..5ef3cd71ce 100644 --- a/docs/guides/vscode.md +++ b/docs/guides/vscode.md @@ -6,7 +6,7 @@ The SQLMesh Visual Studio Code extension is in preview and undergoing active development. You may encounter bugs or API incompatibilities with the SQLMesh version you are running. - We encourage you to try the extension and [create Github issues](https://github.com/tobikodata/sqlmesh/issues) for any problems you encounter. 
+ We encourage you to try the extension and [create Github issues](https://github.com/SQLMesh/sqlmesh/issues) for any problems you encounter. In this guide, you'll set up the SQLMesh extension in the Visual Studio Code IDE software (which we refer to as "VSCode"). @@ -187,7 +187,7 @@ The most common problem is the extension not using the correct Python interprete Follow the [setup process described above](#vscode-python-interpreter) to ensure that the extension is using the correct Python interpreter. -If you have checked the VSCode `sqlmesh` output channel and the extension is still not using the correct Python interpreter, please raise an issue [here](https://github.com/tobikodata/sqlmesh/issues). +If you have checked the VSCode `sqlmesh` output channel and the extension is still not using the correct Python interpreter, please raise an issue [here](https://github.com/SQLMesh/sqlmesh/issues). ### Missing Python dependencies @@ -205,4 +205,4 @@ If you are using Tobiko Cloud, make sure `lsp` is included in the list of extras While the SQLMesh VSCode extension is in preview and the APIs to the underlying SQLMesh version are not stable, we do not guarantee compatibility between the extension and the SQLMesh version you are using. -If you encounter a problem, please raise an issue [here](https://github.com/tobikodata/sqlmesh/issues). \ No newline at end of file +If you encounter a problem, please raise an issue [here](https://github.com/SQLMesh/sqlmesh/issues). \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 3e9330f83f..83c1b0a431 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,7 +1,7 @@ #

- SQLMesh logo + SQLMesh logo

SQLMesh is a next-generation data transformation framework designed to ship data quickly, efficiently, and without error. Data teams can efficiently run and deploy data transformations written in SQL or Python with visibility and control at any size. @@ -9,11 +9,11 @@ SQLMesh is a next-generation data transformation framework designed to ship data It is more than just a [dbt alternative](https://tobikodata.com/reduce_costs_with_cron_and_partitions.html).

- Architecture Diagram + Architecture Diagram

## Core Features -SQLMesh Plan Mode +SQLMesh Plan Mode > Get instant SQL impact analysis of your changes, whether in the CLI or in [SQLMesh Plan Mode](https://sqlmesh.readthedocs.io/en/stable/guides/ui/?h=modes#working-with-an-ide) @@ -121,7 +121,7 @@ It is more than just a [dbt alternative](https://tobikodata.com/reduce_costs_wit ??? tip "Level Up Your SQL" Write SQL in any dialect and SQLMesh will transpile it to your target SQL dialect on the fly before sending it to the warehouse. - Transpile Example + Transpile Example * Debug transformation errors *before* you run them in your warehouse in [10+ different SQL dialects](https://sqlmesh.readthedocs.io/en/stable/integrations/overview/#execution-engines) * Definitions using [simply SQL](https://sqlmesh.readthedocs.io/en/stable/concepts/models/sql_models/#sql-based-definition) (no need for redundant and confusing `Jinja` + `YAML`) @@ -153,7 +153,7 @@ Follow this [example](https://sqlmesh.readthedocs.io/en/stable/examples/incremen Together, we want to build data transformation without the waste. Connect with us in the following ways: * Join the [Tobiko Slack Community](https://tobikodata.com/slack) to ask questions, or just to say hi! -* File an issue on our [GitHub](https://github.com/TobikoData/sqlmesh/issues/new) +* File an issue on our [GitHub](https://github.com/SQLMesh/sqlmesh/issues/new) * Send us an email at [hello@tobikodata.com](mailto:hello@tobikodata.com) with your questions or feedback * Read our [blog](https://tobikodata.com/blog) diff --git a/docs/integrations/dbt.md b/docs/integrations/dbt.md index 7cbef5b8fa..5854236aa2 100644 --- a/docs/integrations/dbt.md +++ b/docs/integrations/dbt.md @@ -358,4 +358,4 @@ The dbt jinja methods that are not currently supported are: ## Missing something you need? -Submit an [issue](https://github.com/TobikoData/sqlmesh/issues), and we'll look into it! +Submit an [issue](https://github.com/SQLMesh/sqlmesh/issues), and we'll look into it! 
diff --git a/docs/integrations/dlt.md b/docs/integrations/dlt.md index a53dc184ea..7125510de9 100644 --- a/docs/integrations/dlt.md +++ b/docs/integrations/dlt.md @@ -70,7 +70,7 @@ SQLMesh will retrieve the data warehouse connection credentials from your dlt pr ### Example -Generating a SQLMesh project dlt is quite simple. In this example, we'll use the example `sushi_pipeline.py` from the [sushi-dlt project](https://github.com/TobikoData/sqlmesh/tree/main/examples/sushi_dlt). +Generating a SQLMesh project dlt is quite simple. In this example, we'll use the example `sushi_pipeline.py` from the [sushi-dlt project](https://github.com/SQLMesh/sqlmesh/tree/main/examples/sushi_dlt). First, run the pipeline within the project directory: diff --git a/docs/integrations/engines/bigquery.md b/docs/integrations/engines/bigquery.md index a454996ecd..b93d6837ed 100644 --- a/docs/integrations/engines/bigquery.md +++ b/docs/integrations/engines/bigquery.md @@ -193,6 +193,23 @@ If the `impersonated_service_account` argument is set, SQLMesh will: The user account must have [sufficient permissions to impersonate the service account](https://cloud.google.com/docs/authentication/use-service-account-impersonation). +## Query Label + +BigQuery supports a `query_label` session variable which is attached to query jobs and can be used for auditing / attribution. + +SQLMesh supports setting it via `session_properties.query_label` on a model, as an array (or tuple) of key/value tuples. + +Example: +```sql +MODEL ( + name my_project.my_dataset.my_model, + dialect 'bigquery', + session_properties ( + query_label = [('team', 'data_platform'), ('env', 'prod')] + ) +); +``` + ## Permissions Required With any of the above connection methods, ensure these BigQuery permissions are enabled to allow SQLMesh to work correctly. 
diff --git a/docs/integrations/engines/duckdb.md b/docs/integrations/engines/duckdb.md index bc0af4f242..5f63a4688d 100644 --- a/docs/integrations/engines/duckdb.md +++ b/docs/integrations/engines/duckdb.md @@ -81,8 +81,9 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme data_path: data/ducklake encrypted: True data_inlining_row_limit: 10 + metadata_schema: main ``` - + === "Python" ```python linenums="1" @@ -106,6 +107,7 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme data_path="data/ducklake", encrypted=True, data_inlining_row_limit=10, + metadata_schema="main", ), } ) @@ -114,6 +116,14 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme ) ``` +**DuckLake Configuration Options:** + +- `path`: Path to the DuckLake catalog file +- `data_path`: Path where DuckLake data files are stored +- `encrypted`: Whether to enable encryption for the catalog (default: `False`) +- `data_inlining_row_limit`: Maximum number of rows to inline in the catalog (default: `0`) +- `metadata_schema`: The schema in the catalog server in which to store the DuckLake metadata tables (default: `main`) + #### Other Connection Catalogs Example Catalogs can also be defined to connect to anything that [DuckDB can be attached to](https://duckdb.org/docs/sql/statements/attach.html). 
diff --git a/docs/integrations/engines/trino.md b/docs/integrations/engines/trino.md index c590ee32ba..db732f0cc1 100644 --- a/docs/integrations/engines/trino.md +++ b/docs/integrations/engines/trino.md @@ -81,19 +81,22 @@ hive.metastore.glue.default-warehouse-dir=s3://my-bucket/ ### Connection options -| Option | Description | Type | Required | -|----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------:|:--------:| -| `type` | Engine type name - must be `trino` | string | Y | -| `user` | The username (of the account) to log in to your cluster. When connecting to Starburst Galaxy clusters, you must include the role of the user as a suffix to the username. | string | Y | -| `host` | The hostname of your cluster. Don't include the `http://` or `https://` prefix. | string | Y | -| `catalog` | The name of a catalog in your cluster. | string | Y | -| `http_scheme` | The HTTP scheme to use when connecting to your cluster. By default, it's `https` and can only be `http` for no-auth or basic auth. | string | N | -| `port` | The port to connect to your cluster. By default, it's `443` for `https` scheme and `80` for `http` | int | N | -| `roles` | Mapping of catalog name to a role | dict | N | -| `http_headers` | Additional HTTP headers to send with each request. | dict | N | -| `session_properties` | Trino session properties. Run `SHOW SESSION` to see all options. | dict | N | -| `retries` | Number of retries to attempt when a request fails. Default: `3` | int | N | -| `timezone` | Timezone to use for the connection. 
Default: client-side local timezone | string | N | +| Option | Description | Type | Required | +|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------:|:--------:| +| `type` | Engine type name - must be `trino` | string | Y | +| `user` | The username (of the account) to log in to your cluster. When connecting to Starburst Galaxy clusters, you must include the role of the user as a suffix to the username. | string | Y | +| `host` | The hostname of your cluster. Don't include the `http://` or `https://` prefix. | string | Y | +| `catalog` | The name of a catalog in your cluster. | string | Y | +| `http_scheme` | The HTTP scheme to use when connecting to your cluster. By default, it's `https` and can only be `http` for no-auth or basic auth. | string | N | +| `port` | The port to connect to your cluster. By default, it's `443` for `https` scheme and `80` for `http` | int | N | +| `roles` | Mapping of catalog name to a role | dict | N | +| `source` | Value to send as Trino's `source` field for query attribution / auditing. Default: `sqlmesh`. | string | N | +| `http_headers` | Additional HTTP headers to send with each request. | dict | N | +| `session_properties` | Trino session properties. Run `SHOW SESSION` to see all options. | dict | N | +| `retries` | Number of retries to attempt when a request fails. Default: `3` | int | N | +| `timezone` | Timezone to use for the connection. Default: client-side local timezone | string | N | +| `schema_location_mapping` | A mapping of regex patterns to S3 locations to use for the `LOCATION` property when creating schemas. See [Table and Schema locations](#table-and-schema-locations) for more details. | dict | N | +| `catalog_type_overrides` | A mapping of catalog names to their connector type. This is used to enable/disable connector specific behavior. 
See [Catalog Type Overrides](#catalog-type-overrides) for more details. | dict | N | ## Table and Schema locations @@ -204,6 +207,25 @@ SELECT ... This will cause SQLMesh to set the specified `LOCATION` when issuing a `CREATE TABLE` statement. +## Catalog Type Overrides + +SQLMesh attempts to determine the connector type of a catalog by querying the `system.metadata.catalogs` table and checking the `connector_name` column. +It checks if the connector name is `hive` for Hive connector behavior or contains `iceberg` or `delta_lake` for Iceberg or Delta Lake connector behavior respectively. +However, the connector name may not always be a reliable way to determine the connector type, for example when using a custom connector or a fork of an existing connector. +To handle such cases, you can use the `catalog_type_overrides` connection property to explicitly specify the connector type for specific catalogs. +For example, to specify that the `datalake` catalog is using the Iceberg connector and the `analytics` catalog is using the Hive connector, you can configure the connection as follows: + +```yaml title="config.yaml" +gateways: + trino: + connection: + type: trino + ... 
+ catalog_type_overrides: + datalake: iceberg + analytics: hive +``` + ## Authentication === "No Auth" diff --git a/docs/integrations/github.md b/docs/integrations/github.md index a11d90d044..07903fce56 100644 --- a/docs/integrations/github.md +++ b/docs/integrations/github.md @@ -286,21 +286,22 @@ Below is an example of how to define the default config for the bot in either YA ### Configuration Properties -| Option | Description | Type | Required | -|---------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------:|:--------:| -| `invalidate_environment_after_deploy` | Indicates if the PR environment created should be automatically invalidated after changes are deployed. Invalidated environments are cleaned up automatically by the Janitor. Default: `True` | bool | N | -| `merge_method` | The merge method to use when automatically merging a PR after deploying to prod. Defaults to `None` meaning automatic merge is not done. Options: `merge`, `squash`, `rebase` | string | N | -| `enable_deploy_command` | Indicates if the `/deploy` command should be enabled in order to allowed synchronized deploys to production. Default: `False` | bool | N | -| `command_namespace` | The namespace to use for SQLMesh commands. For example if you provide `#SQLMesh` as a value then commands will be expected in the format of `#SQLMesh/`. Default: `None` meaning no namespace is used. | string | N | -| `auto_categorize_changes` | Auto categorization behavior to use for the bot. If not provided then the project-wide categorization behavior is used. 
See [Auto-categorize model changes](https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#auto-categorize-model-changes) for details. | dict | N | -| `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed.| str | N | -| `pr_min_intervals` | Intended for use when `default_pr_start` is set to a relative time, eg `1 week ago`. This ensures that at least this many intervals across every model are included for backfill in the PR environment. Without this, models with an interval unit wider than `default_pr_start` (such as `@monthly` models if `default_pr_start` was set to `1 week ago`) will be excluded from backfill entirely. | int | N | -| `skip_pr_backfill` | Indicates if the bot should skip backfilling models in the PR environment. Default: `True` | bool | N | -| `pr_include_unmodified` | Indicates whether to include unmodified models in the PR environment. Default to the project's config value (which defaults to `False`) | bool | N | -| `run_on_deploy_to_prod` | Indicates whether to run latest intervals when deploying to prod. If set to false, the deployment will backfill only the changed models up to the existing latest interval in production, ignoring any missing intervals beyond this point. Default: `False` | bool | N | -| `pr_environment_name` | The name of the PR environment to create for which a PR number will be appended to. Defaults to the repo name if not provided. Note: The name will be normalized to alphanumeric + underscore and lowercase. | str | N | -| `prod_branch_name` | The name of the git branch associated with production. Ex: `prod`. 
Default: `main` or `master` is considered prod | str | N | -| `forward_only_branch_suffix` | If the git branch has this suffix, trigger a [forward-only](../concepts/plans.md#forward-only-plans) plan instead of a normal plan. Default: `-forward-only` | str | N | +| Option | Description | Type | Required | +|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------:|:--------:| +| `invalidate_environment_after_deploy` | Indicates if the PR environment created should be automatically invalidated after changes are deployed. Invalidated environments are cleaned up automatically by the Janitor. Default: `True` | bool | N | +| `merge_method` | The merge method to use when automatically merging a PR after deploying to prod. Defaults to `None` meaning automatic merge is not done. Options: `merge`, `squash`, `rebase` | string | N | +| `enable_deploy_command` | Indicates if the `/deploy` command should be enabled in order to allowed synchronized deploys to production. Default: `False` | bool | N | +| `command_namespace` | The namespace to use for SQLMesh commands. For example if you provide `#SQLMesh` as a value then commands will be expected in the format of `#SQLMesh/`. Default: `None` meaning no namespace is used. | string | N | +| `auto_categorize_changes` | Auto categorization behavior to use for the bot. If not provided then the project-wide categorization behavior is used. See [Auto-categorize model changes](https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#auto-categorize-model-changes) for details. 
 | dict | N |
+| `default_pr_start`                     | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed. | str | N |
+| `pr_min_intervals`                     | Intended for use when `default_pr_start` is set to a relative time, eg `1 week ago`. This ensures that at least this many intervals across every model are included for backfill in the PR environment. Without this, models with an interval unit wider than `default_pr_start` (such as `@monthly` models if `default_pr_start` was set to `1 week ago`) will be excluded from backfill entirely. | int | N |
+| `skip_pr_backfill`                     | Indicates if the bot should skip backfilling models in the PR environment. Default: `True` | bool | N |
+| `pr_include_unmodified`                | Indicates whether to include unmodified models in the PR environment. Defaults to the project's config value (which defaults to `False`) | bool | N |
+| `run_on_deploy_to_prod`                | Indicates whether to run latest intervals when deploying to prod. If set to false, the deployment will backfill only the changed models up to the existing latest interval in production, ignoring any missing intervals beyond this point. Default: `False` | bool | N |
+| `pr_environment_name`                  | The name of the PR environment to create, to which a PR number will be appended. Defaults to the repo name if not provided. Note: The name will be normalized to alphanumeric + underscore and lowercase. | str | N |
+| `prod_branch_name`                     | The name of the git branch associated with production. Ex: `prod`.
Default: `main` or `master` is considered prod | str | N | +| `forward_only_branch_suffix` | If the git branch has this suffix, trigger a [forward-only](../concepts/plans.md#forward-only-plans) plan instead of a normal plan. Default: `-forward-only` | str | N | +| `check_if_blocked_on_deploy_to_prod` | The bot normally checks if a PR is blocked from merging before deploying to production. Setting this to `False` will skip that check. Default: `True` | bool | N | Example with all properties defined: @@ -363,7 +364,7 @@ These are the possible outputs (based on how the bot is configured) that are cre * `prod_plan_preview` * `prod_environment_synced` -[There are many possible conclusions](https://github.com/TobikoData/sqlmesh/blob/main/sqlmesh/integrations/github/cicd/controller.py#L96-L102) so the best use case for this is likely to check for `success` conclusion in order to potentially run follow up steps. +[There are many possible conclusions](https://github.com/SQLMesh/sqlmesh/blob/main/sqlmesh/integrations/github/cicd/controller.py#L96-L102) so the best use case for this is likely to check for `success` conclusion in order to potentially run follow up steps. Note that in error cases conclusions may not be set and therefore you will get an empty string. Example of running a step after pr environment has been synced: diff --git a/docs/quickstart/cli.md b/docs/quickstart/cli.md index 7b77b2af1e..a592847470 100644 --- a/docs/quickstart/cli.md +++ b/docs/quickstart/cli.md @@ -160,7 +160,7 @@ https://sqlmesh.readthedocs.io/en/stable/quickstart/cli/ Need help? - Docs: https://sqlmesh.readthedocs.io - Slack: https://www.tobikodata.com/slack -- GitHub: https://github.com/TobikoData/sqlmesh/issues +- GitHub: https://github.com/SQLMesh/sqlmesh/issues ``` ??? 
info "Learn more about the project's configuration: `config.yaml`" diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 676f9d7389..b13438ee2d 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -125,9 +125,10 @@ Formatting settings for the `sqlmesh format` command and UI. Configuration for the `sqlmesh janitor` command. -| Option | Description | Type | Required | -|--------------------------|----------------------------------------------------------------------------------------------------------------------------|:-------:|:--------:| -| `warn_on_delete_failure` | Whether to warn instead of erroring if the janitor fails to delete the expired environment schema / views (Default: False) | boolean | N | +| Option | Description | Type | Required | +|---------------------------------|----------------------------------------------------------------------------------------------------------------------------|:-------:|:--------:| +| `warn_on_delete_failure` | Whether to warn instead of erroring if the janitor fails to delete the expired environment schema / views (Default: False) | boolean | N | +| `expired_snapshots_batch_size` | Maximum number of expired snapshots to clean in a single batch (Default: 200) | int | N | ## UI diff --git a/docs/reference/model_configuration.md b/docs/reference/model_configuration.md index a5a96ebbf9..9d040fe6db 100644 --- a/docs/reference/model_configuration.md +++ b/docs/reference/model_configuration.md @@ -282,7 +282,7 @@ Configuration options for [`SCD_TYPE_2` models](../concepts/models/model_kinds.m | `unique_key` | The model column(s) containing each row's unique key | array[str] | Y | | `valid_from_name` | The model column containing each row's valid from date. (Default: `valid_from`) | str | N | | `valid_to_name` | The model column containing each row's valid to date. 
(Default: `valid_to`) | str | N | -| `invalidate_hard_deletes` | If set to true, when a record is missing from the source table it will be marked as invalid - see [here](../concepts/models/model_kinds.md#deletes) for more information. (Default: `True`) | bool | N | +| `invalidate_hard_deletes` | If set to true, when a record is missing from the source table it will be marked as invalid - see [here](../concepts/models/model_kinds.md#deletes) for more information. (Default: `False`) | bool | N | ##### SCD Type 2 By Time diff --git a/docs/reference/python.md b/docs/reference/python.md index 14e0da84c8..1c4c9191ff 100644 --- a/docs/reference/python.md +++ b/docs/reference/python.md @@ -4,6 +4,6 @@ SQLMesh is built in Python, and its complete Python API reference is located [he The Python API reference is comprehensive and includes the internal components of SQLMesh. Those components are likely only of interest if you want to modify SQLMesh itself. -If you want to use SQLMesh via its Python API, the best approach is to study how the SQLMesh [CLI](./cli.md) calls it behind the scenes. The CLI implementation code shows exactly which Python methods are called for each CLI command and can be viewed [on Github](https://github.com/TobikoData/sqlmesh/blob/main/sqlmesh/cli/main.py). For example, the Python code executed by the `plan` command is located [here](https://github.com/TobikoData/sqlmesh/blob/15c8788100fa1cfb8b0cc1879ccd1ad21dc3e679/sqlmesh/cli/main.py#L302). +If you want to use SQLMesh via its Python API, the best approach is to study how the SQLMesh [CLI](./cli.md) calls it behind the scenes. The CLI implementation code shows exactly which Python methods are called for each CLI command and can be viewed [on Github](https://github.com/SQLMesh/sqlmesh/blob/main/sqlmesh/cli/main.py). For example, the Python code executed by the `plan` command is located [here](https://github.com/SQLMesh/sqlmesh/blob/15c8788100fa1cfb8b0cc1879ccd1ad21dc3e679/sqlmesh/cli/main.py#L302). 
Almost all the relevant Python methods are in the [SQLMesh `Context` class](https://sqlmesh.readthedocs.io/en/stable/_readthedocs/html/sqlmesh/core/context.html#Context). diff --git a/examples/sushi/models/customers.sql b/examples/sushi/models/customers.sql index f91f1166e8..d2bda09ed3 100644 --- a/examples/sushi/models/customers.sql +++ b/examples/sushi/models/customers.sql @@ -42,4 +42,4 @@ LEFT JOIN ( ON o.customer_id = m.customer_id LEFT JOIN raw.demographics AS d ON o.customer_id = d.customer_id -WHERE sushi.orders.customer_id > 0 \ No newline at end of file +WHERE o.customer_id > 0 \ No newline at end of file diff --git a/examples/sushi_dbt/config.py b/examples/sushi_dbt/config.py index e7e28c98e4..2305cf79f2 100644 --- a/examples/sushi_dbt/config.py +++ b/examples/sushi_dbt/config.py @@ -5,3 +5,5 @@ config = sqlmesh_config(Path(__file__).parent) test_config = config + +migration_test_config = sqlmesh_config(Path(__file__).parent, dbt_target_name="duckdb") diff --git a/examples/sushi_dbt/models/schema.yml b/examples/sushi_dbt/models/schema.yml index d42d64bcce..8fd62c4efe 100644 --- a/examples/sushi_dbt/models/schema.yml +++ b/examples/sushi_dbt/models/schema.yml @@ -42,8 +42,6 @@ models: columns: - name: waiter_id description: Waiter id - tests: - - not_null - name: ds description: Date - name: waiter_as_customer_by_day diff --git a/mkdocs.yml b/mkdocs.yml index 47ddca54e9..86761de9d7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,6 +1,6 @@ site_name: SQLMesh -repo_url: https://github.com/TobikoData/sqlmesh -repo_name: TobikoData/sqlmesh +repo_url: https://github.com/SQLMesh/sqlmesh +repo_name: SQLMesh/sqlmesh nav: - "Overview": index.md - Get started: @@ -202,7 +202,7 @@ extra: - icon: fontawesome/solid/paper-plane link: mailto:hello@tobikodata.com - icon: fontawesome/brands/github - link: https://github.com/TobikoData/sqlmesh/issues/new + link: https://github.com/SQLMesh/sqlmesh/issues/new analytics: provider: google property: G-JXQ1R227VS diff --git 
a/pdoc/cli.py b/pdoc/cli.py index 5833c59207..9301ae0444 100755 --- a/pdoc/cli.py +++ b/pdoc/cli.py @@ -29,7 +29,7 @@ def mocked_import(*args, **kwargs): opts.logo_link = "https://tobikodata.com" opts.footer_text = "Copyright Tobiko Data Inc. 2022" opts.template_directory = Path(__file__).parent.joinpath("templates").absolute() - opts.edit_url = ["sqlmesh=https://github.com/TobikoData/sqlmesh/tree/main/sqlmesh/"] + opts.edit_url = ["sqlmesh=https://github.com/SQLMesh/sqlmesh/tree/main/sqlmesh/"] with mock.patch("pdoc.__main__.parser", **{"parse_args.return_value": opts}): cli() diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index daaf7eb993..aeacb362d0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -400,124 +400,145 @@ importers: web/common: devDependencies: '@eslint/js': - specifier: ^9.31.0 + specifier: 9.31.0 version: 9.31.0 '@radix-ui/react-slot': - specifier: ^1.2.3 + specifier: 1.2.3 version: 1.2.3(@types/react@18.3.23)(react@18.3.1) '@radix-ui/react-tooltip': - specifier: ^1.2.8 + specifier: 1.2.8 version: 1.2.8(@types/react-dom@18.3.7(@types/react@18.3.23))(@types/react@18.3.23)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) '@storybook/addon-docs': - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(@types/react@18.3.23)(storybook@9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0))) '@storybook/react-vite': - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(rollup@4.45.1)(storybook@9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)))(typescript@5.8.3)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) '@tailwindcss/typography': - specifier: ^0.5.16 + specifier: 0.5.16 version: 0.5.16(tailwindcss@3.4.17) '@tanstack/react-virtual': - specifier: 
^3.13.12 + specifier: 3.13.12 version: 3.13.12(react-dom@18.3.1(react@18.3.1))(react@18.3.1) '@testing-library/dom': - specifier: ^10.4.1 + specifier: 10.4.1 version: 10.4.1 '@testing-library/jest-dom': - specifier: ^6.6.3 + specifier: 6.6.3 version: 6.6.3 '@testing-library/react': - specifier: ^16.3.0 + specifier: 16.3.0 version: 16.3.0(@testing-library/dom@10.4.1)(@types/react-dom@18.3.7(@types/react@18.3.23))(@types/react@18.3.23)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@testing-library/user-event': + specifier: 14.6.1 + version: 14.6.1(@testing-library/dom@10.4.1) + '@types/dagre': + specifier: 0.7.53 + version: 0.7.53 '@types/node': - specifier: ^20.11.25 + specifier: 20.11.25 version: 20.11.25 '@types/react': - specifier: ^18.3.23 + specifier: 18.3.23 version: 18.3.23 '@types/react-dom': - specifier: ^18.3.7 + specifier: 18.3.7 version: 18.3.7(@types/react@18.3.23) '@vitejs/plugin-react': - specifier: ^4.7.0 + specifier: 4.7.0 version: 4.7.0(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) '@vitest/browser': - specifier: ^3.2.4 + specifier: 3.2.4 version: 3.2.4(playwright@1.54.1)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0))(vitest@3.2.4) '@xyflow/react': - specifier: ^12.8.4 + specifier: 12.8.4 version: 12.8.4(@types/react@18.3.23)(immer@9.0.21)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) autoprefixer: - specifier: ^10.4.21 + specifier: 10.4.21 version: 10.4.21(postcss@8.5.6) + browserslist: + specifier: 4.26.2 + version: 4.26.2 + caniuse-lite: + specifier: 1.0.30001746 + version: 1.0.30001746 class-variance-authority: - specifier: ^0.7.1 + specifier: 0.7.1 version: 0.7.1 clsx: - specifier: ^2.1.1 + specifier: 2.1.1 version: 2.1.1 + cronstrue: + specifier: 3.3.0 + version: 3.3.0 + dagre: + specifier: 0.8.5 + version: 0.8.5 + deepmerge: + specifier: 4.3.1 + version: 4.3.1 eslint: - specifier: ^9.31.0 + specifier: 9.31.0 version: 
9.31.0(jiti@2.4.2) eslint-plugin-react-hooks: - specifier: ^5.2.0 + specifier: 5.2.0 version: 5.2.0(eslint@9.31.0(jiti@2.4.2)) eslint-plugin-storybook: - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(eslint@9.31.0(jiti@2.4.2))(storybook@9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)))(typescript@5.8.3) fuse.js: - specifier: ^7.1.0 + specifier: 7.1.0 version: 7.1.0 globals: - specifier: ^16.3.0 + specifier: 16.3.0 version: 16.3.0 lucide-react: - specifier: ^0.542.0 + specifier: 0.542.0 version: 0.542.0(react@18.3.1) playwright: - specifier: ^1.54.1 + specifier: 1.54.1 version: 1.54.1 postcss: - specifier: ^8.5.6 + specifier: 8.5.6 version: 8.5.6 react: - specifier: ^18.3.1 + specifier: 18.3.1 version: 18.3.1 react-dom: - specifier: ^18.3.1 + specifier: 18.3.1 version: 18.3.1(react@18.3.1) storybook: - specifier: ^9.1.5 + specifier: 9.1.5 version: 9.1.5(@testing-library/dom@10.4.1)(prettier@3.6.2)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) syncpack: - specifier: ^13.0.4 + specifier: 13.0.4 version: 13.0.4(typescript@5.8.3) tailwind-merge: - specifier: ^3.3.1 + specifier: 3.3.1 version: 3.3.1 tailwind-scrollbar: - specifier: ^3.1.0 + specifier: 3.1.0 version: 3.1.0(tailwindcss@3.4.17) tailwindcss: - specifier: ^3.4.17 + specifier: 3.4.17 version: 3.4.17 typescript: - specifier: ^5.8.3 + specifier: 5.8.3 version: 5.8.3 typescript-eslint: - specifier: ^8.38.0 + specifier: 8.38.0 version: 8.38.0(eslint@9.31.0(jiti@2.4.2))(typescript@5.8.3) vite: - specifier: ^6.3.5 + specifier: 6.3.5 version: 6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0) vite-plugin-dts: - specifier: ^4.5.4 + specifier: 4.5.4 version: 
4.5.4(@types/node@20.11.25)(rollup@4.45.1)(typescript@5.8.3)(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) vite-plugin-static-copy: - specifier: ^3.1.1 + specifier: 3.1.1 version: 3.1.1(vite@6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0)) vitest: - specifier: ^3.2.4 + specifier: 3.2.4 version: 3.2.4(@types/debug@4.1.12)(@types/node@20.11.25)(@vitest/browser@3.2.4)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0) packages: @@ -766,8 +787,8 @@ packages: '@codemirror/autocomplete@6.18.6': resolution: {integrity: sha512-PHHBXFomUs5DF+9tCOM/UoW6XQ4R44lLNNhRaW9PKPTU0D7lIjRg3ElxaJnTwsl/oHiR93WSXDBrekhoUGCPtg==} - '@codemirror/autocomplete@6.18.7': - resolution: {integrity: sha512-8EzdeIoWPJDsMBwz3zdzwXnUpCzMiCyz5/A3FIPpriaclFCGDkAzK13sMcnsu5rowqiyeQN2Vs2TsOcoDPZirQ==} + '@codemirror/autocomplete@6.19.0': + resolution: {integrity: sha512-61Hfv3cF07XvUxNeC3E7jhG8XNi1Yom1G0lRC936oLnlF+jrbrv8rc/J98XlYzcsAoTVupfsf5fLej1aI8kyIg==} '@codemirror/commands@6.8.1': resolution: {integrity: sha512-KlGVYufHMQzxbdQONiLyGQDUW0itrLZwq3CcY7xpv9ZLRHqzkBSoteocBHtMCoY7/Ci4xhzSrToIeLg7FxHuaw==} @@ -802,8 +823,8 @@ packages: '@codemirror/view@6.38.1': resolution: {integrity: sha512-RmTOkE7hRU3OVREqFVITWHz6ocgBjv08GoePscAakgVQfciA3SGCEk7mb9IzwW61cKKmlTpHXG6DUE5Ubx+MGQ==} - '@codemirror/view@6.38.2': - resolution: {integrity: sha512-bTWAJxL6EOFLPzTx+O5P5xAO3gTqpatQ2b/ARQ8itfU/v2LlpS3pH2fkL0A3E/Fx8Y2St2KES7ZEV0sHTsSW/A==} + '@codemirror/view@6.38.4': + resolution: {integrity: sha512-hduz0suCcUSC/kM8Fq3A9iLwInJDl8fD1xLpTIk+5xkNm8z/FT7UsIa9sOXrkpChh+XXc18RzswE8QqELsVl+g==} '@csstools/color-helpers@5.0.2': resolution: {integrity: sha512-JqWH1vsgdGcw2RR6VliXXdA0/59LttzlU8UlRT/iUUsEeWfYq8I+K0yhihEUTTHLRm1EXvpsCx3083EU15ecsA==} @@ -1179,6 +1200,9 @@ packages: '@jridgewell/trace-mapping@0.3.30': resolution: {integrity: 
sha512-GQ7Nw5G2lTu/BtHTKfXhKHok2WGetd4XYcVKGx00SjAk8GMwgJM3zr6zORiPGuOE+/vkc90KtTosSSvaCjKb2Q==} + '@jridgewell/trace-mapping@0.3.31': + resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@jsdevtools/ono@7.1.3': resolution: {integrity: sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==} @@ -2587,6 +2611,9 @@ packages: '@types/d3@7.4.3': resolution: {integrity: sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww==} + '@types/dagre@0.7.53': + resolution: {integrity: sha512-f4gkWqzPZvYmKhOsDnhq/R8mO4UMcKdxZo+i5SCkOU1wvGeHJeUXGIHeE9pnwGyPMDof1Vx5ZQo4nxpeg2TTVQ==} + '@types/debug@4.1.12': resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} @@ -3244,6 +3271,10 @@ packages: base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + baseline-browser-mapping@2.8.9: + resolution: {integrity: sha512-hY/u2lxLrbecMEWSB0IpGzGyDyeoMFQhCvZd2jGFSE5I17Fh01sYUBPCJtkWERw7zrac9+cIghxm/ytJa2X8iA==} + hasBin: true + better-opn@3.0.2: resolution: {integrity: sha512-aVNobHnJqLiUelTaHat9DZ1qM2w0C0Eym4LPI/3JxOnSokGVdsl1T1kN7TFvsEAD8G47A6VKQ0TVHqbBnYMJlQ==} engines: {node: '>=12.0.0'} @@ -3278,13 +3309,8 @@ packages: browser-stdout@1.3.1: resolution: {integrity: sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==} - browserslist@4.25.1: - resolution: {integrity: sha512-KGj0KoOMXLpSNkkEI6Z6mShmQy0bc1I+T7K9N81k4WWMrfz+6fQ6es80B/YLAeRoKvjYE1YSHHOW1qe9xIVzHw==} - engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} - hasBin: true - - browserslist@4.25.4: - resolution: {integrity: sha512-4jYpcjabC606xJ3kw2QwGEZKX0Aw7sgQdZCvIK9dhVSPh76BKo+C+btT1RRofH7B+8iNpEbgGNVWiLki5q93yg==} + browserslist@4.26.2: + resolution: {integrity: 
sha512-ECFzp6uFOSB+dcZ5BK/IBaGWssbSYBHvuMeMt3MMFyhI0Z8SqGgEkBLARgpRH3hutIgPVsALcMwbDrJqPxQ65A==} engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} hasBin: true @@ -3340,11 +3366,8 @@ packages: resolution: {integrity: sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==} engines: {node: '>=10'} - caniuse-lite@1.0.30001727: - resolution: {integrity: sha512-pB68nIHmbN6L/4C6MH1DokyR3bYqFwjaSs/sWDHGj4CTcFtQUQMuJftVwWkXq7mNWOybD3KhUv3oWHoGxgP14Q==} - - caniuse-lite@1.0.30001741: - resolution: {integrity: sha512-QGUGitqsc8ARjLdgAfxETDhRbJ0REsP6O3I96TAth/mVjh2cYzN2u+3AzPP3aVSm2FehEItaJw1xd+IGBXWeSw==} + caniuse-lite@1.0.30001746: + resolution: {integrity: sha512-eA7Ys/DGw+pnkWWSE/id29f2IcPHVoE8wxtvE5JdvD2V28VTDPy1yEeo11Guz0sJ4ZeGRcm3uaTcAqK1LXaphA==} ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} @@ -3538,6 +3561,10 @@ packages: crelt@1.0.6: resolution: {integrity: sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==} + cronstrue@3.3.0: + resolution: {integrity: sha512-iwJytzJph1hosXC09zY8F5ACDJKerr0h3/2mOxg9+5uuFObYlgK0m35uUPk4GCvhHc2abK7NfnR9oMqY0qZFAg==} + hasBin: true + cross-spawn@7.0.6: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} @@ -3602,6 +3629,9 @@ packages: resolution: {integrity: sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==} engines: {node: '>=12'} + dagre@0.8.5: + resolution: {integrity: sha512-/aTqmnRta7x7MCCpExk7HQL2O4owCT2h8NT//9I1OQ9vt29Pa0BzSAkR5lwFUcQ7491yVi/3CXU9jQ5o0Mn2Sw==} + data-urls@5.0.0: resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==} engines: {node: '>=18'} @@ -3655,6 +3685,10 @@ packages: deep-is@0.1.4: resolution: {integrity: 
sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} + deepmerge@4.3.1: + resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} + engines: {node: '>=0.10.0'} + default-browser-id@5.0.0: resolution: {integrity: sha512-A6p/pu/6fyBcA1TRz/GqWYPViplrftcW2gZC9q79ngNCKAeR/X3gcEdXQHl4KNXV+3wgIJ1CPkJQ3IHM6lcsyA==} engines: {node: '>=18'} @@ -3762,11 +3796,8 @@ packages: effect@3.17.9: resolution: {integrity: sha512-Nkkn9n1zhy30Dq0MpQatDCH7nfYnOIiebkOHNxmmvoVnEDKCto+2ZwDDWFGzcN/ojwfqjRXWGC9Lo91K5kwZCg==} - electron-to-chromium@1.5.190: - resolution: {integrity: sha512-k4McmnB2091YIsdCgkS0fMVMPOJgxl93ltFzaryXqwip1AaxeDqKCGLxkXODDA5Ab/D+tV5EL5+aTx76RvLRxw==} - - electron-to-chromium@1.5.215: - resolution: {integrity: sha512-TIvGp57UpeNetj/wV/xpFNpWGb0b/ROw372lHPx5Aafx02gjTBtWnEEcaSX3W2dLM3OSdGGyHX/cHl01JQsLaQ==} + electron-to-chromium@1.5.227: + resolution: {integrity: sha512-ITxuoPfJu3lsNWUi2lBM2PaBPYgH3uqmxut5vmBxgYvyI4AlJ6P3Cai1O76mOrkJCBzq0IxWg/NtqOrpu/0gKA==} elkjs@0.8.2: resolution: {integrity: sha512-L6uRgvZTH+4OF5NE/MBbzQx/WYpru1xCBE9respNj6qznEewGUIfhzmm7horWWxbNO2M0WckQypGctR8lH79xQ==} @@ -4209,6 +4240,9 @@ packages: graphemer@1.4.0: resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==} + graphlib@2.1.8: + resolution: {integrity: sha512-jcLLfkpoVGmH7/InMC/1hIvOPSUh38oJtGhvrOFGzioE1DZ+0YW16RgmOJhHiuWTvGiJQ9Z1Ik43JvkRPRvE+A==} + has-bigints@1.1.0: resolution: {integrity: sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==} engines: {node: '>= 0.4'} @@ -5139,11 +5173,8 @@ packages: node-readfiles@0.2.0: resolution: {integrity: sha512-SU00ZarexNlE4Rjdm83vglt5Y9yiQ+XI1XpflWlb7q7UTN1JUItm69xMeiQCTxtTfnzt+83T8Cx+vI2ED++VDA==} - node-releases@2.0.19: - resolution: {integrity: 
sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==} - - node-releases@2.0.20: - resolution: {integrity: sha512-7gK6zSXEH6neM212JgfYFXe+GmZQM+fia5SsusuBIUgnPheLFBmIPhtFoAQRj8/7wASYQnbDlHPVwY0BefoFgA==} + node-releases@2.0.21: + resolution: {integrity: sha512-5b0pgg78U3hwXkCM8Z9b2FJdPZlr9Psr9V2gQPESdGHqbntyFJKFW4r5TeWGFzafGY3hzs1JC62VEQMbl1JFkw==} node-sarif-builder@3.2.0: resolution: {integrity: sha512-kVIOdynrF2CRodHZeP/97Rh1syTUHBNiw17hUCIVhlhEsWlfJm19MuO56s4MdKbr22xWx6mzMnNAgXzVlIYM9Q==} @@ -7024,7 +7055,7 @@ snapshots: '@babel/traverse': 7.28.0 '@babel/types': 7.28.1 convert-source-map: 2.0.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 gensync: 1.0.0-beta.2 json5: 2.2.3 semver: 6.3.1 @@ -7047,7 +7078,7 @@ snapshots: dependencies: '@babel/compat-data': 7.28.0 '@babel/helper-validator-option': 7.27.1 - browserslist: 4.25.1 + browserslist: 4.26.2 lru-cache: 5.1.1 semver: 6.3.1 @@ -7192,7 +7223,7 @@ snapshots: '@babel/parser': 7.28.0 '@babel/template': 7.27.2 '@babel/types': 7.28.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 transitivePeerDependencies: - supports-color @@ -7224,11 +7255,11 @@ snapshots: '@codemirror/view': 6.38.1 '@lezer/common': 1.2.3 - '@codemirror/autocomplete@6.18.7': + '@codemirror/autocomplete@6.19.0': dependencies: '@codemirror/language': 6.11.3 '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 '@lezer/common': 1.2.3 '@codemirror/commands@6.8.1': @@ -7267,7 +7298,7 @@ snapshots: '@codemirror/language@6.11.3': dependencies: '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 '@lezer/common': 1.2.3 '@lezer/highlight': 1.2.1 '@lezer/lr': 1.4.2 @@ -7280,13 +7311,13 @@ snapshots: '@codemirror/lint@6.8.5': dependencies: '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 crelt: 1.0.6 '@codemirror/search@6.5.10': dependencies: '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + 
'@codemirror/view': 6.38.4 crelt: 1.0.6 '@codemirror/state@6.5.2': @@ -7297,7 +7328,7 @@ snapshots: dependencies: '@codemirror/language': 6.11.3 '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 '@lezer/highlight': 1.2.1 '@codemirror/view@6.38.1': @@ -7307,7 +7338,7 @@ snapshots: style-mod: 4.1.2 w3c-keyname: 2.2.8 - '@codemirror/view@6.38.2': + '@codemirror/view@6.38.4': dependencies: '@codemirror/state': 6.5.2 crelt: 1.0.6 @@ -7449,7 +7480,7 @@ snapshots: '@eslint/config-array@0.21.0': dependencies: '@eslint/object-schema': 2.1.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 minimatch: 3.1.2 transitivePeerDependencies: - supports-color @@ -7463,7 +7494,7 @@ snapshots: '@eslint/eslintrc@3.3.1': dependencies: ajv: 6.12.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 espree: 10.4.0 globals: 14.0.0 ignore: 5.3.2 @@ -7617,7 +7648,7 @@ snapshots: '@jridgewell/source-map@0.3.11': dependencies: '@jridgewell/gen-mapping': 0.3.13 - '@jridgewell/trace-mapping': 0.3.30 + '@jridgewell/trace-mapping': 0.3.31 '@jridgewell/sourcemap-codec@1.5.4': {} @@ -7633,6 +7664,11 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@jridgewell/trace-mapping@0.3.31': + dependencies: + '@jridgewell/resolve-uri': 3.1.2 + '@jridgewell/sourcemap-codec': 1.5.5 + '@jsdevtools/ono@7.1.3': {} '@jsep-plugin/assignment@1.3.0(jsep@1.4.0)': @@ -9331,6 +9367,8 @@ snapshots: '@types/d3-transition': 3.0.9 '@types/d3-zoom': 3.0.8 + '@types/dagre@0.7.53': {} + '@types/debug@4.1.12': dependencies: '@types/ms': 2.1.0 @@ -9453,7 +9491,7 @@ snapshots: '@typescript-eslint/types': 8.38.0 '@typescript-eslint/typescript-estree': 8.38.0(typescript@5.8.3) '@typescript-eslint/visitor-keys': 8.38.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 eslint: 9.31.0(jiti@2.4.2) typescript: 5.8.3 transitivePeerDependencies: @@ -9463,7 +9501,7 @@ snapshots: dependencies: '@typescript-eslint/tsconfig-utils': 8.38.0(typescript@5.8.3) 
'@typescript-eslint/types': 8.38.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 typescript: 5.8.3 transitivePeerDependencies: - supports-color @@ -9482,7 +9520,7 @@ snapshots: '@typescript-eslint/types': 8.38.0 '@typescript-eslint/typescript-estree': 8.38.0(typescript@5.8.3) '@typescript-eslint/utils': 8.38.0(eslint@9.31.0(jiti@2.4.2))(typescript@5.8.3) - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 eslint: 9.31.0(jiti@2.4.2) ts-api-utils: 2.1.0(typescript@5.8.3) typescript: 5.8.3 @@ -9497,7 +9535,7 @@ snapshots: '@typescript-eslint/tsconfig-utils': 8.38.0(typescript@5.8.3) '@typescript-eslint/types': 8.38.0 '@typescript-eslint/visitor-keys': 8.38.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 fast-glob: 3.3.3 is-glob: 4.0.3 minimatch: 9.0.5 @@ -10173,8 +10211,8 @@ snapshots: autoprefixer@10.4.21(postcss@8.5.6): dependencies: - browserslist: 4.25.1 - caniuse-lite: 1.0.30001727 + browserslist: 4.26.2 + caniuse-lite: 1.0.30001746 fraction.js: 4.3.7 normalize-range: 0.1.2 picocolors: 1.1.1 @@ -10208,6 +10246,8 @@ snapshots: base64-js@1.5.1: optional: true + baseline-browser-mapping@2.8.9: {} + better-opn@3.0.2: dependencies: open: 8.4.2 @@ -10244,19 +10284,13 @@ snapshots: browser-stdout@1.3.1: {} - browserslist@4.25.1: - dependencies: - caniuse-lite: 1.0.30001727 - electron-to-chromium: 1.5.190 - node-releases: 2.0.19 - update-browserslist-db: 1.1.3(browserslist@4.25.1) - - browserslist@4.25.4: + browserslist@4.26.2: dependencies: - caniuse-lite: 1.0.30001741 - electron-to-chromium: 1.5.215 - node-releases: 2.0.20 - update-browserslist-db: 1.1.3(browserslist@4.25.4) + baseline-browser-mapping: 2.8.9 + caniuse-lite: 1.0.30001746 + electron-to-chromium: 1.5.227 + node-releases: 2.0.21 + update-browserslist-db: 1.1.3(browserslist@4.26.2) buffer-crc32@0.2.13: {} @@ -10315,9 +10349,7 @@ snapshots: camelcase@6.3.0: {} - caniuse-lite@1.0.30001727: {} - - caniuse-lite@1.0.30001741: {} + caniuse-lite@1.0.30001746: {} ccount@2.0.1: {} @@ -10437,13 +10469,13 
@@ snapshots: codemirror@6.0.1: dependencies: - '@codemirror/autocomplete': 6.18.7 + '@codemirror/autocomplete': 6.19.0 '@codemirror/commands': 6.8.1 '@codemirror/language': 6.11.3 '@codemirror/lint': 6.8.5 '@codemirror/search': 6.5.10 '@codemirror/state': 6.5.2 - '@codemirror/view': 6.38.2 + '@codemirror/view': 6.38.4 color-convert@2.0.1: dependencies: @@ -10506,6 +10538,8 @@ snapshots: crelt@1.0.6: {} + cronstrue@3.3.0: {} + cross-spawn@7.0.6: dependencies: path-key: 3.1.1 @@ -10569,6 +10603,11 @@ snapshots: d3-selection: 3.0.0 d3-transition: 3.0.1(d3-selection@3.0.0) + dagre@0.8.5: + dependencies: + graphlib: 2.1.8 + lodash: 4.17.21 + data-urls@5.0.0: dependencies: whatwg-mimetype: 4.0.0 @@ -10594,6 +10633,10 @@ snapshots: de-indent@1.0.2: {} + debug@4.4.1: + dependencies: + ms: 2.1.3 + debug@4.4.1(supports-color@8.1.1): dependencies: ms: 2.1.3 @@ -10620,6 +10663,8 @@ snapshots: deep-is@0.1.4: {} + deepmerge@4.3.1: {} + default-browser-id@5.0.0: {} default-browser@5.2.1: @@ -10722,9 +10767,7 @@ snapshots: '@standard-schema/spec': 1.0.0 fast-check: 3.23.2 - electron-to-chromium@1.5.190: {} - - electron-to-chromium@1.5.215: {} + electron-to-chromium@1.5.227: {} elkjs@0.8.2: {} @@ -10866,7 +10909,7 @@ snapshots: esbuild-register@3.6.0(esbuild@0.25.8): dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 esbuild: 0.25.8 transitivePeerDependencies: - supports-color @@ -10949,7 +10992,7 @@ snapshots: ajv: 6.12.6 chalk: 4.1.2 cross-spawn: 7.0.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 escape-string-regexp: 4.0.0 eslint-scope: 8.4.0 eslint-visitor-keys: 4.2.1 @@ -11274,6 +11317,10 @@ snapshots: graphemer@1.4.0: {} + graphlib@2.1.8: + dependencies: + lodash: 4.17.21 + has-bigints@1.1.0: {} has-flag@4.0.0: {} @@ -11356,7 +11403,7 @@ snapshots: http-proxy-agent@7.0.2: dependencies: agent-base: 7.1.4 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 transitivePeerDependencies: - supports-color @@ -11365,7 +11412,7 @@ snapshots: 
https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.4 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 transitivePeerDependencies: - supports-color @@ -12304,9 +12351,7 @@ snapshots: dependencies: es6-promise: 3.3.1 - node-releases@2.0.19: {} - - node-releases@2.0.20: {} + node-releases@2.0.21: {} node-sarif-builder@3.2.0: dependencies: @@ -13427,7 +13472,7 @@ snapshots: sucrase@3.35.0: dependencies: - '@jridgewell/gen-mapping': 0.3.12 + '@jridgewell/gen-mapping': 0.3.13 commander: 4.1.1 glob: 10.4.5 lines-and-columns: 1.2.4 @@ -13579,7 +13624,7 @@ snapshots: terser-webpack-plugin@5.3.14(esbuild@0.25.8)(webpack@5.99.8(esbuild@0.25.8)): dependencies: - '@jridgewell/trace-mapping': 0.3.30 + '@jridgewell/trace-mapping': 0.3.31 jest-worker: 27.5.1 schema-utils: 4.3.2 serialize-javascript: 6.0.2 @@ -13871,15 +13916,9 @@ snapshots: picomatch: 4.0.3 webpack-virtual-modules: 0.6.2 - update-browserslist-db@1.1.3(browserslist@4.25.1): + update-browserslist-db@1.1.3(browserslist@4.26.2): dependencies: - browserslist: 4.25.1 - escalade: 3.2.0 - picocolors: 1.1.1 - - update-browserslist-db@1.1.3(browserslist@4.25.4): - dependencies: - browserslist: 4.25.4 + browserslist: 4.26.2 escalade: 3.2.0 picocolors: 1.1.1 @@ -13946,7 +13985,7 @@ snapshots: vite-node@3.2.4(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 es-module-lexer: 1.7.0 pathe: 2.0.3 vite: 6.3.5(@types/node@20.11.25)(jiti@2.4.2)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.20.3)(yaml@2.8.0) @@ -13996,7 +14035,7 @@ snapshots: '@volar/typescript': 2.4.23 '@vue/language-core': 2.2.0(typescript@5.8.3) compare-versions: 6.1.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 kolorist: 1.8.0 local-pkg: 1.1.1 magic-string: 0.30.17 @@ -14062,7 +14101,7 @@ snapshots: '@vitest/spy': 3.2.4 '@vitest/utils': 3.2.4 chai: 5.2.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.1 expect-type: 1.2.2 
magic-string: 0.30.17 pathe: 2.0.3 @@ -14235,7 +14274,7 @@ snapshots: '@webassemblyjs/wasm-edit': 1.14.1 '@webassemblyjs/wasm-parser': 1.14.1 acorn: 8.15.0 - browserslist: 4.25.4 + browserslist: 4.26.2 chrome-trace-event: 1.0.4 enhanced-resolve: 5.18.3 es-module-lexer: 1.7.0 diff --git a/posts/virtual_data_environments.md b/posts/virtual_data_environments.md index dc3b2cb46e..5cde9dba51 100644 --- a/posts/virtual_data_environments.md +++ b/posts/virtual_data_environments.md @@ -8,7 +8,7 @@ In this post, I'm going to explain why existing approaches to managing developme I'll introduce [Virtual Data Environments](#virtual-data-environments-1) - a novel approach that provides low-cost, efficient, scalable, and safe data environments that are easy to use and manage. They significantly boost the productivity of anyone who has to create or maintain data pipelines. -Finally, I’m going to explain how **Virtual Data Environments** are implemented in [SQLMesh](https://github.com/TobikoData/sqlmesh) and share details on each core component involved: +Finally, I’m going to explain how **Virtual Data Environments** are implemented in [SQLMesh](https://github.com/SQLMesh/sqlmesh) and share details on each core component involved: - Data [fingerprinting](#fingerprinting) - [Automatic change categorization](#automatic-change-categorization) - Decoupling of [physical](#physical-layer) and [virtual](#virtual-layer) layers @@ -156,6 +156,6 @@ With **Virtual Data Environments**, SQLMesh is able to provide fully **isolated* - Rolling back a change happens almost instantaneously since no data movement is involved and only views that are part of the **virtual layer** get updated. - Deploying changes to production is a **virtual layer** operation, which ensures that results observed during development are exactly the same in production and that data and code are always in sync. 
-To streamline deploying changes to production, our team is about to release the SQLMesh [CI/CD bot](https://github.com/TobikoData/sqlmesh/blob/main/docs/integrations/github.md), which will help automate this process. +To streamline deploying changes to production, our team is about to release the SQLMesh [CI/CD bot](https://github.com/SQLMesh/sqlmesh/blob/main/docs/integrations/github.md), which will help automate this process. Don't miss out - join our [Slack channel](https://tobikodata.com/slack) and stay tuned! diff --git a/pyproject.toml b/pyproject.toml index 6823f7750b..56d66ecff5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "sqlmesh" dynamic = ["version"] description = "Next-generation data transformation framework" readme = "README.md" -authors = [{ name = "TobikoData Inc.", email = "engineering@tobikodata.com" }] +authors = [{ name = "SQLMesh Contributors" }] license = { file = "LICENSE" } requires-python = ">= 3.9" dependencies = [ @@ -18,13 +18,13 @@ dependencies = [ "ipywidgets", "jinja2", "packaging", - "pandas", + "pandas<3.0.0", "pydantic>=2.0.0", "python-dotenv", "requests", "rich[jupyter]", "ruamel.yaml", - "sqlglot[rs]~=27.14.0", + "sqlglot~=30.0.1", "tenacity", "time-machine", "json-stream" @@ -76,6 +76,7 @@ dev = [ "google-cloud-bigquery-storage", "httpx", "mypy~=1.13.0", + "numpy", "pandas-stubs", "pre-commit", "psycopg2-binary", @@ -110,7 +111,7 @@ duckdb = [] fabric = ["pyodbc>=5.0.0"] gcppostgres = ["cloud-sql-python-connector[pg8000]>=1.8.0"] github = ["PyGithub>=2.6.0"] -motherduck = ["duckdb>=1.2.0"] +motherduck = ["duckdb>=1.3.2"] mssql = ["pymssql"] mssql-odbc = ["pyodbc>=5.0.0"] mysql = ["pymysql"] @@ -125,7 +126,7 @@ snowflake = [ ] trino = ["trino"] web = [ - "fastapi==0.115.5", + "fastapi==0.120.1", "watchfiles>=0.19.0", "uvicorn[standard]==0.22.0", "sse-starlette>=0.2.2", @@ -133,9 +134,9 @@ web = [ ] lsp = [ # Duplicate of web - "fastapi==0.115.5", + "fastapi==0.120.1", "watchfiles>=0.19.0", - 
"uvicorn[standard]==0.22.0", + # "uvicorn[standard]==0.22.0", "sse-starlette>=0.2.2", "pyarrow", # For lsp @@ -153,8 +154,8 @@ sqlmesh_lsp = "sqlmesh.lsp.main:main" [project.urls] Homepage = "https://sqlmesh.com/" Documentation = "https://sqlmesh.readthedocs.io/en/stable/" -Repository = "https://github.com/TobikoData/sqlmesh" -Issues = "https://github.com/TobikoData/sqlmesh/issues" +Repository = "https://github.com/SQLMesh/sqlmesh" +Issues = "https://github.com/SQLMesh/sqlmesh/issues" [build-system] requires = ["setuptools >= 61.0", "setuptools_scm"] @@ -225,7 +226,8 @@ module = [ "pydantic_core.*", "dlt.*", "bigframes.*", - "json_stream.*" + "json_stream.*", + "duckdb.*" ] ignore_missing_imports = true @@ -239,6 +241,7 @@ markers = [ "remote: test that involves interacting with a remote DB", "cicdonly: test that only runs on CI/CD", "isolated: tests that need to run sequentially usually because they use fork", + "dialect_isolated: tests that need to run separately due to global dialect overrides", # Test Domain Markers # default: core functionality diff --git a/sqlmesh-technical-charter.pdf b/sqlmesh-technical-charter.pdf new file mode 100644 index 0000000000..107f015050 Binary files /dev/null and b/sqlmesh-technical-charter.pdf differ diff --git a/sqlmesh/__init__.py b/sqlmesh/__init__.py index 47e9bacce2..577a3aaf02 100644 --- a/sqlmesh/__init__.py +++ b/sqlmesh/__init__.py @@ -126,6 +126,8 @@ def is_cicd_environment() -> bool: def is_interactive_environment() -> bool: + if sys.stdin is None or sys.stdout is None: + return False return sys.stdin.isatty() and sys.stdout.isatty() @@ -186,6 +188,7 @@ def configure_logging( write_to_file: bool = True, log_file_dir: t.Optional[t.Union[str, Path]] = None, ignore_warnings: bool = False, + log_level: t.Optional[t.Union[str, int]] = None, ) -> None: # Remove noisy grpc logs that are not useful for users os.environ["GRPC_VERBOSITY"] = os.environ.get("GRPC_VERBOSITY", "NONE") @@ -193,8 +196,15 @@ def configure_logging( 
logger = logging.getLogger() debug = force_debug or debug_mode_enabled() - # base logger needs to be the lowest level that we plan to log - level = logging.DEBUG if debug else logging.INFO + if log_level is not None: + if isinstance(log_level, str): + level = logging._nameToLevel.get(log_level.upper()) or logging.INFO + else: + level = log_level + else: + # base logger needs to be the lowest level that we plan to log + level = logging.DEBUG if debug else logging.INFO + logger.setLevel(level) if debug: diff --git a/sqlmesh/cli/main.py b/sqlmesh/cli/main.py index 2f18c0a4b7..ec5acbea59 100644 --- a/sqlmesh/cli/main.py +++ b/sqlmesh/cli/main.py @@ -246,7 +246,7 @@ def init( Need help? • Docs: https://sqlmesh.readthedocs.io • Slack: https://www.tobikodata.com/slack -• GitHub: https://github.com/TobikoData/sqlmesh/issues +• GitHub: https://github.com/SQLMesh/sqlmesh/issues """) @@ -535,7 +535,7 @@ def diff(ctx: click.Context, environment: t.Optional[str] = None) -> None: ) @click.option( "--min-intervals", - default=0, + default=None, help="For every model, ensure at least this many intervals are covered by a missing intervals check regardless of the plan start date", ) @opt.verbose diff --git a/sqlmesh/cli/project_init.py b/sqlmesh/cli/project_init.py index 0790562de7..e3132a6de3 100644 --- a/sqlmesh/cli/project_init.py +++ b/sqlmesh/cli/project_init.py @@ -116,7 +116,15 @@ def _gen_config( - invalidselectstarexpansion - noambiguousprojections """, - ProjectTemplate.DBT: f"""# --- Virtual Data Environment Mode --- + ProjectTemplate.DBT: f"""# --- DBT-specific options --- +dbt: + # This configuration ensures that each dbt target gets its own isolated state. 
+ # The inferred state schemas are named "sqlmesh_state__", eg "sqlmesh_state_jaffle_shop_dev" + # If this is undesirable, you may manually configure the gateway to use a specific state schema name + # https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#selecting-a-different-state-connection + infer_state_schema_name: True + +# --- Virtual Data Environment Mode --- # Enable Virtual Data Environments (VDE) for *development* environments. # Note that the production environment in dbt projects is not virtual by default to maintain compatibility with existing tooling. # https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#virtual-data-environment-modes @@ -298,6 +306,7 @@ def init_example_project( dlt_path: t.Optional[str] = None, schema_name: str = "sqlmesh_example", cli_mode: InitCliMode = InitCliMode.DEFAULT, + start: t.Optional[str] = None, ) -> Path: root_path = Path(path) @@ -336,7 +345,6 @@ def init_example_project( models: t.Set[t.Tuple[str, str]] = set() settings = None - start = None if engine_type and template == ProjectTemplate.DLT: project_dialect = dialect or DIALECT_TO_TYPE.get(engine_type) if pipeline and project_dialect: diff --git a/sqlmesh/core/_typing.py b/sqlmesh/core/_typing.py index e495df169e..2bc69e901b 100644 --- a/sqlmesh/core/_typing.py +++ b/sqlmesh/core/_typing.py @@ -8,8 +8,9 @@ if t.TYPE_CHECKING: TableName = t.Union[str, exp.Table] SchemaName = t.Union[str, exp.Table] - SessionProperties = t.Dict[str, t.Union[exp.Expression, str, int, float, bool]] - CustomMaterializationProperties = t.Dict[str, t.Union[exp.Expression, str, int, float, bool]] + SessionProperties = t.Dict[str, t.Union[exp.Expr, str, int, float, bool]] + CustomMaterializationProperties = t.Dict[str, t.Union[exp.Expr, str, int, float, bool]] + if sys.version_info >= (3, 11): from typing import Self as Self diff --git a/sqlmesh/core/audit/definition.py b/sqlmesh/core/audit/definition.py index 561ee539f6..4c90151ee4 100644 --- 
a/sqlmesh/core/audit/definition.py +++ b/sqlmesh/core/audit/definition.py @@ -19,7 +19,7 @@ sorted_python_env_payloads, ) from sqlmesh.core.model.common import make_python_env, single_value_or_tuple, ParsableSql -from sqlmesh.core.node import _Node +from sqlmesh.core.node import _Node, DbtInfoMixin, DbtNodeInfo from sqlmesh.core.renderer import QueryRenderer from sqlmesh.utils.date import TimeLike from sqlmesh.utils.errors import AuditConfigError, SQLMeshError, raise_config_error @@ -67,7 +67,7 @@ class AuditMixin(AuditCommonMetaMixin): """ query_: ParsableSql - defaults: t.Dict[str, exp.Expression] + defaults: t.Dict[str, exp.Expr] expressions_: t.Optional[t.List[ParsableSql]] jinja_macros: JinjaMacroRegistry formatting: t.Optional[bool] @@ -77,10 +77,10 @@ def query(self) -> t.Union[exp.Query, d.JinjaQuery]: return t.cast(t.Union[exp.Query, d.JinjaQuery], self.query_.parse(self.dialect)) @property - def expressions(self) -> t.List[exp.Expression]: + def expressions(self) -> t.List[exp.Expr]: if not self.expressions_: return [] - result = [] + result: t.List[exp.Expr] = [] for e in self.expressions_: parsed = e.parse(self.dialect) if not isinstance(parsed, exp.Semicolon): @@ -95,7 +95,7 @@ def macro_definitions(self) -> t.List[d.MacroDef]: @field_validator("name", "dialect", mode="before", check_fields=False) def audit_string_validator(cls: t.Type, v: t.Any) -> t.Optional[str]: - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return v.name.lower() return str(v).lower() if v is not None else None @@ -111,16 +111,14 @@ def audit_map_validator(cls: t.Type, v: t.Any, values: t.Any) -> t.Dict[str, t.A if isinstance(v, dict): dialect = get_dialect(values) return { - key: value - if isinstance(value, exp.Expression) - else d.parse_one(str(value), dialect=dialect) + key: value if isinstance(value, exp.Expr) else d.parse_one(str(value), dialect=dialect) for key, value in v.items() } raise_config_error("Defaults must be a tuple of exp.EQ or a dict", 
error_type=AuditConfigError) return {} -class ModelAudit(PydanticModel, AuditMixin, frozen=True): +class ModelAudit(PydanticModel, AuditMixin, DbtInfoMixin, frozen=True): """ Audit is an assertion made about your tables. @@ -133,10 +131,11 @@ class ModelAudit(PydanticModel, AuditMixin, frozen=True): blocking: bool = True standalone: t.Literal[False] = False query_: ParsableSql = Field(alias="query") - defaults: t.Dict[str, exp.Expression] = {} + defaults: t.Dict[str, exp.Expr] = {} expressions_: t.Optional[t.List[ParsableSql]] = Field(default=None, alias="expressions") jinja_macros: JinjaMacroRegistry = JinjaMacroRegistry() formatting: t.Optional[bool] = Field(default=None, exclude=True) + dbt_node_info_: t.Optional[DbtNodeInfo] = Field(alias="dbt_node_info", default=None) _path: t.Optional[Path] = None @@ -150,6 +149,10 @@ def __str__(self) -> str: path = f": {self._path.name}" if self._path else "" return f"{self.__class__.__name__}<{self.name}{path}>" + @property + def dbt_node_info(self) -> t.Optional[DbtNodeInfo]: + return self.dbt_node_info_ + class StandaloneAudit(_Node, AuditMixin): """ @@ -164,7 +167,7 @@ class StandaloneAudit(_Node, AuditMixin): blocking: bool = False standalone: t.Literal[True] = True query_: ParsableSql = Field(alias="query") - defaults: t.Dict[str, exp.Expression] = {} + defaults: t.Dict[str, exp.Expr] = {} expressions_: t.Optional[t.List[ParsableSql]] = Field(default=None, alias="expressions") jinja_macros: JinjaMacroRegistry = JinjaMacroRegistry() default_catalog: t.Optional[str] = None @@ -318,13 +321,13 @@ def render_definition( include_python: bool = True, include_defaults: bool = False, render_query: bool = False, - ) -> t.List[exp.Expression]: + ) -> t.List[exp.Expr]: """Returns the original list of sql expressions comprising the model definition. Args: include_python: Whether or not to include Python code in the rendered definition. 
""" - expressions: t.List[exp.Expression] = [] + expressions: t.List[exp.Expr] = [] comment = None for field_name in sorted(self.meta_fields): field_value = getattr(self, field_name) @@ -376,7 +379,7 @@ def meta_fields(self) -> t.Iterable[str]: return set(AuditCommonMetaMixin.__annotations__) | set(_Node.all_field_infos()) @property - def audits_with_args(self) -> t.List[t.Tuple[Audit, t.Dict[str, exp.Expression]]]: + def audits_with_args(self) -> t.List[t.Tuple[Audit, t.Dict[str, exp.Expr]]]: return [(self, {})] @@ -384,7 +387,7 @@ def audits_with_args(self) -> t.List[t.Tuple[Audit, t.Dict[str, exp.Expression]] def load_audit( - expressions: t.List[exp.Expression], + expressions: t.List[exp.Expr], *, path: Path = Path(), module_path: Path = Path(), @@ -494,7 +497,7 @@ def load_audit( def load_multiple_audits( - expressions: t.List[exp.Expression], + expressions: t.List[exp.Expr], *, path: Path = Path(), module_path: Path = Path(), @@ -505,7 +508,7 @@ def load_multiple_audits( variables: t.Optional[t.Dict[str, t.Any]] = None, project: t.Optional[str] = None, ) -> t.Generator[Audit, None, None]: - audit_block: t.List[exp.Expression] = [] + audit_block: t.List[exp.Expr] = [] for expression in expressions: if isinstance(expression, d.Audit): if audit_block: @@ -538,7 +541,7 @@ def _raise_config_error(msg: str, path: pathlib.Path) -> None: # mypy doesn't realize raise_config_error raises an exception @t.no_type_check -def _maybe_parse_arg_pair(e: exp.Expression) -> t.Tuple[str, exp.Expression]: +def _maybe_parse_arg_pair(e: exp.Expr) -> t.Tuple[str, exp.Expr]: if isinstance(e, exp.EQ): return e.left.name, e.right @@ -552,4 +555,5 @@ def _maybe_parse_arg_pair(e: exp.Expression) -> t.Tuple[str, exp.Expression]: "depends_on_": lambda value: exp.Tuple(expressions=sorted(value)), "tags": single_value_or_tuple, "default_catalog": exp.to_identifier, + "dbt_node_info_": lambda value: value.to_expression(), } diff --git a/sqlmesh/core/config/__init__.py 
b/sqlmesh/core/config/__init__.py index 0dc99c0fd1..42ed82c6e6 100644 --- a/sqlmesh/core/config/__init__.py +++ b/sqlmesh/core/config/__init__.py @@ -36,6 +36,6 @@ from sqlmesh.core.config.naming import NameInferenceConfig as NameInferenceConfig from sqlmesh.core.config.linter import LinterConfig as LinterConfig from sqlmesh.core.config.plan import PlanConfig as PlanConfig -from sqlmesh.core.config.root import Config as Config +from sqlmesh.core.config.root import Config as Config, DbtConfig as DbtConfig from sqlmesh.core.config.run import RunConfig as RunConfig from sqlmesh.core.config.scheduler import BuiltInSchedulerConfig as BuiltInSchedulerConfig diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 1678f5d147..7a002faebb 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -17,6 +17,7 @@ from packaging import version from sqlglot import exp from sqlglot.helper import subclasses +from sqlglot.errors import ParseError from sqlmesh.core import engine_adapter from sqlmesh.core.config.base import BaseConfig @@ -58,6 +59,7 @@ "clickhouse", } MOTHERDUCK_TOKEN_REGEX = re.compile(r"(\?|\&)(motherduck_token=)(\S*)") +PASSWORD_REGEX = re.compile(r"(password=)(\S+)") def _get_engine_import_validator( @@ -101,6 +103,7 @@ class ConnectionConfig(abc.ABC, BaseConfig): pre_ping: bool pretty_sql: bool = False schema_differ_overrides: t.Optional[t.Dict[str, t.Any]] = None + catalog_type_overrides: t.Optional[t.Dict[str, str]] = None # Whether to share a single connection across threads or create a new connection per thread. 
shared_connection: t.ClassVar[bool] = False @@ -176,6 +179,7 @@ def create_engine_adapter( pretty_sql=self.pretty_sql, shared_connection=self.shared_connection, schema_differ_overrides=self.schema_differ_overrides, + catalog_type_overrides=self.catalog_type_overrides, **self._extra_engine_config, ) @@ -235,6 +239,7 @@ class DuckDBAttachOptions(BaseConfig): data_path: t.Optional[str] = None encrypted: bool = False data_inlining_row_limit: t.Optional[int] = None + metadata_schema: t.Optional[str] = None def to_sql(self, alias: str) -> str: options = [] @@ -256,6 +261,8 @@ def to_sql(self, alias: str) -> str: options.append("ENCRYPTED") if self.data_inlining_row_limit is not None: options.append(f"DATA_INLINING_ROW_LIMIT {self.data_inlining_row_limit}") + if self.metadata_schema is not None: + options.append(f"METADATA_SCHEMA '{self.metadata_schema}'") options_sql = f" ({', '.join(options)})" if options else "" alias_sql = "" @@ -477,13 +484,13 @@ def create_engine_adapter( adapter = BaseDuckDBConnectionConfig._data_file_to_adapter.get(key) if adapter is not None: logger.info( - f"Using existing DuckDB adapter due to overlapping data file: {self._mask_motherduck_token(key)}" + f"Using existing DuckDB adapter due to overlapping data file: {self._mask_sensitive_data(key)}" ) return adapter if data_files: masked_files = { - self._mask_motherduck_token(file if isinstance(file, str) else file.path) + self._mask_sensitive_data(file if isinstance(file, str) else file.path) for file in data_files } logger.info(f"Creating new DuckDB adapter for data files: {masked_files}") @@ -505,10 +512,14 @@ def get_catalog(self) -> t.Optional[str]: return list(self.catalogs)[0] return None - def _mask_motherduck_token(self, string: str) -> str: - return MOTHERDUCK_TOKEN_REGEX.sub( - lambda m: f"{m.group(1)}{m.group(2)}{'*' * len(m.group(3))}", string + def _mask_sensitive_data(self, string: str) -> str: + # Mask MotherDuck tokens with fixed number of asterisks + result = 
MOTHERDUCK_TOKEN_REGEX.sub( + lambda m: f"{m.group(1)}{m.group(2)}{'*' * 8 if m.group(3) else ''}", string ) + # Mask PostgreSQL/MySQL passwords with fixed number of asterisks + result = PASSWORD_REGEX.sub(lambda m: f"{m.group(1)}{'*' * 8}", result) + return result class MotherDuckConnectionConfig(BaseDuckDBConnectionConfig): @@ -1051,6 +1062,7 @@ class BigQueryConnectionConfig(ConnectionConfig): job_retry_deadline_seconds: t.Optional[int] = None priority: t.Optional[BigQueryPriority] = None maximum_bytes_billed: t.Optional[int] = None + reservation: t.Optional[str] = None concurrent_tasks: int = 1 register_comments: bool = True @@ -1160,6 +1172,7 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: "job_retry_deadline_seconds", "priority", "maximum_bytes_billed", + "reservation", } } @@ -1753,6 +1766,7 @@ class SparkConnectionConfig(ConnectionConfig): config_dir: t.Optional[str] = None catalog: t.Optional[str] = None config: t.Dict[str, t.Any] = {} + wap_enabled: bool = False concurrent_tasks: int = 4 register_comments: bool = True @@ -1799,6 +1813,10 @@ def _static_connection_kwargs(self) -> t.Dict[str, t.Any]: .getOrCreate(), } + @property + def _extra_engine_config(self) -> t.Dict[str, t.Any]: + return {"wap_enabled": self.wap_enabled} + class TrinoAuthenticationMethod(str, Enum): NO_AUTH = "no-auth" @@ -1872,9 +1890,11 @@ class TrinoConnectionConfig(ConnectionConfig): client_certificate: t.Optional[str] = None client_private_key: t.Optional[str] = None cert: t.Optional[str] = None + source: str = "sqlmesh" # SQLMesh options schema_location_mapping: t.Optional[dict[re.Pattern, str]] = None + timestamp_mapping: t.Optional[dict[exp.DataType, exp.DataType]] = None concurrent_tasks: int = 4 register_comments: bool = True pre_ping: t.Literal[False] = False @@ -1899,6 +1919,34 @@ def _validate_regex_keys( ) return compiled + @field_validator("timestamp_mapping", mode="before") + @classmethod + def _validate_timestamp_mapping( + cls, value: t.Optional[dict[str, 
str]] + ) -> t.Optional[dict[exp.DataType, exp.DataType]]: + if value is None: + return value + + result: dict[exp.DataType, exp.DataType] = {} + for source_type, target_type in value.items(): + try: + source_datatype = exp.DataType.build(source_type) + except ParseError: + raise ConfigError( + f"Invalid SQL type string in timestamp_mapping: " + f"'{source_type}' is not a valid SQL data type." + ) + try: + target_datatype = exp.DataType.build(target_type) + except ParseError: + raise ConfigError( + f"Invalid SQL type string in timestamp_mapping: " + f"'{target_type}' is not a valid SQL data type." + ) + result[source_datatype] = target_datatype + + return result + @model_validator(mode="after") def _root_validator(self) -> Self: port = self.port @@ -1939,6 +1987,7 @@ def _connection_kwargs_keys(self) -> t.Set[str]: "port", "catalog", "roles", + "source", "http_scheme", "http_headers", "session_properties", @@ -1966,7 +2015,17 @@ def _static_connection_kwargs(self) -> t.Dict[str, t.Any]: OAuth2Authentication, ) + auth: t.Optional[ + t.Union[ + BasicAuthentication, + KerberosAuthentication, + OAuth2Authentication, + JWTAuthentication, + CertificateAuthentication, + ] + ] = None if self.method.is_basic or self.method.is_ldap: + assert self.password is not None # for mypy since validator already checks this auth = BasicAuthentication(self.user, self.password) elif self.method.is_kerberos: if self.keytab: @@ -1985,23 +2044,27 @@ def _static_connection_kwargs(self) -> t.Dict[str, t.Any]: elif self.method.is_oauth: auth = OAuth2Authentication() elif self.method.is_jwt: + assert self.jwt_token is not None auth = JWTAuthentication(self.jwt_token) elif self.method.is_certificate: + assert self.client_certificate is not None + assert self.client_private_key is not None auth = CertificateAuthentication(self.client_certificate, self.client_private_key) - else: - auth = None return { "auth": auth, "user": self.impersonation_user or self.user, "max_attempts": self.retries, 
"verify": self.cert if self.cert is not None else self.verify, - "source": "sqlmesh", + "source": self.source, } @property def _extra_engine_config(self) -> t.Dict[str, t.Any]: - return {"schema_location_mapping": self.schema_location_mapping} + return { + "schema_location_mapping": self.schema_location_mapping, + "timestamp_mapping": self.timestamp_mapping, + } class ClickhouseConnectionConfig(ConnectionConfig): @@ -2284,7 +2347,7 @@ def init(cursor: t.Any) -> None: for tpe in subclasses( __name__, ConnectionConfig, - exclude=(ConnectionConfig, BaseDuckDBConnectionConfig), + exclude={ConnectionConfig, BaseDuckDBConnectionConfig}, ) } @@ -2293,7 +2356,7 @@ def init(cursor: t.Any) -> None: for tpe in subclasses( __name__, ConnectionConfig, - exclude=(ConnectionConfig, BaseDuckDBConnectionConfig), + exclude={ConnectionConfig, BaseDuckDBConnectionConfig}, ) } @@ -2305,7 +2368,7 @@ def init(cursor: t.Any) -> None: for tpe in subclasses( __name__, ConnectionConfig, - exclude=(ConnectionConfig, BaseDuckDBConnectionConfig), + exclude={ConnectionConfig, BaseDuckDBConnectionConfig}, ) } diff --git a/sqlmesh/core/config/dbt.py b/sqlmesh/core/config/dbt.py new file mode 100644 index 0000000000..e3132c40a4 --- /dev/null +++ b/sqlmesh/core/config/dbt.py @@ -0,0 +1,13 @@ +from sqlmesh.core.config.base import BaseConfig + + +class DbtConfig(BaseConfig): + """ + Represents dbt-specific options on the SQLMesh root config. 
+ + These options are only taken into account for dbt projects and are ignored on native projects + """ + + infer_state_schema_name: bool = False + """If set, indicates to the dbt loader that the state schema should be inferred based on the profile/target + so that each target gets its own isolated state""" diff --git a/sqlmesh/core/config/janitor.py b/sqlmesh/core/config/janitor.py index d288c90b3e..0f1c953bc0 100644 --- a/sqlmesh/core/config/janitor.py +++ b/sqlmesh/core/config/janitor.py @@ -1,7 +1,9 @@ from __future__ import annotations +import typing as t from sqlmesh.core.config.base import BaseConfig +from sqlmesh.utils.pydantic import field_validator class JanitorConfig(BaseConfig): @@ -9,6 +11,16 @@ class JanitorConfig(BaseConfig): Args: warn_on_delete_failure: Whether to warn instead of erroring if the janitor fails to delete the expired environment schema / views. + expired_snapshots_batch_size: Maximum number of expired snapshots to clean in a single batch. """ warn_on_delete_failure: bool = False + expired_snapshots_batch_size: t.Optional[int] = None + + @field_validator("expired_snapshots_batch_size", mode="before") + @classmethod + def _validate_batch_size(cls, value: int) -> int: + batch_size = int(value) + if batch_size <= 0: + raise ValueError("expired_snapshots_batch_size must be greater than 0") + return batch_size diff --git a/sqlmesh/core/config/linter.py b/sqlmesh/core/config/linter.py index c2a40e09aa..11d700c540 100644 --- a/sqlmesh/core/config/linter.py +++ b/sqlmesh/core/config/linter.py @@ -34,7 +34,7 @@ def _validate_rules(cls, v: t.Any) -> t.Set[str]: v = v.unnest().name elif isinstance(v, (exp.Tuple, exp.Array)): v = [e.name for e in v.expressions] - elif isinstance(v, exp.Expression): + elif isinstance(v, exp.Expr): v = v.name return {name.lower() for name in ensure_collection(v)} diff --git a/sqlmesh/core/config/loader.py b/sqlmesh/core/config/loader.py index 75915800e6..e92c62960a 100644 --- a/sqlmesh/core/config/loader.py +++ 
b/sqlmesh/core/config/loader.py @@ -172,11 +172,18 @@ def load_config_from_paths( if dbt_project_file: from sqlmesh.dbt.loader import sqlmesh_config + infer_state_schema_name = False + if dbt := non_python_config.dbt: + infer_state_schema_name = dbt.infer_state_schema_name + dbt_python_config = sqlmesh_config( project_root=dbt_project_file.parent, + profiles_dir=kwargs.pop("profiles_dir", None), dbt_profile_name=kwargs.pop("profile", None), dbt_target_name=kwargs.pop("target", None), variables=variables, + threads=kwargs.pop("threads", None), + infer_state_schema_name=infer_state_schema_name, ) if type(dbt_python_config) != config_type: dbt_python_config = convert_config_type(dbt_python_config, config_type) diff --git a/sqlmesh/core/config/model.py b/sqlmesh/core/config/model.py index 5406a5497b..ac41d75fe3 100644 --- a/sqlmesh/core/config/model.py +++ b/sqlmesh/core/config/model.py @@ -45,6 +45,7 @@ class ModelDefaultsConfig(BaseConfig): allow_partials: Whether the models can process partial (incomplete) data intervals. enabled: Whether the models are enabled. interval_unit: The temporal granularity of the models data intervals. By default computed from cron. + batch_concurrency: The maximum number of batches that can run concurrently for an incremental model. pre_statements: The list of SQL statements that get executed before a model runs. post_statements: The list of SQL statements that get executed before a model runs. on_virtual_update: The list of SQL statements to be executed after the virtual update. 
@@ -69,9 +70,10 @@ class ModelDefaultsConfig(BaseConfig): interval_unit: t.Optional[t.Union[str, IntervalUnit]] = None enabled: t.Optional[t.Union[str, bool]] = None formatting: t.Optional[t.Union[str, bool]] = None - pre_statements: t.Optional[t.List[t.Union[str, exp.Expression]]] = None - post_statements: t.Optional[t.List[t.Union[str, exp.Expression]]] = None - on_virtual_update: t.Optional[t.List[t.Union[str, exp.Expression]]] = None + batch_concurrency: t.Optional[int] = None + pre_statements: t.Optional[t.List[t.Union[str, exp.Expr]]] = None + post_statements: t.Optional[t.List[t.Union[str, exp.Expr]]] = None + on_virtual_update: t.Optional[t.List[t.Union[str, exp.Expr]]] = None _model_kind_validator = model_kind_validator _on_destructive_change_validator = on_destructive_change_validator diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py index 9b6fae63e3..211d271b01 100644 --- a/sqlmesh/core/config/root.py +++ b/sqlmesh/core/config/root.py @@ -36,6 +36,7 @@ from sqlmesh.core.config.linter import LinterConfig as LinterConfig from sqlmesh.core.config.plan import PlanConfig from sqlmesh.core.config.run import RunConfig +from sqlmesh.core.config.dbt import DbtConfig from sqlmesh.core.config.scheduler import ( BuiltInSchedulerConfig, SchedulerConfig, @@ -173,6 +174,7 @@ class Config(BaseConfig): linter: LinterConfig = LinterConfig() janitor: JanitorConfig = JanitorConfig() cache_dir: t.Optional[str] = None + dbt: t.Optional[DbtConfig] = None _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = { "gateways": UpdateStrategy.NESTED_UPDATE, @@ -191,6 +193,7 @@ class Config(BaseConfig): "before_all": UpdateStrategy.EXTEND, "after_all": UpdateStrategy.EXTEND, "linter": UpdateStrategy.NESTED_UPDATE, + "dbt": UpdateStrategy.NESTED_UPDATE, } _connection_config_validator = connection_config_validator diff --git a/sqlmesh/core/config/scheduler.py b/sqlmesh/core/config/scheduler.py index 69adcafe70..970defee62 100644 --- 
a/sqlmesh/core/config/scheduler.py +++ b/sqlmesh/core/config/scheduler.py @@ -146,7 +146,7 @@ def get_default_catalog_per_gateway(self, context: GenericContext) -> t.Dict[str SCHEDULER_CONFIG_TO_TYPE = { tpe.all_field_infos()["type_"].default: tpe - for tpe in subclasses(__name__, BaseConfig, exclude=(BaseConfig,)) + for tpe in subclasses(__name__, BaseConfig, exclude={BaseConfig}) } diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py index af28f75932..8af837b08a 100644 --- a/sqlmesh/core/console.py +++ b/sqlmesh/core/console.py @@ -551,6 +551,22 @@ def log_skipped_models(self, snapshot_names: t.Set[str]) -> None: def log_failed_models(self, errors: t.List[NodeExecutionFailedError]) -> None: """Display list of models that failed during evaluation to the user.""" + @abc.abstractmethod + def log_models_updated_during_restatement( + self, + snapshots: t.List[t.Tuple[SnapshotTableInfo, SnapshotTableInfo]], + environment_naming_info: EnvironmentNamingInfo, + default_catalog: t.Optional[str], + ) -> None: + """Display a list of models where new versions got deployed to the specified :environment while we were restating data the old versions + + Args: + snapshots: a list of (snapshot_we_restated, snapshot_it_got_replaced_with_during_restatement) tuples + environment: which environment got updated while we were restating models + environment_naming_info: how snapshots are named in that :environment (for display name purposes) + default_catalog: the configured default catalog (for display name purposes) + """ + @abc.abstractmethod def loading_start(self, message: t.Optional[str] = None) -> uuid.UUID: """Starts loading and returns a unique ID that can be used to stop the loading. 
Optionally can display a message.""" @@ -771,6 +787,14 @@ def log_skipped_models(self, snapshot_names: t.Set[str]) -> None: def log_failed_models(self, errors: t.List[NodeExecutionFailedError]) -> None: pass + def log_models_updated_during_restatement( + self, + snapshots: t.List[t.Tuple[SnapshotTableInfo, SnapshotTableInfo]], + environment_naming_info: EnvironmentNamingInfo, + default_catalog: t.Optional[str], + ) -> None: + pass + def log_destructive_change( self, snapshot_name: str, @@ -1998,7 +2022,34 @@ def _prompt_categorize( plan = plan_builder.build() if plan.restatements: - self._print("\n[bold]Restating models\n") + # A plan can have restatements for the following reasons: + # - The user specifically called `sqlmesh plan` with --restate-model. + # This creates a "restatement plan" which disallows all other changes and simply force-backfills + # the selected models and their downstream dependencies using the versions of the models stored in state. + # - There are no specific restatements (so changes are allowed) AND dev previews need to be computed. + # The "restatements" feature is currently reused for dev previews. + if plan.selected_models_to_restate: + # There were legitimate restatements, no dev previews + tree = Tree( + "[bold]Models selected for restatement:[/bold]\n" + "This causes backfill of the model itself as well as affected downstream models" + ) + model_fqn_to_snapshot = {s.name: s for s in plan.snapshots.values()} + for model_fqn in plan.selected_models_to_restate: + snapshot = model_fqn_to_snapshot[model_fqn] + display_name = snapshot.display_name( + plan.environment_naming_info, + default_catalog if self.verbosity < Verbosity.VERY_VERBOSE else None, + dialect=self.dialect, + ) + tree.add( + display_name + ) # note: we deliberately dont show any intervals here; they get shown in the backfill section + self._print(tree) + else: + # We are computing dev previews, do not confuse the user by printing out something to do + # with restatements. 
Dev previews are already highlighted in the backfill step + pass else: self.show_environment_difference_summary( plan.context_diff, @@ -2225,6 +2276,30 @@ def log_failed_models(self, errors: t.List[NodeExecutionFailedError]) -> None: for node_name, msg in error_messages.items(): self._print(f" [red]{node_name}[/red]\n\n{msg}") + def log_models_updated_during_restatement( + self, + snapshots: t.List[t.Tuple[SnapshotTableInfo, SnapshotTableInfo]], + environment_naming_info: EnvironmentNamingInfo, + default_catalog: t.Optional[str] = None, + ) -> None: + if snapshots: + tree = Tree( + f"[yellow]The following models had new versions deployed while data was being restated:[/yellow]" + ) + + for restated_snapshot, updated_snapshot in snapshots: + display_name = restated_snapshot.display_name( + environment_naming_info, + default_catalog if self.verbosity < Verbosity.VERY_VERBOSE else None, + dialect=self.dialect, + ) + current_branch = tree.add(display_name) + current_branch.add(f"restated version: '{restated_snapshot.version}'") + current_branch.add(f"currently active version: '{updated_snapshot.version}'") + + self._print(tree) + self._print("") # newline spacer + def log_destructive_change( self, snapshot_name: str, @@ -3566,7 +3641,10 @@ def show_linter_violations( msg = f"\nLinter {severity} for `{model._path}`:\n{violations_msg}\n" self._print(msg) - self._errors.append(msg) + if is_error: + self._errors.append(msg) + else: + self._warnings.append(msg) @property def captured_warnings(self) -> str: diff --git a/sqlmesh/core/constants.py b/sqlmesh/core/constants.py index a1d117f4fb..66dadb0b5d 100644 --- a/sqlmesh/core/constants.py +++ b/sqlmesh/core/constants.py @@ -8,7 +8,7 @@ SQLMESH = "sqlmesh" SQLMESH_MANAGED = "sqlmesh_managed" -SQLMESH_PATH = Path.home() / ".sqlmesh" +SQLMESH_PATH = Path(os.getenv("SQLMESH_HOME") or Path.home() / ".sqlmesh") PROD = "prod" """Prod""" diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 0339f6506c..dc51aad2a7 
100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -93,7 +93,7 @@ from sqlmesh.core.reference import ReferenceGraph from sqlmesh.core.scheduler import Scheduler, CompletionStatus from sqlmesh.core.schema_loader import create_external_models_file -from sqlmesh.core.selector import Selector +from sqlmesh.core.selector import Selector, NativeSelector from sqlmesh.core.snapshot import ( DeployabilityIndex, Snapshot, @@ -107,14 +107,15 @@ CachingStateSync, StateReader, StateSync, - cleanup_expired_views, ) +from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots from sqlmesh.core.table_diff import TableDiff from sqlmesh.core.test import ( ModelTextTestResult, ModelTestMetadata, generate_test, run_tests, + filter_tests_by_patterns, ) from sqlmesh.core.user import User from sqlmesh.utils import UniqueKeyDict, Verbosity @@ -139,6 +140,7 @@ ) from sqlmesh.utils.config import print_config from sqlmesh.utils.jinja import JinjaMacroRegistry +from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path if t.TYPE_CHECKING: import pandas as pd @@ -153,6 +155,8 @@ ) from sqlmesh.core.snapshot import Node + from sqlmesh.core.snapshot.definition import Intervals + ModelOrSnapshot = t.Union[str, Model, Snapshot] NodeOrSnapshot = t.Union[str, Model, StandaloneAudit, Snapshot] @@ -230,7 +234,7 @@ def resolve_table(self, model_name: str) -> str: ) def fetchdf( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> pd.DataFrame: """Fetches a dataframe given a sql string or sqlglot expression. 
@@ -244,7 +248,7 @@ def fetchdf( return self.engine_adapter.fetchdf(query, quote_identifiers=quote_identifiers) def fetch_pyspark_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> PySparkDataFrame: """Fetches a PySpark dataframe given a sql string or sqlglot expression. @@ -274,6 +278,8 @@ def __init__( deployability_index: t.Optional[DeployabilityIndex] = None, default_dialect: t.Optional[str] = None, default_catalog: t.Optional[str] = None, + is_restatement: t.Optional[bool] = None, + parent_intervals: t.Optional[Intervals] = None, variables: t.Optional[t.Dict[str, t.Any]] = None, blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, ): @@ -284,6 +290,8 @@ def __init__( self._default_dialect = default_dialect self._variables = variables or {} self._blueprint_variables = blueprint_variables or {} + self._is_restatement = is_restatement + self._parent_intervals = parent_intervals @property def default_dialect(self) -> t.Optional[str]: @@ -308,6 +316,14 @@ def gateway(self) -> t.Optional[str]: """Returns the gateway name.""" return self.var(c.GATEWAY) + @property + def is_restatement(self) -> t.Optional[bool]: + return self._is_restatement + + @property + def parent_intervals(self) -> t.Optional[Intervals]: + return self._parent_intervals + def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: """Returns a variable value.""" return self._variables.get(var_name.lower(), default) @@ -328,6 +344,7 @@ def with_variables( self.deployability_index, self._default_dialect, self._default_catalog, + self._is_restatement, variables=variables, blueprint_variables=blueprint_variables, ) @@ -368,6 +385,7 @@ def __init__( load: bool = True, users: t.Optional[t.List[User]] = None, config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None, + selector: t.Optional[t.Type[Selector]] = None, ): self.configs = ( config @@ -381,6 +399,11 @@ def 
__init__( self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict( "standaloneaudits" ) + self._model_test_metadata: t.List[ModelTestMetadata] = [] + self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {} + self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {} + self._models_with_tests: t.Set[str] = set() + self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") self._jinja_macros = JinjaMacroRegistry() @@ -390,6 +413,7 @@ def __init__( self._engine_adapter: t.Optional[EngineAdapter] = None self._linters: t.Dict[str, Linter] = {} self._loaded: bool = False + self._selector_cls = selector or NativeSelector self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items()))) @@ -618,6 +642,10 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: self._excluded_requirements.clear() self._linters.clear() self._environment_statements = [] + self._model_test_metadata.clear() + self._model_test_metadata_path_index.clear() + self._model_test_metadata_fully_qualified_name_index.clear() + self._models_with_tests.clear() for loader, project in zip(self._loaders, loaded_projects): self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) @@ -630,6 +658,16 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: self._excluded_requirements.update(project.excluded_requirements) self._environment_statements.extend(project.environment_statements) + self._model_test_metadata.extend(project.model_test_metadata) + for metadata in project.model_test_metadata: + if metadata.path not in self._model_test_metadata_path_index: + self._model_test_metadata_path_index[metadata.path] = [] + self._model_test_metadata_path_index[metadata.path].append(metadata) + self._model_test_metadata_fully_qualified_name_index[ + metadata.fully_qualified_test_name + ] = metadata + 
self._models_with_tests.add(metadata.model_name) + config = loader.config self._linters[config.project] = Linter.from_rules( BUILTIN_RULES.union(project.user_rules), config.linter @@ -654,8 +692,11 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: if snapshot.node.project in self._projects: uncached.add(snapshot.name) else: - store = self._standalone_audits if snapshot.is_audit else self._models - store[snapshot.name] = snapshot.node # type: ignore + local_store = self._standalone_audits if snapshot.is_audit else self._models + if snapshot.name in local_store: + uncached.add(snapshot.name) + else: + local_store[snapshot.name] = snapshot.node # type: ignore for model in self._models.values(): self.dag.add(model.fqn, model.depends_on) @@ -1031,6 +1072,11 @@ def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]: """Returns all registered standalone audits in this context.""" return MappingProxyType(self._standalone_audits) + @property + def models_with_tests(self) -> t.Set[str]: + """Returns all models with tests in this context.""" + return self._models_with_tests + @property def snapshots(self) -> t.Dict[str, Snapshot]: """Generates and returns snapshots based on models registered in this context. @@ -1059,7 +1105,7 @@ def render( execution_time: t.Optional[TimeLike] = None, expand: t.Union[bool, t.Iterable[str]] = False, **kwargs: t.Any, - ) -> exp.Expression: + ) -> exp.Expr: """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models. Args: @@ -1429,6 +1475,7 @@ def plan_builder( explain: t.Optional[bool] = None, ignore_cron: t.Optional[bool] = None, min_intervals: t.Optional[int] = None, + always_include_local_changes: t.Optional[bool] = None, ) -> PlanBuilder: """Creates a plan builder. 
@@ -1467,6 +1514,8 @@ def plan_builder( diff_rendered: Whether the diff should compare raw vs rendered models min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered on every model when checking for missing intervals + always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. + However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour. Returns: The plan builder. @@ -1510,6 +1559,7 @@ def plan_builder( run = run or False diff_rendered = diff_rendered or False skip_linter = skip_linter or False + min_intervals = min_intervals or 0 environment = environment or self.config.default_target_environment environment = Environment.sanitize_name(environment) @@ -1583,13 +1633,20 @@ def plan_builder( "Selector did not return any models. Please check your model selection and try again." ) + if always_include_local_changes is None: + # default behaviour - if restatements are detected; we operate entirely out of state and ignore local changes + force_no_diff = restate_models is not None or ( + backfill_models is not None and not backfill_models + ) + else: + force_no_diff = not always_include_local_changes + snapshots = self._snapshots(models_override) context_diff = self._context_diff( environment or c.PROD, snapshots=snapshots, create_from=create_from, - force_no_diff=restate_models is not None - or (backfill_models is not None and not backfill_models), + force_no_diff=force_no_diff, ensure_finalized_snapshots=self.config.plan.use_finalized_state, diff_rendered=diff_rendered, always_recreate_environment=self.config.plan.always_recreate_environment, @@ -1644,6 +1701,14 @@ def plan_builder( elif forward_only is None: forward_only = self.config.plan.forward_only + # When handling prod restatements, only clear intervals from other model versions if we are using full virtual environments + # If 
we are not, then there is no point, because none of the data in dev environments can be promoted by definition + restate_all_snapshots = ( + expanded_restate_models is not None + and not is_dev + and self.config.virtual_environment_mode.is_full + ) + return self.PLAN_BUILDER_TYPE( context_diff=context_diff, start=start, @@ -1651,6 +1716,7 @@ def plan_builder( execution_time=execution_time, apply=self.apply, restate_models=expanded_restate_models, + restate_all_snapshots=restate_all_snapshots, backfill_models=backfill_models, no_gaps=no_gaps, skip_backfill=skip_backfill, @@ -1678,9 +1744,9 @@ def plan_builder( console=self.console, user_provided_flags=user_provided_flags, selected_models={ - dbt_name + dbt_unique_id for model in model_selector.expand_model_selections(select_models or "*") - if (dbt_name := snapshots[model].node.dbt_name) + if (dbt_unique_id := snapshots[model].node.dbt_unique_id) }, explain=explain or False, ignore_cron=ignore_cron or False, @@ -1794,10 +1860,10 @@ def table_diff( self, source: str, target: str, - on: t.Optional[t.List[str] | exp.Condition] = None, + on: t.Optional[t.List[str] | exp.Expr] = None, skip_columns: t.Optional[t.List[str]] = None, select_models: t.Optional[t.Collection[str]] = None, - where: t.Optional[str | exp.Condition] = None, + where: t.Optional[str | exp.Expr] = None, limit: int = 20, show: bool = True, show_sample: bool = True, @@ -1856,7 +1922,7 @@ def table_diff( raise SQLMeshError(e) models_to_diff: t.List[ - t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Condition]] + t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Expr]] ] = [] models_without_grain: t.List[Model] = [] source_snapshots_to_name = { @@ -1975,9 +2041,9 @@ def _model_diff( target_alias: str, limit: int, decimals: int, - on: t.Optional[t.List[str] | exp.Condition] = None, + on: t.Optional[t.List[str] | exp.Expr] = None, skip_columns: t.Optional[t.List[str]] = None, - where: t.Optional[str | exp.Condition] = 
None, + where: t.Optional[str | exp.Expr] = None, show: bool = True, temp_schema: t.Optional[str] = None, skip_grain_check: bool = False, @@ -2017,10 +2083,10 @@ def _table_diff( limit: int, decimals: int, adapter: EngineAdapter, - on: t.Optional[t.List[str] | exp.Condition] = None, + on: t.Optional[t.List[str] | exp.Expr] = None, model: t.Optional[Model] = None, skip_columns: t.Optional[t.List[str]] = None, - where: t.Optional[str | exp.Condition] = None, + where: t.Optional[str | exp.Expr] = None, schema_diff_ignore_case: bool = False, ) -> TableDiff: if not on: @@ -2183,7 +2249,7 @@ def test( pd.set_option("display.max_columns", None) - test_meta = self.load_model_tests(tests=tests, patterns=match_patterns) + test_meta = self.select_tests(tests=tests, patterns=match_patterns) result = run_tests( model_test_metadata=test_meta, @@ -2242,6 +2308,7 @@ def audit( snapshot=snapshot, start=start, end=end, + execution_time=execution_time, snapshots=self.snapshots, ): audit_id = f"{audit_result.audit.name}" @@ -2277,7 +2344,7 @@ def audit( return not errors @python_api_analytics - def rewrite(self, sql: str, dialect: str = "") -> exp.Expression: + def rewrite(self, sql: str, dialect: str = "") -> exp.Expr: """Rewrite a sql expression with semantic references into an executable query. 
https://sqlmesh.readthedocs.io/en/latest/concepts/metrics/overview/ @@ -2562,12 +2629,15 @@ def table_name( ) def clear_caches(self) -> None: - for path in self.configs: - cache_path = path / c.CACHE - if cache_path.exists(): - rmtree(cache_path) - if self.cache_dir.exists(): - rmtree(self.cache_dir) + paths_to_remove = [path / c.CACHE for path in self.configs] + paths_to_remove.append(self.cache_dir) + + if IS_WINDOWS: + paths_to_remove = [fix_windows_path(path) for path in paths_to_remove] + + for path in paths_to_remove: + if path.exists(): + rmtree(path) if isinstance(self._state_sync, CachingStateSync): self._state_sync.clear_cache() @@ -2824,19 +2894,14 @@ def _run_janitor(self, ignore_ttl: bool = False) -> None: # Clean up expired environments by removing their views and schemas self._cleanup_environments(current_ts=current_ts) - cleanup_targets = self.state_sync.get_expired_snapshots( - ignore_ttl=ignore_ttl, current_ts=current_ts - ) - - # Remove the expired snapshots tables - self.snapshot_evaluator.cleanup( - target_snapshots=cleanup_targets, - on_complete=self.console.update_cleanup_progress, + delete_expired_snapshots( + self.state_sync, + self.snapshot_evaluator, + current_ts=current_ts, + ignore_ttl=ignore_ttl, + console=self.console, + batch_size=self.config.janitor.expired_snapshots_batch_size, ) - - # Delete the expired snapshot records from the state sync - self.state_sync.delete_expired_snapshots(ignore_ttl=ignore_ttl, current_ts=current_ts) - self.state_sync.compact_intervals() def _cleanup_environments(self, current_ts: t.Optional[int] = None) -> None: @@ -2874,7 +2939,7 @@ def _new_state_sync(self) -> StateSync: def _new_selector( self, models: t.Optional[UniqueKeyDict[str, Model]] = None, dag: t.Optional[DAG[str]] = None ) -> Selector: - return Selector( + return self._selector_cls( self.state_reader, models=models or self._models, context_path=self.path, @@ -2981,10 +3046,17 @@ def _get_plan_default_start_end( modified_model_names: 
t.Set[str], execution_time: t.Optional[TimeLike] = None, ) -> t.Tuple[t.Optional[int], t.Optional[int]]: - if not max_interval_end_per_model: + # exclude seeds so their stale interval ends does not become the default plan end date + # when they're the only ones that contain intervals in this plan + non_seed_interval_ends = { + model_fqn: end + for model_fqn, end in max_interval_end_per_model.items() + if model_fqn not in snapshots or not snapshots[model_fqn].is_seed + } + if not non_seed_interval_ends: return None, None - default_end = to_timestamp(max(max_interval_end_per_model.values())) + default_end = to_timestamp(max(non_seed_interval_ends.values())) default_start: t.Optional[int] = None # Infer the default start by finding the smallest interval start that corresponds to the default end. for model_name in backfill_models or modified_model_names or max_interval_end_per_model: @@ -3157,18 +3229,34 @@ def lint_models( return all_violations - def load_model_tests( - self, tests: t.Optional[t.List[str]] = None, patterns: list[str] | None = None + def select_tests( + self, + tests: t.Optional[t.List[str]] = None, + patterns: t.Optional[t.List[str]] = None, ) -> t.List[ModelTestMetadata]: - # If a set of specific test path(s) are provided, we can use a single loader - # since it's not required to walk every tests/ folder in each repo - loaders = [self._loaders[0]] if tests else self._loaders + """Filter pre-loaded test metadata based on tests and patterns.""" + + test_meta = self._model_test_metadata + + if tests: + filtered_tests = [] + for test in tests: + if "::" in test: + if test in self._model_test_metadata_fully_qualified_name_index: + filtered_tests.append( + self._model_test_metadata_fully_qualified_name_index[test] + ) + else: + test_path = Path(test) + if test_path in self._model_test_metadata_path_index: + filtered_tests.extend(self._model_test_metadata_path_index[test_path]) + + test_meta = filtered_tests - model_tests = [] - for loader in loaders: - 
model_tests.extend(loader.load_model_tests(tests=tests, patterns=patterns)) + if patterns: + test_meta = filter_tests_by_patterns(test_meta, patterns) - return model_tests + return test_meta class Context(GenericContext[Config]): diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 07d13b1c2f..047e58609a 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -36,7 +36,7 @@ from sqlmesh.utils.metaprogramming import Executable # noqa from sqlmesh.core.environment import EnvironmentStatements -IGNORED_PACKAGES = {"sqlmesh", "sqlglot"} +IGNORED_PACKAGES = {"sqlmesh", "sqlglot", "sqlglotc"} class ContextDiff(PydanticModel): diff --git a/sqlmesh/core/dialect.py b/sqlmesh/core/dialect.py index ed904cc4b3..3e8f4fe9a7 100644 --- a/sqlmesh/core/dialect.py +++ b/sqlmesh/core/dialect.py @@ -12,8 +12,9 @@ from sqlglot import Dialect, Generator, ParseError, Parser, Tokenizer, TokenType, exp from sqlglot.dialects.dialect import DialectType -from sqlglot.dialects import DuckDB, Snowflake +from sqlglot.dialects import DuckDB, Snowflake, TSQL import sqlglot.dialects.athena as athena +from sqlglot.parsers.athena import AthenaTrinoParser from sqlglot.helper import seq_get from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlglot.optimizer.qualify_columns import quote_identifiers @@ -52,7 +53,7 @@ class Metric(exp.Expression): arg_types = {"expressions": True} -class Jinja(exp.Func): +class Jinja(exp.Expression, exp.Func): arg_types = {"this": True} @@ -76,7 +77,7 @@ class MacroVar(exp.Var): pass -class MacroFunc(exp.Func): +class MacroFunc(exp.Expression, exp.Func): @property def name(self) -> str: return self.this.name @@ -102,7 +103,7 @@ class DColonCast(exp.Cast): pass -class MetricAgg(exp.AggFunc): +class MetricAgg(exp.Expression, exp.AggFunc): """Used for computing metrics.""" arg_types = {"this": True} @@ -118,7 +119,7 @@ class StagedFilePath(exp.Expression): arg_types = exp.Table.arg_types.copy() 
-def _parse_statement(self: Parser) -> t.Optional[exp.Expression]: +def _parse_statement(self: Parser) -> t.Optional[exp.Expr]: if self._curr is None: return None @@ -152,7 +153,7 @@ def _parse_statement(self: Parser) -> t.Optional[exp.Expression]: raise -def _parse_lambda(self: Parser, alias: bool = False) -> t.Optional[exp.Expression]: +def _parse_lambda(self: Parser, alias: bool = False) -> t.Optional[exp.Expr]: node = self.__parse_lambda(alias=alias) # type: ignore if isinstance(node, exp.Lambda): node.set("this", self._parse_alias(node.this)) @@ -163,7 +164,7 @@ def _parse_id_var( self: Parser, any_token: bool = True, tokens: t.Optional[t.Collection[TokenType]] = None, -) -> t.Optional[exp.Expression]: +) -> t.Optional[exp.Expr]: if self._prev and self._prev.text == SQLMESH_MACRO_PREFIX and self._match(TokenType.L_BRACE): identifier = self.__parse_id_var(any_token=any_token, tokens=tokens) # type: ignore if not self._match(TokenType.R_BRACE): @@ -174,6 +175,7 @@ def _parse_id_var( while ( identifier + and not identifier.args.get("quoted") and self._is_connected() and ( self._match_texts(("{", SQLMESH_MACRO_PREFIX)) @@ -206,12 +208,12 @@ def _parse_id_var( else: self.raise_error("Expecting }") - identifier = self.expression(exp.Identifier, this=this, quoted=identifier.quoted) + identifier = self.expression(exp.Identifier(this=this, quoted=identifier.quoted)) return identifier -def _parse_macro(self: Parser, keyword_macro: str = "") -> t.Optional[exp.Expression]: +def _parse_macro(self: Parser, keyword_macro: str = "") -> t.Optional[exp.Expr]: if self._prev.text != SQLMESH_MACRO_PREFIX: return self._parse_parameter() @@ -219,7 +221,7 @@ def _parse_macro(self: Parser, keyword_macro: str = "") -> t.Optional[exp.Expres index = self._index field = self._parse_primary() or self._parse_function(functions={}) or self._parse_id_var() - def _build_macro(field: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + def _build_macro(field: t.Optional[exp.Expr]) -> 
t.Optional[exp.Expr]: if isinstance(field, exp.Func): macro_name = field.name.upper() if macro_name != keyword_macro and macro_name in KEYWORD_MACROS: @@ -229,37 +231,39 @@ def _build_macro(field: t.Optional[exp.Expression]) -> t.Optional[exp.Expression if isinstance(field, exp.Anonymous): if macro_name == "DEF": return self.expression( - MacroDef, - this=field.expressions[0], - expression=field.expressions[1], + MacroDef( + this=field.expressions[0], + expression=field.expressions[1], + ), comments=comments, ) if macro_name == "SQL": into = field.expressions[1].this.lower() if len(field.expressions) > 1 else None return self.expression( - MacroSQL, this=field.expressions[0], into=into, comments=comments + MacroSQL(this=field.expressions[0], into=into), comments=comments ) else: field = self.expression( - exp.Anonymous, - this=field.sql_name(), - expressions=list(field.args.values()), + exp.Anonymous( + this=field.sql_name(), + expressions=list(field.args.values()), + ), comments=comments, ) - return self.expression(MacroFunc, this=field, comments=comments) + return self.expression(MacroFunc(this=field), comments=comments) if field is None: return None if field.is_string or (isinstance(field, exp.Identifier) and field.quoted): return self.expression( - MacroStrReplace, this=exp.Literal.string(field.this), comments=comments + MacroStrReplace(this=exp.Literal.string(field.this)), comments=comments ) if "@" in field.this: - return field - return self.expression(MacroVar, this=field.this, comments=comments) + return field # type: ignore[return-value] + return self.expression(MacroVar(this=field.this), comments=comments) if isinstance(field, (exp.Window, exp.IgnoreNulls, exp.RespectNulls)): field.set("this", _build_macro(field.this)) @@ -272,7 +276,7 @@ def _build_macro(field: t.Optional[exp.Expression]) -> t.Optional[exp.Expression KEYWORD_MACROS = {"WITH", "JOIN", "WHERE", "GROUP_BY", "HAVING", "ORDER_BY", "LIMIT"} -def _parse_matching_macro(self: Parser, name: str) 
-> t.Optional[exp.Expression]: +def _parse_matching_macro(self: Parser, name: str) -> t.Optional[exp.Expr]: if not self._match_pair(TokenType.PARAMETER, TokenType.VAR, advance=False) or ( self._next and self._next.text.upper() != name.upper() ): @@ -282,7 +286,7 @@ def _parse_matching_macro(self: Parser, name: str) -> t.Optional[exp.Expression] return _parse_macro(self, keyword_macro=name) -def _parse_body_macro(self: Parser) -> t.Tuple[str, t.Optional[exp.Expression]]: +def _parse_body_macro(self: Parser) -> t.Tuple[str, t.Optional[exp.Expr]]: name = self._next and self._next.text.upper() if name == "JOIN": @@ -300,7 +304,7 @@ def _parse_body_macro(self: Parser) -> t.Tuple[str, t.Optional[exp.Expression]]: return ("", None) -def _parse_with(self: Parser, skip_with_token: bool = False) -> t.Optional[exp.Expression]: +def _parse_with(self: Parser, skip_with_token: bool = False) -> t.Optional[exp.Expr]: macro = _parse_matching_macro(self, "WITH") if not macro: return self.__parse_with(skip_with_token=skip_with_token) # type: ignore @@ -311,7 +315,7 @@ def _parse_with(self: Parser, skip_with_token: bool = False) -> t.Optional[exp.E def _parse_join( self: Parser, skip_join_token: bool = False, parse_bracket: bool = False -) -> t.Optional[exp.Expression]: +) -> t.Optional[exp.Expr]: index = self._index method, side, kind = self._parse_join_parts() macro = _parse_matching_macro(self, "JOIN") @@ -349,13 +353,15 @@ def _parse_select( parse_subquery_alias: bool = True, parse_set_operation: bool = True, consume_pipe: bool = True, -) -> t.Optional[exp.Expression]: + from_: t.Optional[exp.From] = None, +) -> t.Optional[exp.Expr]: select = self.__parse_select( # type: ignore nested=nested, table=table, parse_subquery_alias=parse_subquery_alias, parse_set_operation=parse_set_operation, consume_pipe=consume_pipe, + from_=from_, ) if ( @@ -369,7 +375,7 @@ def _parse_select( return select -def _parse_where(self: Parser, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 
+def _parse_where(self: Parser, skip_where_token: bool = False) -> t.Optional[exp.Expr]: macro = _parse_matching_macro(self, "WHERE") if not macro: return self.__parse_where(skip_where_token=skip_where_token) # type: ignore @@ -378,7 +384,7 @@ def _parse_where(self: Parser, skip_where_token: bool = False) -> t.Optional[exp return macro -def _parse_group(self: Parser, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: +def _parse_group(self: Parser, skip_group_by_token: bool = False) -> t.Optional[exp.Expr]: macro = _parse_matching_macro(self, "GROUP_BY") if not macro: return self.__parse_group(skip_group_by_token=skip_group_by_token) # type: ignore @@ -387,7 +393,7 @@ def _parse_group(self: Parser, skip_group_by_token: bool = False) -> t.Optional[ return macro -def _parse_having(self: Parser, skip_having_token: bool = False) -> t.Optional[exp.Expression]: +def _parse_having(self: Parser, skip_having_token: bool = False) -> t.Optional[exp.Expr]: macro = _parse_matching_macro(self, "HAVING") if not macro: return self.__parse_having(skip_having_token=skip_having_token) # type: ignore @@ -397,8 +403,8 @@ def _parse_having(self: Parser, skip_having_token: bool = False) -> t.Optional[e def _parse_order( - self: Parser, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False -) -> t.Optional[exp.Expression]: + self: Parser, this: t.Optional[exp.Expr] = None, skip_order_token: bool = False +) -> t.Optional[exp.Expr]: macro = _parse_matching_macro(self, "ORDER_BY") if not macro: return self.__parse_order(this, skip_order_token=skip_order_token) # type: ignore @@ -409,10 +415,10 @@ def _parse_order( def _parse_limit( self: Parser, - this: t.Optional[exp.Expression] = None, + this: t.Optional[exp.Expr] = None, top: bool = False, skip_limit_token: bool = False, -) -> t.Optional[exp.Expression]: +) -> t.Optional[exp.Expr]: macro = _parse_matching_macro(self, "TOP" if top else "LIMIT") if not macro: return self.__parse_limit(this, top=top, 
skip_limit_token=skip_limit_token) # type: ignore @@ -421,7 +427,7 @@ def _parse_limit( return macro -def _parse_value(self: Parser, values: bool = True) -> t.Optional[exp.Expression]: +def _parse_value(self: Parser, values: bool = True) -> t.Optional[exp.Expr]: wrapped = self._match(TokenType.L_PAREN, advance=False) # The base _parse_value method always constructs a Tuple instance. This is problematic when @@ -435,11 +441,11 @@ def _parse_value(self: Parser, values: bool = True) -> t.Optional[exp.Expression return expr -def _parse_macro_or_clause(self: Parser, parser: t.Callable) -> t.Optional[exp.Expression]: +def _parse_macro_or_clause(self: Parser, parser: t.Callable) -> t.Optional[exp.Expr]: return _parse_macro(self) if self._match(TokenType.PARAMETER) else parser() -def _parse_props(self: Parser) -> t.Optional[exp.Expression]: +def _parse_props(self: Parser) -> t.Optional[exp.Expr]: key = self._parse_id_var(any_token=True) if not key: return None @@ -457,7 +463,7 @@ def _parse_props(self: Parser) -> t.Optional[exp.Expression]: elif name == "merge_filter": value = self._parse_conjunction() elif self._match(TokenType.L_PAREN): - value = self.expression(exp.Tuple, expressions=self._parse_csv(self._parse_equality)) + value = self.expression(exp.Tuple(expressions=self._parse_csv(self._parse_equality))) self._match_r_paren() else: value = self._parse_bracket(self._parse_field(any_token=True)) @@ -466,7 +472,7 @@ def _parse_props(self: Parser) -> t.Optional[exp.Expression]: # Make sure if we get a windows path that it is converted to posix value = exp.Literal.string(value.this.replace("\\", "/")) # type: ignore - return self.expression(exp.Property, this=name, value=value) + return self.expression(exp.Property(this=name, value=value)) def _parse_types( @@ -474,7 +480,7 @@ def _parse_types( check_func: bool = False, schema: bool = False, allow_identifiers: bool = True, -) -> t.Optional[exp.Expression]: +) -> t.Optional[exp.Expr]: start = self._curr parsed_type = 
self.__parse_types( # type: ignore check_func=check_func, schema=schema, allow_identifiers=allow_identifiers @@ -493,13 +499,20 @@ def _parse_types( # # See: https://docs.snowflake.com/en/user-guide/querying-stage def _parse_table_parts( - self: Parser, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False + self: Parser, + schema: bool = False, + is_db_reference: bool = False, + wildcard: bool = False, + fast: bool = False, ) -> exp.Table | StagedFilePath: index = self._index table = self.__parse_table_parts( # type: ignore - schema=schema, is_db_reference=is_db_reference, wildcard=wildcard + schema=schema, is_db_reference=is_db_reference, wildcard=wildcard, fast=fast ) + if table is None: + return table # type: ignore[return-value] + table_arg = table.this name = table_arg.name if isinstance(table_arg, exp.Var) else "" @@ -523,7 +536,9 @@ def _parse_table_parts( ) ): self._retreat(index) - return Parser._parse_table_parts(self, schema=schema, is_db_reference=is_db_reference) + return Parser._parse_table_parts( + self, schema=schema, is_db_reference=is_db_reference, fast=fast + ) # type: ignore[return-value] table_arg.replace(MacroVar(this=name[1:])) return StagedFilePath(**table.args) @@ -531,7 +546,7 @@ def _parse_table_parts( return table -def _parse_if(self: Parser) -> t.Optional[exp.Expression]: +def _parse_if(self: Parser) -> t.Optional[exp.Expr]: # If we fail to parse an IF function with expressions as arguments, we then try # to parse a statement / command to support the macro @IF(condition, statement) index = self._index @@ -551,6 +566,10 @@ def _parse_if(self: Parser) -> t.Optional[exp.Expression]: if last_token.token_type == TokenType.R_PAREN: self._tokens[-2].comments.extend(last_token.comments) self._tokens.pop() + if hasattr(self, "_tokens_size"): + # keep _tokens_size in sync sqlglot 30.0.3 caches len(_tokens) + # _advance() tries to read tokens[index + 1] past the new end + self._tokens_size -= 1 else: 
self.raise_error("Expecting )") @@ -563,11 +582,11 @@ def _parse_if(self: Parser) -> t.Optional[exp.Expression]: return exp.Anonymous(this="IF", expressions=[cond, stmt]) -def _create_parser(expression_type: t.Type[exp.Expression], table_keys: t.List[str]) -> t.Callable: - def parse(self: Parser) -> t.Optional[exp.Expression]: +def _create_parser(expression_type: t.Type[exp.Expr], table_keys: t.List[str]) -> t.Callable: + def parse(self: Parser) -> t.Optional[exp.Expr]: from sqlmesh.core.model.kind import ModelKindName - expressions: t.List[exp.Expression] = [] + expressions: t.List[exp.Expr] = [] while True: prev_property = seq_get(expressions, -1) @@ -586,7 +605,7 @@ def parse(self: Parser) -> t.Optional[exp.Expression]: key = key_expression.name.lower() start = self._curr - value: t.Optional[exp.Expression | str] + value: t.Optional[exp.Expr | str] if key in table_keys: value = self._parse_table_parts() @@ -626,7 +645,7 @@ def parse(self: Parser) -> t.Optional[exp.Expression]: else: props = None - value = self.expression(ModelKind, this=kind.value, expressions=props) + value = self.expression(ModelKind(this=kind.value, expressions=props)) elif key == "expression": value = self._parse_conjunction() elif key == "partitioned_by": @@ -638,12 +657,12 @@ def parse(self: Parser) -> t.Optional[exp.Expression]: else: value = self._parse_bracket(self._parse_field(any_token=True)) - if isinstance(value, exp.Expression): + if isinstance(value, exp.Expr): value.meta["sql"] = self._find_sql(start, self._prev) - expressions.append(self.expression(exp.Property, this=key, value=value)) + expressions.append(self.expression(exp.Property(this=key, value=value))) - return self.expression(expression_type, expressions=expressions) + return self.expression(expression_type(expressions=expressions)) return parse @@ -655,7 +674,7 @@ def parse(self: Parser) -> t.Optional[exp.Expression]: } -def _props_sql(self: Generator, expressions: t.List[exp.Expression]) -> str: +def _props_sql(self: 
Generator, expressions: t.List[exp.Expr]) -> str: props = [] size = len(expressions) @@ -673,7 +692,7 @@ def _props_sql(self: Generator, expressions: t.List[exp.Expression]) -> str: return "\n".join(props) -def _on_virtual_update_sql(self: Generator, expressions: t.List[exp.Expression]) -> str: +def _on_virtual_update_sql(self: Generator, expressions: t.List[exp.Expr]) -> str: statements = "\n".join( self.sql(expression) if isinstance(expression, JinjaStatement) @@ -694,7 +713,7 @@ def _model_kind_sql(self: Generator, expression: ModelKind) -> str: return expression.name.upper() -def _macro_keyword_func_sql(self: Generator, expression: exp.Expression) -> str: +def _macro_keyword_func_sql(self: Generator, expression: exp.Expr) -> str: name = expression.name keyword = name.replace("_", " ") *args, clause = expression.expressions @@ -728,7 +747,7 @@ def _override(klass: t.Type[Tokenizer | Parser], func: t.Callable) -> None: def format_model_expressions( - expressions: t.List[exp.Expression], + expressions: t.List[exp.Expr], dialect: t.Optional[str] = None, rewrite_casts: bool = True, **kwargs: t.Any, @@ -749,7 +768,7 @@ def format_model_expressions( if rewrite_casts: - def cast_to_colon(node: exp.Expression) -> exp.Expression: + def cast_to_colon(node: exp.Expr) -> exp.Expr: if isinstance(node, exp.Cast) and not any( # Only convert CAST into :: if it doesn't have additional args set, otherwise this # conversion could alter the semantics (eg. 
changing SAFE_CAST in BigQuery to CAST) @@ -781,8 +800,8 @@ def cast_to_colon(node: exp.Expression) -> exp.Expression: def text_diff( - a: t.List[exp.Expression], - b: t.List[exp.Expression], + a: t.List[exp.Expr], + b: t.List[exp.Expr], a_dialect: t.Optional[str] = None, b_dialect: t.Optional[str] = None, ) -> str: @@ -800,8 +819,15 @@ def text_diff( return "\n".join(unified_diff(a_sql, b_sql)) +WS_OR_COMMENT = r"(?:\s|--[^\n]*\n|/\*.*?\*/)" +HEADER = r"\b(?:model|audit)\b(?=\s*\()" +KEY_BOUNDARY = r"(?:\(|,)" # key is preceded by either '(' or ',' +DIALECT_VALUE = r"['\"]?(?P[a-z][a-z0-9]*)['\"]?" +VALUE_BOUNDARY = r"(?=,|\))" # value is followed by comma or closing paren + DIALECT_PATTERN = re.compile( - r"(model|audit).*?\(.*?dialect\s+'?([a-z]*)", re.IGNORECASE | re.DOTALL + rf"{HEADER}.*?{KEY_BOUNDARY}{WS_OR_COMMENT}*dialect{WS_OR_COMMENT}+{DIALECT_VALUE}{WS_OR_COMMENT}*{VALUE_BOUNDARY}", + re.IGNORECASE | re.DOTALL, ) @@ -850,7 +876,7 @@ def _is_virtual_statement_end(tokens: t.List[Token], pos: int) -> bool: return _is_command_statement(ON_VIRTUAL_UPDATE_END, tokens, pos) -def virtual_statement(statements: t.List[exp.Expression]) -> VirtualUpdateStatement: +def virtual_statement(statements: t.List[exp.Expr]) -> VirtualUpdateStatement: return VirtualUpdateStatement(expressions=statements) @@ -864,7 +890,7 @@ class ChunkType(Enum): def parse_one( sql: str, dialect: t.Optional[str] = None, into: t.Optional[exp.IntoType] = None -) -> exp.Expression: +) -> exp.Expr: expressions = parse(sql, default_dialect=dialect, match_dialect=False, into=into) if not expressions: raise SQLMeshError(f"No expressions found in '{sql}'") @@ -878,7 +904,7 @@ def parse( default_dialect: t.Optional[str] = None, match_dialect: bool = True, into: t.Optional[exp.IntoType] = None, -) -> t.List[exp.Expression]: +) -> t.List[exp.Expr]: """Parse a sql string. Supports parsing model definition. 
@@ -892,7 +918,8 @@ def parse( A list of the parsed expressions: [Model, *Statements, Query, *Statements] """ match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE]) - dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect) + dialect_str = match.group("dialect") if match else None + dialect = Dialect.get_or_raise(dialect_str or default_dialect) tokens = dialect.tokenize(sql) chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)] @@ -941,10 +968,10 @@ def parse( pos += 1 parser = dialect.parser() - expressions: t.List[exp.Expression] = [] + expressions: t.List[exp.Expr] = [] - def parse_sql_chunk(chunk: t.List[Token], meta_sql: bool = True) -> t.List[exp.Expression]: - parsed_expressions: t.List[t.Optional[exp.Expression]] = ( + def parse_sql_chunk(chunk: t.List[Token], meta_sql: bool = True) -> t.List[exp.Expr]: + parsed_expressions: t.List[t.Optional[exp.Expr]] = ( parser.parse(chunk, sql) if into is None else parser.parse_into(into, chunk, sql) ) expressions = [] @@ -955,7 +982,7 @@ def parse_sql_chunk(chunk: t.List[Token], meta_sql: bool = True) -> t.List[exp.E expressions.append(expression) return expressions - def parse_jinja_chunk(chunk: t.List[Token], meta_sql: bool = True) -> exp.Expression: + def parse_jinja_chunk(chunk: t.List[Token], meta_sql: bool = True) -> exp.Expr: start, *_, end = chunk segment = sql[start.end + 2 : end.start - 1] factory = jinja_query if chunk_type == ChunkType.JINJA_QUERY else jinja_statement @@ -966,9 +993,9 @@ def parse_jinja_chunk(chunk: t.List[Token], meta_sql: bool = True) -> exp.Expres def parse_virtual_statement( chunks: t.List[t.Tuple[t.List[Token], ChunkType]], pos: int - ) -> t.Tuple[t.List[exp.Expression], int]: + ) -> t.Tuple[t.List[exp.Expr], int]: # For virtual statements we need to handle both SQL and Jinja nested blocks within the chunk - virtual_update_statements = [] + virtual_update_statements: t.List[exp.Expr] = [] start = chunks[pos][0][0].start 
while ( @@ -1020,7 +1047,7 @@ def extend_sqlglot() -> None: # so this ensures that the extra ones it defines are also extended if dialect == athena.Athena: tokenizers.add(athena._TrinoTokenizer) - parsers.add(athena._TrinoParser) + parsers.add(AthenaTrinoParser) generators.add(athena._TrinoGenerator) generators.add(athena._HiveGenerator) @@ -1090,6 +1117,7 @@ def extend_sqlglot() -> None: _override(Parser, _parse_value) _override(Parser, _parse_lambda) _override(Parser, _parse_types) + _override(TSQL.Parser, Parser._parse_if) _override(Parser, _parse_if) _override(Parser, _parse_id_var) _override(Parser, _warn_unsupported) @@ -1239,7 +1267,7 @@ def normalize_model_name( def find_tables( - expression: exp.Expression, default_catalog: t.Optional[str], dialect: DialectType = None + expression: exp.Expr, default_catalog: t.Optional[str], dialect: DialectType = None ) -> t.Set[str]: """Find all tables referenced in a query. @@ -1262,10 +1290,10 @@ def find_tables( return expression.meta[TABLES_META] -def add_table(node: exp.Expression, table: str) -> exp.Expression: +def add_table(node: exp.Expr, table: str) -> exp.Expr: """Add a table to all columns in an expression.""" - def _transform(node: exp.Expression) -> exp.Expression: + def _transform(node: exp.Expr) -> exp.Expr: if isinstance(node, exp.Column) and not node.table: return exp.column(node.this, table=table) if isinstance(node, exp.Identifier): @@ -1375,7 +1403,7 @@ def normalize_and_quote( quote_identifiers(query, dialect=dialect) -def interpret_expression(e: exp.Expression) -> exp.Expression | str | int | float | bool: +def interpret_expression(e: exp.Expr) -> exp.Expr | str | int | float | bool: if e.is_int: return int(e.this) if e.is_number: @@ -1387,13 +1415,13 @@ def interpret_expression(e: exp.Expression) -> exp.Expression | str | int | floa def interpret_key_value_pairs( e: exp.Tuple, -) -> t.Dict[str, exp.Expression | str | int | float | bool]: +) -> t.Dict[str, exp.Expr | str | int | float | bool]: 
return {i.this.name: interpret_expression(i.expression) for i in e.expressions} def extract_func_call( - v: exp.Expression, allow_tuples: bool = False -) -> t.Tuple[str, t.Dict[str, exp.Expression]]: + v: exp.Expr, allow_tuples: bool = False +) -> t.Tuple[str, t.Dict[str, exp.Expr]]: kwargs = {} if isinstance(v, exp.Anonymous): @@ -1430,7 +1458,7 @@ def extract_function_calls(func_calls: t.Any, allow_tuples: bool = False) -> t.A return [extract_func_call(i, allow_tuples=allow_tuples) for i in func_calls.expressions] if isinstance(func_calls, exp.Paren): return [extract_func_call(func_calls.this, allow_tuples=allow_tuples)] - if isinstance(func_calls, exp.Expression): + if isinstance(func_calls, exp.Expr): return [extract_func_call(func_calls, allow_tuples=allow_tuples)] if isinstance(func_calls, list): function_calls = [] @@ -1462,9 +1490,7 @@ def is_meta_expression(v: t.Any) -> bool: return isinstance(v, (Audit, Metric, Model)) -def replace_merge_table_aliases( - expression: exp.Expression, dialect: t.Optional[str] = None -) -> exp.Expression: +def replace_merge_table_aliases(expression: exp.Expr, dialect: t.Optional[str] = None) -> exp.Expr: """ Resolves references from the "source" and "target" tables (or their DBT equivalents) with the corresponding SQLMesh merge aliases (MERGE_SOURCE_ALIAS and MERGE_TARGET_ALIAS) diff --git a/sqlmesh/core/engine_adapter/_typing.py b/sqlmesh/core/engine_adapter/_typing.py index 98821bb2d4..77bcf2c015 100644 --- a/sqlmesh/core/engine_adapter/_typing.py +++ b/sqlmesh/core/engine_adapter/_typing.py @@ -30,3 +30,5 @@ ] QueryOrDF = t.Union[Query, DF] + GrantsConfig = t.Dict[str, t.List[str]] + DCL = t.TypeVar("DCL", exp.Grant, exp.Revoke) diff --git a/sqlmesh/core/engine_adapter/athena.py b/sqlmesh/core/engine_adapter/athena.py index bd84ba5276..338381549b 100644 --- a/sqlmesh/core/engine_adapter/athena.py +++ b/sqlmesh/core/engine_adapter/athena.py @@ -158,7 +158,7 @@ def _create_schema( schema_name: SchemaName, ignore_if_exists: 
bool, warn_on_error: bool, - properties: t.List[exp.Expression], + properties: t.List[exp.Expr], kind: str, ) -> None: if location := self._table_location(table_properties=None, table=exp.to_table(schema_name)): @@ -177,14 +177,14 @@ def _create_schema( def _build_create_table_exp( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, **kwargs: t.Any, ) -> exp.Create: exists = False if replace else exists @@ -235,18 +235,18 @@ def _build_table_properties_exp( catalog_name: t.Optional[str] = None, table_format: t.Optional[str] = None, storage_format: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, partition_interval_unit: t.Optional[IntervalUnit] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, table: t.Optional[exp.Table] = None, - expression: t.Optional[exp.Expression] = None, + expression: t.Optional[exp.Expr] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] table_properties = table_properties or {} is_hive = 
self._table_type(table_format) == "hive" @@ -266,7 +266,7 @@ def _build_table_properties_exp( properties.append(exp.SchemaCommentProperty(this=exp.Literal.string(table_description))) if partitioned_by: - schema_expressions: t.List[exp.Expression] = [] + schema_expressions: t.List[exp.Expr] = [] if is_hive and target_columns_to_types: # For Hive-style tables, you cannot include the partitioned by columns in the main set of columns # In the PARTITIONED BY expression, you also cant just include the column names, you need to include the data type as well @@ -381,7 +381,7 @@ def _is_hive_partitioned_table(self, table: exp.Table) -> bool: raise e def _table_location_or_raise( - self, table_properties: t.Optional[t.Dict[str, exp.Expression]], table: exp.Table + self, table_properties: t.Optional[t.Dict[str, exp.Expr]], table: exp.Table ) -> exp.LocationProperty: location = self._table_location(table_properties, table) if not location: @@ -392,7 +392,7 @@ def _table_location_or_raise( def _table_location( self, - table_properties: t.Optional[t.Dict[str, exp.Expression]], + table_properties: t.Optional[t.Dict[str, exp.Expr]], table: exp.Table, ) -> t.Optional[exp.LocationProperty]: base_uri: str @@ -402,7 +402,7 @@ def _table_location( s3_base_location_property = table_properties.pop( "s3_base_location" ) # pop because it's handled differently and we dont want it to end up in the TBLPROPERTIES clause - if isinstance(s3_base_location_property, exp.Expression): + if isinstance(s3_base_location_property, exp.Expr): base_uri = s3_base_location_property.name else: base_uri = s3_base_location_property @@ -419,7 +419,7 @@ def _table_location( return exp.LocationProperty(this=exp.Literal.string(full_uri)) def _find_matching_columns( - self, partitioned_by: t.List[exp.Expression], columns_to_types: t.Dict[str, exp.DataType] + self, partitioned_by: t.List[exp.Expr], columns_to_types: t.Dict[str, exp.DataType] ) -> t.List[t.Tuple[str, exp.DataType]]: matches = [] for col in 
partitioned_by: @@ -557,7 +557,7 @@ def _chunks() -> t.Iterable[t.List[t.List[str]]]: PartitionsToDelete=[{"Values": v} for v in batch], ) - def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expression]) -> None: + def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expr]) -> None: table = exp.to_table(table_name) table_type = self._query_table_type(table) diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py index c48ce2154d..8de7b79398 100644 --- a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -18,7 +18,7 @@ from sqlglot import Dialect, exp from sqlglot.errors import ErrorLevel -from sqlglot.helper import ensure_list +from sqlglot.helper import ensure_list, seq_get from sqlglot.optimizer.qualify_columns import quote_identifiers from sqlmesh.core.dialect import ( @@ -63,6 +63,7 @@ from sqlmesh.core.engine_adapter._typing import ( DF, BigframeSession, + GrantsConfig, PySparkDataFrame, PySparkSession, Query, @@ -114,11 +115,13 @@ class EngineAdapter: SUPPORTS_TUPLE_IN = True HAS_VIEW_BINDING = False SUPPORTS_REPLACE_TABLE = True + SUPPORTS_GRANTS = False DEFAULT_CATALOG_TYPE = DIALECT QUOTE_IDENTIFIERS_IN_VIEWS = True MAX_IDENTIFIER_LENGTH: t.Optional[int] = None ATTACH_CORRELATION_ID = True SUPPORTS_QUERY_EXECUTION_TRACKING = False + SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS = False def __init__( self, @@ -160,6 +163,7 @@ def __init__( self.correlation_id = correlation_id self._schema_differ_overrides = schema_differ_overrides self._query_execution_tracker = query_execution_tracker + self._data_object_cache: t.Dict[str, t.Optional[DataObject]] = {} def with_settings(self, **kwargs: t.Any) -> EngineAdapter: extra_kwargs = { @@ -223,12 +227,16 @@ def schema_differ(self) -> SchemaDiffer: } ) + @property + def _catalog_type_overrides(self) -> t.Dict[str, str]: + return self._extra_config.get("catalog_type_overrides") or {} + @classmethod def _casted_columns( cls, 
target_columns_to_types: t.Dict[str, exp.DataType], source_columns: t.Optional[t.List[str]] = None, - ) -> t.List[exp.Alias]: + ) -> t.List[exp.Expr]: source_columns_lookup = set(source_columns or target_columns_to_types) return [ exp.alias_( @@ -430,7 +438,11 @@ def get_catalog_type(self, catalog: t.Optional[str]) -> str: raise UnsupportedCatalogOperationError( f"{self.dialect} does not support catalogs and a catalog was provided: {catalog}" ) - return self.DEFAULT_CATALOG_TYPE + return ( + self._catalog_type_overrides.get(catalog, self.DEFAULT_CATALOG_TYPE) + if catalog + else self.DEFAULT_CATALOG_TYPE + ) def get_catalog_type_from_table(self, table: TableName) -> str: """Get the catalog type from a table name if it has a catalog specified, otherwise return the current catalog type""" @@ -539,11 +551,13 @@ def replace_query( target_table, source_queries, target_columns_to_types, + **kwargs, ) return self._insert_overwrite_by_condition( target_table, source_queries, target_columns_to_types, + **kwargs, ) def create_index( @@ -577,7 +591,7 @@ def create_index( def _pop_creatable_type_from_properties( self, - properties: t.Dict[str, exp.Expression], + properties: t.Dict[str, exp.Expr], ) -> t.Optional[exp.Property]: """Pop out the creatable_type from the properties dictionary (if exists (return it/remove it) else return none). It also checks that none of the expressions are MATERIALIZE as that conflicts with the `materialize` parameter. 
@@ -638,9 +652,9 @@ def create_managed_table( table_name: TableName, query: Query, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, table_description: t.Optional[str] = None, column_descriptions: t.Optional[t.Dict[str, str]] = None, source_columns: t.Optional[t.List[str]] = None, @@ -797,6 +811,7 @@ def _build_schema_exp( column_descriptions: t.Optional[t.Dict[str, str]] = None, expressions: t.Optional[t.List[exp.PrimaryKey]] = None, is_view: bool = False, + materialized: bool = False, ) -> exp.Schema: """ Build a schema expression for a table, columns, column comments, and additional schema properties. @@ -809,6 +824,7 @@ def _build_schema_exp( target_columns_to_types=target_columns_to_types, column_descriptions=column_descriptions, is_view=is_view, + materialized=materialized, ) + expressions, ) @@ -818,6 +834,7 @@ def _build_column_defs( target_columns_to_types: t.Dict[str, exp.DataType], column_descriptions: t.Optional[t.Dict[str, str]] = None, is_view: bool = False, + materialized: bool = False, ) -> t.List[exp.ColumnDef]: engine_supports_schema_comments = ( self.COMMENT_CREATION_VIEW.supports_schema_def @@ -947,7 +964,7 @@ def _create_table_from_source_queries( def _create_table( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -974,11 +991,18 @@ def _create_table( ), track_rows_processed=track_rows_processed, ) + # Extract table name to clear cache + table_name = ( + 
table_name_or_schema.this + if isinstance(table_name_or_schema, exp.Schema) + else table_name_or_schema + ) + self._clear_data_object_cache(table_name) def _build_create_table_exp( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -1029,13 +1053,15 @@ def create_table_like( target_table_name: The name of the table to create. Can be fully qualified or just table name. source_table_name: The name of the table to base the new table on. """ - self.create_table(target_table_name, self.columns(source_table_name), exists=exists) + self._create_table_like(target_table_name, source_table_name, exists=exists, **kwargs) + self._clear_data_object_cache(target_table_name) def clone_table( self, target_table_name: TableName, source_table_name: TableName, replace: bool = False, + exists: bool = True, clone_kwargs: t.Optional[t.Dict[str, t.Any]] = None, **kwargs: t.Any, ) -> None: @@ -1045,6 +1071,7 @@ def clone_table( target_table_name: The name of the table that should be created. source_table_name: The name of the source table that should be cloned. replace: Whether or not to replace an existing table. + exists: Indicates whether to include the IF NOT EXISTS check. """ if not self.SUPPORTS_CLONING: raise NotImplementedError(f"Engine does not support cloning: {type(self)}") @@ -1055,6 +1082,7 @@ def clone_table( this=exp.to_table(target_table_name), kind="TABLE", replace=replace, + exists=exists, clone=exp.Clone( this=exp.to_table(source_table_name), **(clone_kwargs or {}), @@ -1062,6 +1090,7 @@ def clone_table( **kwargs, ) ) + self._clear_data_object_cache(target_table_name) def drop_data_object(self, data_object: DataObject, ignore_if_not_exists: bool = True) -> None: """Drops a data object of arbitrary type. 
@@ -1127,6 +1156,7 @@ def _drop_object( drop_args["cascade"] = cascade self.execute(exp.Drop(this=exp.to_table(name), kind=kind, exists=exists, **drop_args)) + self._clear_data_object_cache(name) def get_alter_operations( self, @@ -1173,7 +1203,7 @@ def create_view( materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, table_description: t.Optional[str] = None, column_descriptions: t.Optional[t.Dict[str, str]] = None, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, source_columns: t.Optional[t.List[str]] = None, **create_kwargs: t.Any, ) -> None: @@ -1233,7 +1263,11 @@ def create_view( schema: t.Union[exp.Table, exp.Schema] = exp.to_table(view_name) if target_columns_to_types: schema = self._build_schema_exp( - exp.to_table(view_name), target_columns_to_types, column_descriptions, is_view=True + exp.to_table(view_name), + target_columns_to_types, + column_descriptions, + is_view=True, + materialized=materialized, ) properties = create_kwargs.pop("properties", None) @@ -1317,6 +1351,8 @@ def create_view( quote_identifiers=self.QUOTE_IDENTIFIERS_IN_VIEWS, ) + self._clear_data_object_cache(view_name) + # Register table comment with commands if the engine doesn't support doing it in CREATE if ( table_description @@ -1346,7 +1382,7 @@ def create_schema( schema_name: SchemaName, ignore_if_exists: bool = True, warn_on_error: bool = True, - properties: t.Optional[t.List[exp.Expression]] = None, + properties: t.Optional[t.List[exp.Expr]] = None, ) -> None: properties = properties or [] return self._create_schema( @@ -1362,7 +1398,7 @@ def _create_schema( schema_name: SchemaName, ignore_if_exists: bool, warn_on_error: bool, - properties: t.List[exp.Expression], + properties: t.List[exp.Expr], kind: str, ) -> None: """Create a schema from a name or qualified table name.""" @@ -1387,7 +1423,7 @@ def drop_schema( schema_name: SchemaName, ignore_if_not_exists: bool = True, cascade: bool = 
False, - **drop_args: t.Dict[str, exp.Expression], + **drop_args: t.Dict[str, exp.Expr], ) -> None: return self._drop_object( name=schema_name, @@ -1446,13 +1482,19 @@ def columns( } def table_exists(self, table_name: TableName) -> bool: + table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None + try: - self.execute(exp.Describe(this=exp.to_table(table_name), kind="TABLE")) + self.execute(exp.Describe(this=table, kind="TABLE")) return True except Exception: return False - def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expression]) -> None: + def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expr]) -> None: self.execute(exp.delete(table_name, where)) def insert_append( @@ -1510,7 +1552,7 @@ def insert_overwrite_by_partition( self, table_name: TableName, query_or_df: QueryOrDF, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, source_columns: t.Optional[t.List[str]] = None, ) -> None: @@ -1541,10 +1583,8 @@ def insert_overwrite_by_time_partition( query_or_df: QueryOrDF, start: TimeLike, end: TimeLike, - time_formatter: t.Callable[ - [TimeLike, t.Optional[t.Dict[str, exp.DataType]]], exp.Expression - ], - time_column: TimeColumn | exp.Expression | str, + time_formatter: t.Callable[[TimeLike, t.Optional[t.Dict[str, exp.DataType]]], exp.Expr], + time_column: TimeColumn | exp.Expr | str, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, source_columns: t.Optional[t.List[str]] = None, **kwargs: t.Any, @@ -1581,7 +1621,7 @@ def _insert_overwrite_by_time_partition( **kwargs: t.Any, ) -> None: return self._insert_overwrite_by_condition( - table_name, 
source_queries, target_columns_to_types, where + table_name, source_queries, target_columns_to_types, where, **kwargs ) def _values_to_sql( @@ -1633,6 +1673,30 @@ def _insert_overwrite_by_condition( target_columns_to_types=target_columns_to_types, order_projections=False, ) + elif insert_overwrite_strategy.is_merge: + columns = [exp.column(col) for col in target_columns_to_types] + when_not_matched_by_source = exp.When( + matched=False, + source=True, + condition=where, + then=exp.Delete(), + ) + when_not_matched_by_target = exp.When( + matched=False, + source=False, + then=exp.Insert( + this=exp.Tuple(expressions=columns), + expression=exp.Tuple(expressions=columns), + ), + ) + self._merge( + target_table=table_name, + query=query, + on=exp.false(), + whens=exp.Whens( + expressions=[when_not_matched_by_source, when_not_matched_by_target] + ), + ) else: insert_exp = exp.insert( query, @@ -1660,7 +1724,7 @@ def _merge( self, target_table: TableName, query: Query, - on: exp.Expression, + on: exp.Expr, whens: exp.Whens, ) -> None: this = exp.alias_(exp.to_table(target_table), alias=MERGE_TARGET_ALIAS, table=True) @@ -1675,7 +1739,7 @@ def scd_type_2_by_time( self, target_table: TableName, source_table: QueryOrDF, - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], valid_from_col: exp.Column, valid_to_col: exp.Column, execution_time: t.Union[TimeLike, exp.Column], @@ -1711,11 +1775,11 @@ def scd_type_2_by_column( self, target_table: TableName, source_table: QueryOrDF, - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], valid_from_col: exp.Column, valid_to_col: exp.Column, execution_time: t.Union[TimeLike, exp.Column], - check_columns: t.Union[exp.Star, t.Sequence[exp.Column]], + check_columns: t.Union[exp.Star, t.Sequence[exp.Expr]], invalidate_hard_deletes: bool = True, execution_time_as_valid_from: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -1747,13 +1811,13 @@ def 
_scd_type_2( self, target_table: TableName, source_table: QueryOrDF, - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], valid_from_col: exp.Column, valid_to_col: exp.Column, execution_time: t.Union[TimeLike, exp.Column], invalidate_hard_deletes: bool = True, updated_at_col: t.Optional[exp.Column] = None, - check_columns: t.Optional[t.Union[exp.Star, t.Sequence[exp.Column]]] = None, + check_columns: t.Optional[t.Union[exp.Star, t.Sequence[exp.Expr]]] = None, updated_at_as_valid_from: bool = False, execution_time_as_valid_from: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -1828,8 +1892,10 @@ def remove_managed_columns( # they are equal or not, the extra check is not a problem and we gain simplified logic here. # If we want to change this, then we just need to check the expressions in unique_key and pull out the # column names and then remove them from the unmanaged_columns - if check_columns and check_columns == exp.Star(): - check_columns = [exp.column(col) for col in unmanaged_columns_to_types] + if check_columns: + # Handle both Star directly and [Star()] (which can happen during serialization/deserialization) + if isinstance(seq_get(ensure_list(check_columns), 0), exp.Star): + check_columns = [exp.column(col) for col in unmanaged_columns_to_types] execution_ts = ( exp.cast(execution_time, time_data_type, dialect=self.dialect) if isinstance(execution_time, exp.Column) @@ -1840,7 +1906,7 @@ def remove_managed_columns( raise SQLMeshError( "Cannot use `updated_at_as_valid_from` without `updated_at_name` for SCD Type 2" ) - update_valid_from_start: t.Union[str, exp.Expression] = updated_at_col + update_valid_from_start: t.Union[str, exp.Expr] = updated_at_col # If using check_columns and the user doesn't always want execution_time for valid from # then we only use epoch 0 if we are truncating the table and loading rows for the first time. # All future new rows should have execution time. 
@@ -1866,7 +1932,8 @@ def remove_managed_columns( col_qualified.set("table", exp.to_identifier("joined")) t_col = col_qualified.copy() - t_col.this.set("this", f"t_{col.name}") + for column in t_col.find_all(exp.Column): + column.this.set("this", f"t_{column.name}") row_check_conditions.extend( [ @@ -2138,9 +2205,9 @@ def merge( target_table: TableName, source_table: QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]], - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], when_matched: t.Optional[exp.Whens] = None, - merge_filter: t.Optional[exp.Expression] = None, + merge_filter: t.Optional[exp.Expr] = None, source_columns: t.Optional[t.List[str]] = None, **kwargs: t.Any, ) -> None: @@ -2217,24 +2284,34 @@ def rename_table( "Tried to rename table across catalogs which is not supported" ) self._rename_table(old_table_name, new_table_name) + self._clear_data_object_cache(old_table_name) + self._clear_data_object_cache(new_table_name) - def get_data_object(self, target_name: TableName) -> t.Optional[DataObject]: + def get_data_object( + self, target_name: TableName, safe_to_cache: bool = False + ) -> t.Optional[DataObject]: target_table = exp.to_table(target_name) existing_data_objects = self.get_data_objects( - schema_(target_table.db, target_table.catalog), {target_table.name} + schema_(target_table.db, target_table.catalog), + {target_table.name}, + safe_to_cache=safe_to_cache, ) if existing_data_objects: return existing_data_objects[0] return None def get_data_objects( - self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None + self, + schema_name: SchemaName, + object_names: t.Optional[t.Set[str]] = None, + safe_to_cache: bool = False, ) -> t.List[DataObject]: """Lists all data objects in the target schema. Args: schema_name: The name of the schema to list data objects from. object_names: If provided, only return data objects with these names. 
+ safe_to_cache: Whether it is safe to cache the results of this call. Returns: A list of data objects in the target schema. @@ -2242,19 +2319,68 @@ def get_data_objects( if object_names is not None: if not object_names: return [] - object_names_list = list(object_names) - batches = [ - object_names_list[i : i + self.DATA_OBJECT_FILTER_BATCH_SIZE] - for i in range(0, len(object_names_list), self.DATA_OBJECT_FILTER_BATCH_SIZE) - ] - return [ - obj for batch in batches for obj in self._get_data_objects(schema_name, set(batch)) - ] - return self._get_data_objects(schema_name) + + # Check cache for each object name + target_schema = to_schema(schema_name) + cached_objects = [] + missing_names = set() + + for name in object_names: + cache_key = _get_data_object_cache_key( + target_schema.catalog, target_schema.db, name + ) + if cache_key in self._data_object_cache: + logger.debug("Data object cache hit: %s", cache_key) + data_object = self._data_object_cache[cache_key] + # If the object is none, then the table was previously looked for but not found + if data_object: + cached_objects.append(data_object) + else: + logger.debug("Data object cache miss: %s", cache_key) + missing_names.add(name) + + # Fetch missing objects from database + if missing_names: + object_names_list = list(missing_names) + batches = [ + object_names_list[i : i + self.DATA_OBJECT_FILTER_BATCH_SIZE] + for i in range(0, len(object_names_list), self.DATA_OBJECT_FILTER_BATCH_SIZE) + ] + + fetched_objects = [] + fetched_object_names = set() + for batch in batches: + objects = self._get_data_objects(schema_name, set(batch)) + for obj in objects: + if safe_to_cache: + cache_key = _get_data_object_cache_key( + obj.catalog, obj.schema_name, obj.name + ) + self._data_object_cache[cache_key] = obj + fetched_objects.append(obj) + fetched_object_names.add(obj.name) + + if safe_to_cache: + for missing_name in missing_names - fetched_object_names: + cache_key = _get_data_object_cache_key( + target_schema.catalog, 
target_schema.db, missing_name + ) + self._data_object_cache[cache_key] = None + + return cached_objects + fetched_objects + + return cached_objects + + fetched_objects = self._get_data_objects(schema_name) + if safe_to_cache: + for obj in fetched_objects: + cache_key = _get_data_object_cache_key(obj.catalog, obj.schema_name, obj.name) + self._data_object_cache[cache_key] = obj + return fetched_objects def fetchone( self, - query: t.Union[exp.Expression, str], + query: t.Union[exp.Expr, str], ignore_unsupported_errors: bool = False, quote_identifiers: bool = False, ) -> t.Optional[t.Tuple]: @@ -2268,7 +2394,7 @@ def fetchone( def fetchall( self, - query: t.Union[exp.Expression, str], + query: t.Union[exp.Expr, str], ignore_unsupported_errors: bool = False, quote_identifiers: bool = False, ) -> t.List[t.Tuple]: @@ -2281,7 +2407,7 @@ def fetchall( return self.cursor.fetchall() def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> DF: """Fetches a DataFrame that can be either Pandas or PySpark from the cursor""" with self.transaction(): @@ -2304,7 +2430,7 @@ def _native_df_to_pandas_df( raise NotImplementedError(f"Unable to convert {type(query_or_df)} to Pandas") def fetchdf( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> pd.DataFrame: """Fetches a Pandas DataFrame from the cursor""" import pandas as pd @@ -2317,11 +2443,16 @@ def fetchdf( return df def fetch_pyspark_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> PySparkDataFrame: """Fetches a PySpark DataFrame from the cursor""" raise NotImplementedError(f"Engine does not support PySpark DataFrames: {type(self)}") + @property + def wap_enabled(self) -> bool: + """Returns whether WAP is 
enabled for this engine.""" + return self._extra_config.get("wap_enabled", False) + def wap_supported(self, table_name: TableName) -> bool: """Returns whether WAP for the target table is supported.""" return False @@ -2359,6 +2490,33 @@ def wap_publish(self, table_name: TableName, wap_id: str) -> None: """ raise NotImplementedError(f"Engine does not support WAP: {type(self)}") + def sync_grants_config( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> None: + """Applies the grants_config to a table authoritatively. + It first compares the specified grants against the current grants, and then + applies the diffs to the table by revoking and granting privileges as needed. + + Args: + table: The table/view to apply grants to. + grants_config: Dictionary mapping privileges to lists of grantees. + table_type: The type of database object (TABLE, VIEW, MATERIALIZED_VIEW). + """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + + current_grants = self._get_current_grants_config(table) + new_grants, revoked_grants = self._diff_grants_configs(grants_config, current_grants) + revoke_exprs = self._revoke_grants_config_expr(table, revoked_grants, table_type) + grant_exprs = self._apply_grants_config_expr(table, new_grants, table_type) + dcl_exprs = revoke_exprs + grant_exprs + + if dcl_exprs: + self.execute(dcl_exprs) + @contextlib.contextmanager def transaction( self, @@ -2415,7 +2573,7 @@ def _is_session_active(self) -> bool: def execute( self, - expressions: t.Union[str, exp.Expression, t.Sequence[exp.Expression]], + expressions: t.Union[str, exp.Expr, t.Sequence[exp.Expr]], ignore_unsupported_errors: bool = False, quote_identifiers: bool = True, track_rows_processed: bool = False, @@ -2427,7 +2585,7 @@ def execute( ) with self.transaction(): for e in ensure_list(expressions): - if isinstance(e, exp.Expression): + if isinstance(e, exp.Expr): 
self._check_identifier_length(e) sql = self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs) else: @@ -2437,7 +2595,7 @@ def execute( self._log_sql( sql, - expression=e if isinstance(e, exp.Expression) else None, + expression=e if isinstance(e, exp.Expr) else None, quote_identifiers=quote_identifiers, ) self._execute(sql, track_rows_processed, **kwargs) @@ -2450,7 +2608,7 @@ def _attach_correlation_id(self, sql: str) -> str: def _log_sql( self, sql: str, - expression: t.Optional[exp.Expression] = None, + expression: t.Optional[exp.Expr] = None, quote_identifiers: bool = True, ) -> None: if not logger.isEnabledFor(self._execute_log_level): @@ -2542,7 +2700,7 @@ def temp_table( self.drop_table(table) def _table_or_view_properties_to_expressions( - self, table_or_view_properties: t.Optional[t.Dict[str, exp.Expression]] = None + self, table_or_view_properties: t.Optional[t.Dict[str, exp.Expr]] = None ) -> t.List[exp.Property]: """Converts model properties (either physical or virtual) to a list of property expressions.""" if not table_or_view_properties: @@ -2554,7 +2712,7 @@ def _table_or_view_properties_to_expressions( def _build_partitioned_by_exp( self, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], *, partition_interval_unit: t.Optional[IntervalUnit] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -2565,7 +2723,7 @@ def _build_partitioned_by_exp( def _build_clustered_by_exp( self, - clustered_by: t.List[exp.Expression], + clustered_by: t.List[exp.Expr], **kwargs: t.Any, ) -> t.Optional[exp.Cluster]: return None @@ -2575,17 +2733,17 @@ def _build_table_properties_exp( catalog_name: t.Optional[str] = None, table_format: t.Optional[str] = None, storage_format: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, partition_interval_unit: t.Optional[IntervalUnit] = None, - clustered_by: 
t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: """Creates a SQLGlot table properties expression for ddl.""" - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] if table_description: properties.append( @@ -2604,12 +2762,12 @@ def _build_table_properties_exp( def _build_view_properties_exp( self, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, table_description: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: """Creates a SQLGlot table properties expression for view""" - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] if table_description: properties.append( @@ -2631,7 +2789,7 @@ def _truncate_table_comment(self, comment: str) -> str: def _truncate_column_comment(self, comment: str) -> str: return self._truncate_comment(comment, self.MAX_COLUMN_COMMENT_LENGTH) - def _to_sql(self, expression: exp.Expression, quote: bool = True, **kwargs: t.Any) -> str: + def _to_sql(self, expression: exp.Expr, quote: bool = True, **kwargs: t.Any) -> str: """ Converts an expression to a SQL string. 
Has a set of default kwargs to apply, and then default kwargs defined for the given dialect, and then kwargs provided by the user when defining the engine @@ -2652,6 +2810,17 @@ def _to_sql(self, expression: exp.Expression, quote: bool = True, **kwargs: t.An return expression.sql(**sql_gen_kwargs, copy=False) # type: ignore + def _clear_data_object_cache(self, table_name: t.Optional[TableName] = None) -> None: + """Clears the cache entry for the given table name, or clears the entire cache if table_name is None.""" + if table_name is None: + logger.debug("Clearing entire data object cache") + self._data_object_cache.clear() + else: + table = exp.to_table(table_name) + cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + logger.debug("Clearing data object cache key: %s", cache_key) + self._data_object_cache.pop(cache_key, None) + def _get_data_objects( self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None ) -> t.List[DataObject]: @@ -2681,7 +2850,7 @@ def _order_projections_and_filter( self, query: Query, target_columns_to_types: t.Dict[str, exp.DataType], - where: t.Optional[exp.Expression] = None, + where: t.Optional[exp.Expr] = None, coerce_types: bool = False, ) -> Query: if not isinstance(query, exp.Query) or ( @@ -2690,9 +2859,9 @@ def _order_projections_and_filter( return query query = t.cast(exp.Query, query.copy()) - with_ = query.args.pop("with", None) + with_ = query.args.pop("with_", None) - select_exprs: t.List[exp.Expression] = [ + select_exprs: t.List[exp.Expr] = [ exp.column(c, quoted=True) for c in target_columns_to_types ] if coerce_types and columns_to_types_all_known(target_columns_to_types): @@ -2706,7 +2875,7 @@ def _order_projections_and_filter( query = query.where(where, copy=False) if with_: - query.set("with", with_) + query.set("with_", with_) return query @@ -2743,7 +2912,7 @@ def _replace_by_key( target_table: TableName, source_table: QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, 
exp.DataType]], - key: t.Sequence[exp.Expression], + key: t.Sequence[exp.Expr], is_unique_key: bool, source_columns: t.Optional[t.List[str]] = None, ) -> None: @@ -2837,6 +3006,15 @@ def _create_column_comments( exc_info=True, ) + def _create_table_like( + self, + target_table_name: TableName, + source_table_name: TableName, + exists: bool, + **kwargs: t.Any, + ) -> None: + self.create_table(target_table_name, self.columns(source_table_name), exists=exists) + def _rename_table( self, old_table_name: TableName, @@ -2875,7 +3053,7 @@ def _select_columns( ) ) - def _check_identifier_length(self, expression: exp.Expression) -> None: + def _check_identifier_length(self, expression: exp.Expr) -> None: if self.MAX_IDENTIFIER_LENGTH is None or not isinstance(expression, exp.DDL): return @@ -2887,6 +3065,127 @@ def _check_identifier_length(self, expression: exp.Expression) -> None: f"Identifier name '{name}' (length {name_length}) exceeds {self.dialect.capitalize()}'s max identifier limit of {self.MAX_IDENTIFIER_LENGTH} characters" ) + def get_table_last_modified_ts(self, table_names: t.List[TableName]) -> t.List[int]: + raise NotImplementedError() + + @classmethod + def _diff_grants_configs( + cls, new_config: GrantsConfig, old_config: GrantsConfig + ) -> t.Tuple[GrantsConfig, GrantsConfig]: + """Compute additions and removals between two grants configurations. + + This method compares new (desired) and old (current) GrantsConfigs case-insensitively + for both privilege keys and grantees, while preserving original casing + in the output GrantsConfigs. + + Args: + new_config: Desired grants configuration (specified by the user). + old_config: Current grants configuration (returned by the database). 
+ + Returns: + A tuple of (additions, removals) GrantsConfig where: + - additions contains privileges/grantees present in new_config but not in old_config + - additions uses keys and grantee strings from new_config (user-specified casing) + - removals contains privileges/grantees present in old_config but not in new_config + - removals uses keys and grantee strings from old_config (database-returned casing) + + Notes: + - Comparison is case-insensitive using casefold(); original casing is preserved in results. + - Overlapping grantees (case-insensitive) are excluded from the results. + """ + + def _diffs(config1: GrantsConfig, config2: GrantsConfig) -> GrantsConfig: + diffs: GrantsConfig = {} + cf_config2 = {k.casefold(): {g.casefold() for g in v} for k, v in config2.items()} + for key, grantees in config1.items(): + cf_key = key.casefold() + + # Missing key (add all grantees) + if cf_key not in cf_config2: + diffs[key] = grantees.copy() + continue + + # Include only grantees not in config2 + cf_grantees2 = cf_config2[cf_key] + diff_grantees = [] + for grantee in grantees: + if grantee.casefold() not in cf_grantees2: + diff_grantees.append(grantee) + if diff_grantees: + diffs[key] = diff_grantees + return diffs + + return _diffs(new_config, old_config), _diffs(old_config, new_config) + + def _get_current_grants_config(self, table: exp.Table) -> GrantsConfig: + """Returns current grants for a table as a dictionary. + + This method queries the database and returns the current grants/permissions + for the given table, parsed into a dictionary format. The it handles + case-insensitive comparison between these current grants and the desired + grants from model configuration. + + Args: + table: The table/view to query grants for. + + Returns: + Dictionary mapping permissions to lists of grantees. Permission names + should be returned as the database provides them (typically uppercase + for standard SQL permissions, but engine-specific roles may vary). 
+ + Raises: + NotImplementedError: If the engine does not support grants. + """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + raise NotImplementedError("Subclass must implement get_current_grants") + + def _apply_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expr]: + """Returns SQLGlot Grant expressions to apply grants to a table. + + Args: + table: The table/view to grant permissions on. + grants_config: Dictionary mapping permissions to lists of grantees. + table_type: The type of database object (TABLE, VIEW, MATERIALIZED_VIEW). + + Returns: + List of SQLGlot expressions for grant operations. + + Raises: + NotImplementedError: If the engine does not support grants. + """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + raise NotImplementedError("Subclass must implement _apply_grants_config_expr") + + def _revoke_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expr]: + """Returns SQLGlot expressions to revoke grants from a table. + + Args: + table: The table/view to revoke permissions from. + grants_config: Dictionary mapping permissions to lists of grantees. + table_type: The type of database object (TABLE, VIEW, MATERIALIZED_VIEW). + + Returns: + List of SQLGlot expressions for revoke operations. + + Raises: + NotImplementedError: If the engine does not support grants. 
+ """ + if not self.SUPPORTS_GRANTS: + raise NotImplementedError(f"Engine does not support grants: {type(self)}") + raise NotImplementedError("Subclass must implement _revoke_grants_config_expr") + class EngineAdapterWithIndexSupport(EngineAdapter): SUPPORTS_INDEXES = True @@ -2896,3 +3195,9 @@ def _decoded_str(value: t.Union[str, bytes]) -> str: if isinstance(value, bytes): return value.decode("utf-8") return value + + +def _get_data_object_cache_key(catalog: t.Optional[str], schema_name: str, object_name: str) -> str: + """Returns a cache key for a data object based on its fully qualified name.""" + catalog = f"{catalog}." if catalog else "" + return f"{catalog}{schema_name}.{object_name}" diff --git a/sqlmesh/core/engine_adapter/base_postgres.py b/sqlmesh/core/engine_adapter/base_postgres.py index c6ba7d6d62..e2347b1263 100644 --- a/sqlmesh/core/engine_adapter/base_postgres.py +++ b/sqlmesh/core/engine_adapter/base_postgres.py @@ -1,11 +1,12 @@ from __future__ import annotations import typing as t +import logging from sqlglot import exp from sqlmesh.core.dialect import to_schema -from sqlmesh.core.engine_adapter import EngineAdapter +from sqlmesh.core.engine_adapter.base import EngineAdapter, _get_data_object_cache_key from sqlmesh.core.engine_adapter.shared import ( CatalogSupport, CommentCreationTable, @@ -20,6 +21,9 @@ from sqlmesh.core.engine_adapter._typing import QueryOrDF +logger = logging.getLogger(__name__) + + class BasePostgresEngineAdapter(EngineAdapter): DEFAULT_BATCH_SIZE = 400 COMMENT_CREATION_TABLE = CommentCreationTable.COMMENT_COMMAND_ONLY @@ -58,6 +62,7 @@ def columns( raise SQLMeshError( f"Could not get columns for table '{table.sql(dialect=self.dialect)}'. Table not found." 
) + return { column_name: exp.DataType.build(data_type, dialect=self.dialect, udt=True) for column_name, data_type in resp @@ -75,6 +80,10 @@ def table_exists(self, table_name: TableName) -> bool: Reference: https://github.com/aws/amazon-redshift-python-driver/blob/master/redshift_connector/cursor.py#L528-L553 """ table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None sql = ( exp.select("1") @@ -101,7 +110,7 @@ def create_view( materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, table_description: t.Optional[str] = None, column_descriptions: t.Optional[t.Dict[str, str]] = None, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, source_columns: t.Optional[t.List[str]] = None, **create_kwargs: t.Any, ) -> None: @@ -188,3 +197,10 @@ def _get_data_objects( ) for row in df.itertuples() ] + + def _get_current_schema(self) -> str: + """Returns the current default schema for the connection.""" + result = self.fetchone(exp.select(exp.func("current_schema"))) + if result and result[0]: + return result[0] + return "public" diff --git a/sqlmesh/core/engine_adapter/bigquery.py b/sqlmesh/core/engine_adapter/bigquery.py index b3d02d8bbf..d136445114 100644 --- a/sqlmesh/core/engine_adapter/bigquery.py +++ b/sqlmesh/core/engine_adapter/bigquery.py @@ -8,9 +8,10 @@ from sqlglot.transforms import remove_precision_parameterized_types from sqlmesh.core.dialect import to_schema +from sqlmesh.core.engine_adapter.base import _get_data_object_cache_key from sqlmesh.core.engine_adapter.mixins import ( - InsertOverwriteWithMergeMixin, ClusteredByMixin, + GrantsFromInfoSchemaMixin, RowDiffMixin, TableAlterClusterByOperation, ) @@ -20,6 +21,7 @@ 
DataObjectType, SourceQuery, set_catalog, + InsertOverwriteStrategy, ) from sqlmesh.core.node import IntervalUnit from sqlmesh.core.schema_diff import TableAlterOperation, NestedSupport @@ -39,7 +41,7 @@ from google.cloud.bigquery.table import Table as BigQueryTable from sqlmesh.core._typing import SchemaName, SessionProperties, TableName - from sqlmesh.core.engine_adapter._typing import BigframeSession, DF, Query + from sqlmesh.core.engine_adapter._typing import BigframeSession, DCL, DF, GrantsConfig, Query from sqlmesh.core.engine_adapter.base import QueryOrDF @@ -54,7 +56,7 @@ @set_catalog() -class BigQueryEngineAdapter(InsertOverwriteWithMergeMixin, ClusteredByMixin, RowDiffMixin): +class BigQueryEngineAdapter(ClusteredByMixin, RowDiffMixin, GrantsFromInfoSchemaMixin): """ BigQuery Engine Adapter using the `google-cloud-bigquery` library's DB API. """ @@ -64,10 +66,16 @@ class BigQueryEngineAdapter(InsertOverwriteWithMergeMixin, ClusteredByMixin, Row SUPPORTS_TRANSACTIONS = False SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_CLONING = True + SUPPORTS_GRANTS = True + CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expr = exp.func("session_user") + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = True + USE_CATALOG_IN_GRANTS = True + GRANT_INFORMATION_SCHEMA_TABLE_NAME = "OBJECT_PRIVILEGES" MAX_TABLE_COMMENT_LENGTH = 1024 MAX_COLUMN_COMMENT_LENGTH = 1024 SUPPORTS_QUERY_EXECUTION_TRACKING = True SUPPORTED_DROP_CASCADE_OBJECT_KINDS = ["SCHEMA"] + INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.MERGE SCHEMA_DIFFER_KWARGS = { "compatible_types": { @@ -132,8 +140,10 @@ def _job_params(self) -> t.Dict[str, t.Any]: "priority", BigQueryPriority.INTERACTIVE.bigquery_constant ), } - if self._extra_config.get("maximum_bytes_billed"): + if self._extra_config.get("maximum_bytes_billed") is not None: params["maximum_bytes_billed"] = self._extra_config.get("maximum_bytes_billed") + if self._extra_config.get("reservation") is not None: + params["reservation"] = self._extra_config.get("reservation") 
if self.correlation_id: # BigQuery label keys must be lowercase key = self.correlation_id.job_type.value.lower() @@ -168,17 +178,18 @@ def _df_to_source_queries( ) def query_factory() -> Query: - if bigframes_pd and isinstance(df, bigframes_pd.DataFrame): - df.to_gbq( + ordered_df = df[list(source_columns_to_types)] + if bigframes_pd and isinstance(ordered_df, bigframes_pd.DataFrame): + ordered_df.to_gbq( f"{temp_bq_table.project}.{temp_bq_table.dataset_id}.{temp_bq_table.table_id}", if_exists="replace", ) elif not self.table_exists(temp_table): # Make mypy happy - assert isinstance(df, pd.DataFrame) + assert isinstance(ordered_df, pd.DataFrame) self._db_call(self.client.create_table, table=temp_bq_table, exists_ok=False) result = self.__load_pandas_to_table( - temp_bq_table, df, source_columns_to_types, replace=False + temp_bq_table, ordered_df, source_columns_to_types, replace=False ) if result.errors: raise SQLMeshError(result.errors) @@ -279,7 +290,7 @@ def create_schema( schema_name: SchemaName, ignore_if_exists: bool = True, warn_on_error: bool = True, - properties: t.List[exp.Expression] = [], + properties: t.List[exp.Expr] = [], ) -> None: """Create a schema from a name or qualified table name.""" from google.api_core.exceptions import Conflict @@ -424,7 +435,7 @@ def alter_table( def fetchone( self, - query: t.Union[exp.Expression, str], + query: t.Union[exp.Expr, str], ignore_unsupported_errors: bool = False, quote_identifiers: bool = False, ) -> t.Optional[t.Tuple]: @@ -444,7 +455,7 @@ def fetchone( def fetchall( self, - query: t.Union[exp.Expression, str], + query: t.Union[exp.Expr, str], ignore_unsupported_errors: bool = False, quote_identifiers: bool = False, ) -> t.List[t.Tuple]: @@ -680,7 +691,7 @@ def insert_overwrite_by_partition( self, table_name: TableName, query_or_df: QueryOrDF, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, source_columns: 
t.Optional[t.List[str]] = None, ) -> None: @@ -742,6 +753,12 @@ def insert_overwrite_by_partition( ) def table_exists(self, table_name: TableName) -> bool: + table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None + try: from google.cloud.exceptions import NotFound except ModuleNotFoundError: @@ -753,6 +770,28 @@ def table_exists(self, table_name: TableName) -> bool: except NotFound: return False + def get_table_last_modified_ts(self, table_names: t.List[TableName]) -> t.List[int]: + from sqlmesh.utils.date import to_timestamp + + datasets_to_tables: t.DefaultDict[str, t.List[str]] = defaultdict(list) + for table_name in table_names: + table = exp.to_table(table_name) + datasets_to_tables[table.db].append(table.name) + + results = [] + + for dataset, tables in datasets_to_tables.items(): + query = ( + f"SELECT TIMESTAMP_MILLIS(last_modified_time) FROM `{dataset}.__TABLES__` WHERE " + ) + for i, table_name in enumerate(tables): + query += f"TABLE_ID = '{table_name}'" + if i < len(tables) - 1: + query += " OR " + results.extend(self.fetchall(query)) + + return [to_timestamp(row[0]) for row in results] + def _get_table(self, table_name: TableName) -> BigQueryTable: """ Returns a BigQueryTable object for the given table name. 
@@ -766,7 +805,7 @@ def _table_name(self, table_name: TableName) -> str: return ".".join(part.name for part in exp.to_table(table_name).parts) def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> DF: self.execute(query, quote_identifiers=quote_identifiers) query_job = self._query_job @@ -826,7 +865,7 @@ def _build_description_property_exp( def _build_partitioned_by_exp( self, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], *, partition_interval_unit: t.Optional[IntervalUnit] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -872,16 +911,16 @@ def _build_table_properties_exp( catalog_name: t.Optional[str] = None, table_format: t.Optional[str] = None, storage_format: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, partition_interval_unit: t.Optional[IntervalUnit] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] if partitioned_by and ( partitioned_by_prop := self._build_partitioned_by_exp( @@ -988,12 +1027,12 @@ def _build_col_comment_exp( def _build_view_properties_exp( self, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, table_description: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: """Creates a SQLGlot table 
properties expression for view""" - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] if table_description: properties.append( @@ -1069,7 +1108,9 @@ def _execute( else [] ) + # Create job config job_config = QueryJobConfig(**self._job_params, connection_properties=connection_properties) + self._query_job = self._db_call( self.client.query, query=sql, @@ -1220,10 +1261,10 @@ def _update_clustering_key(self, operation: TableAlterClusterByOperation) -> Non ) ) - def _normalize_decimal_value(self, col: exp.Expression, precision: int) -> exp.Expression: + def _normalize_decimal_value(self, col: exp.Expr, precision: int) -> exp.Expr: return exp.func("FORMAT", exp.Literal.string(f"%.{precision}f"), col) - def _normalize_nested_value(self, col: exp.Expression) -> exp.Expression: + def _normalize_nested_value(self, col: exp.Expr) -> exp.Expr: return exp.func("TO_JSON_STRING", col, dialect=self.dialect) @t.overload @@ -1295,6 +1336,108 @@ def _session_id(self) -> t.Any: def _session_id(self, value: t.Any) -> None: self._connection_pool.set_attribute("session_id", value) + def _get_current_schema(self) -> str: + raise NotImplementedError("BigQuery does not support current schema") + + def _get_bq_dataset_location(self, project: str, dataset: str) -> str: + return self._db_call(self.client.get_dataset, dataset_ref=f"{project}.{dataset}").location + + def _get_grant_expression(self, table: exp.Table) -> exp.Expr: + if not table.db: + raise ValueError( + f"Table {table.sql(dialect=self.dialect)} does not have a schema (dataset)" + ) + project = table.catalog or self.get_current_catalog() + if not project: + raise ValueError( + f"Table {table.sql(dialect=self.dialect)} does not have a catalog (project)" + ) + + dataset = table.db + table_name = table.name + location = self._get_bq_dataset_location(project, dataset) + + # https://cloud.google.com/bigquery/docs/information-schema-object-privileges + # OBJECT_PRIVILEGES is a project-level INFORMATION_SCHEMA 
view with regional qualifier + object_privileges_table = exp.to_table( + f"`{project}`.`region-{location}`.INFORMATION_SCHEMA.{self.GRANT_INFORMATION_SCHEMA_TABLE_NAME}", + dialect=self.dialect, + ) + return ( + exp.select("privilege_type", "grantee") + .from_(object_privileges_table) + .where( + exp.and_( + exp.column("object_schema").eq(exp.Literal.string(dataset)), + exp.column("object_name").eq(exp.Literal.string(table_name)), + # Filter out current_user + # BigQuery grantees format: "user:email" or "group:name" + exp.func("split", exp.column("grantee"), exp.Literal.string(":"))[ + exp.func("OFFSET", exp.Literal.number("1")) + ].neq(self.CURRENT_USER_OR_ROLE_EXPRESSION), + ) + ) + ) + + @staticmethod + def _grant_object_kind(table_type: DataObjectType) -> str: + if table_type == DataObjectType.VIEW: + return "VIEW" + if table_type == DataObjectType.MATERIALIZED_VIEW: + # We actually need to use "MATERIALIZED VIEW" here even though it's not listed + # as a supported resource_type in the BigQuery DCL doc: + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language + return "MATERIALIZED VIEW" + return "TABLE" + + def _dcl_grants_config_expr( + self, + dcl_cmd: t.Type[DCL], + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expr]: + expressions: t.List[exp.Expr] = [] + if not grants_config: + return expressions + + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language + + def normalize_principal(p: str) -> str: + if ":" not in p: + raise ValueError(f"Principal '{p}' missing a prefix label") + + # allUsers and allAuthenticatedUsers special groups that are cas-sensitive and must start with "specialGroup:" + if p.endswith("allUsers") or p.endswith("allAuthenticatedUsers"): + if not p.startswith("specialGroup:"): + raise ValueError( + f"Special group principal '{p}' must start with 'specialGroup:' prefix label" + ) + return p + + label, 
principal = p.split(":", 1) + # always lowercase principals + return f"{label}:{principal.lower()}" + + object_kind = self._grant_object_kind(table_type) + for privilege, principals in grants_config.items(): + if not principals: + continue + + noramlized_principals = [exp.Literal.string(normalize_principal(p)) for p in principals] + args: t.Dict[str, t.Any] = { + "privileges": [exp.GrantPrivilege(this=exp.to_identifier(privilege, quoted=True))], + "securable": table.copy(), + "principals": noramlized_principals, + } + + if object_kind: + args["kind"] = exp.Var(this=object_kind) + + expressions.append(dcl_cmd(**args)) # type: ignore[arg-type] + + return expressions + class _ErrorCounter: """ diff --git a/sqlmesh/core/engine_adapter/clickhouse.py b/sqlmesh/core/engine_adapter/clickhouse.py index ccffe64118..71a834ecfc 100644 --- a/sqlmesh/core/engine_adapter/clickhouse.py +++ b/sqlmesh/core/engine_adapter/clickhouse.py @@ -64,7 +64,7 @@ def cluster(self) -> t.Optional[str]: # doesn't use the row index at all def fetchone( self, - query: t.Union[exp.Expression, str], + query: t.Union[exp.Expr, str], ignore_unsupported_errors: bool = False, quote_identifiers: bool = False, ) -> t.Tuple: @@ -77,13 +77,11 @@ def fetchone( return self.cursor.fetchall()[0] def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> pd.DataFrame: """Fetches a Pandas DataFrame from the cursor""" return self.cursor.client.query_df( - self._to_sql(query, quote=quote_identifiers) - if isinstance(query, exp.Expression) - else query, + self._to_sql(query, quote=quote_identifiers) if isinstance(query, exp.Expr) else query, use_extended_dtypes=True, ) @@ -112,8 +110,9 @@ def query_factory() -> Query: storage_format=exp.var("MergeTree"), **kwargs, ) + ordered_df = df[list(source_columns_to_types)] - self.cursor.client.insert_df(temp_table.sql(dialect=self.dialect), df=df) + 
self.cursor.client.insert_df(temp_table.sql(dialect=self.dialect), df=ordered_df) return exp.select(*self._casted_columns(target_columns_to_types, source_columns)).from_( temp_table @@ -167,7 +166,7 @@ def create_schema( schema_name: SchemaName, ignore_if_exists: bool = True, warn_on_error: bool = True, - properties: t.List[exp.Expression] = [], + properties: t.List[exp.Expr] = [], ) -> None: """Create a Clickhouse database from a name or qualified table name. @@ -223,12 +222,12 @@ def _insert_overwrite_by_condition( target_columns_to_types = target_columns_to_types or self.columns(target_table) temp_table = self._get_temp_table(target_table) - self._create_table_like(temp_table, target_table) + self.create_table_like(temp_table, target_table) # REPLACE BY KEY: extract kwargs if present dynamic_key = kwargs.get("dynamic_key") if dynamic_key: - dynamic_key_exp = t.cast(exp.Expression, kwargs.get("dynamic_key_exp")) + dynamic_key_exp = t.cast(exp.Expr, kwargs.get("dynamic_key_exp")) dynamic_key_unique = t.cast(bool, kwargs.get("dynamic_key_unique")) try: @@ -413,7 +412,7 @@ def _replace_by_key( target_table: TableName, source_table: QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]], - key: t.Sequence[exp.Expression], + key: t.Sequence[exp.Expr], is_unique_key: bool, source_columns: t.Optional[t.List[str]] = None, ) -> None: @@ -439,7 +438,7 @@ def insert_overwrite_by_partition( self, table_name: TableName, query_or_df: QueryOrDF, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, source_columns: t.Optional[t.List[str]] = None, ) -> None: @@ -455,7 +454,11 @@ def insert_overwrite_by_partition( ) def _create_table_like( - self, target_table_name: TableName, source_table_name: TableName + self, + target_table_name: TableName, + source_table_name: TableName, + exists: bool, + **kwargs: t.Any, ) -> None: """Create table with identical structure as 
source table""" self.execute( @@ -482,7 +485,7 @@ def _get_partition_ids( def _create_table( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -590,7 +593,7 @@ def _rename_table( self.execute(f"RENAME TABLE {old_table_sql} TO {new_table_sql}{self._on_cluster_sql()}") - def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expression]) -> None: + def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expr]) -> None: delete_expr = exp.delete(table_name, where) if self.engine_run_mode.is_cluster: delete_expr.set("cluster", exp.OnCluster(this=exp.to_identifier(self.cluster))) @@ -631,21 +634,20 @@ def _drop_object( kind: What kind of object to drop. Defaults to TABLE **drop_args: Any extra arguments to set on the Drop expression """ - self.execute( - exp.Drop( - this=exp.to_table(name), - kind=kind, - exists=exists, - cluster=exp.OnCluster(this=exp.to_identifier(self.cluster)) - if self.engine_run_mode.is_cluster - else None, - **drop_args, - ) + super()._drop_object( + name=name, + exists=exists, + kind=kind, + cascade=cascade, + cluster=exp.OnCluster(this=exp.to_identifier(self.cluster)) + if self.engine_run_mode.is_cluster + else None, + **drop_args, ) def _build_partitioned_by_exp( self, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], **kwargs: t.Any, ) -> t.Optional[t.Union[exp.PartitionedByProperty, exp.Property]]: return exp.PartitionedByProperty( @@ -710,14 +712,14 @@ def use_server_nulls_for_unmatched_after_join( return query def _build_settings_property( - self, key: str, value: exp.Expression | str | int | float + self, key: str, value: exp.Expr | str | int | float ) -> exp.SettingsProperty: return exp.SettingsProperty( expressions=[ exp.EQ( this=exp.var(key.lower()), expression=value - if 
isinstance(value, exp.Expression) + if isinstance(value, exp.Expr) else exp.Literal(this=value, is_string=isinstance(value, str)), ) ] @@ -728,17 +730,17 @@ def _build_table_properties_exp( catalog_name: t.Optional[str] = None, table_format: t.Optional[str] = None, storage_format: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, partition_interval_unit: t.Optional[IntervalUnit] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, empty_ctas: bool = False, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] table_engine = self.DEFAULT_TABLE_ENGINE if storage_format: @@ -805,9 +807,7 @@ def _build_table_properties_exp( ttl = table_properties_copy.pop("TTL", None) if ttl: properties.append( - exp.MergeTreeTTL( - expressions=[ttl if isinstance(ttl, exp.Expression) else exp.var(ttl)] - ) + exp.MergeTreeTTL(expressions=[ttl if isinstance(ttl, exp.Expr) else exp.var(ttl)]) ) if ( @@ -841,12 +841,12 @@ def _build_table_properties_exp( def _build_view_properties_exp( self, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, table_description: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: """Creates a SQLGlot table properties expression for view""" - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] view_properties_copy = view_properties.copy() if view_properties else {} diff --git a/sqlmesh/core/engine_adapter/databricks.py 
b/sqlmesh/core/engine_adapter/databricks.py index 2571cb7214..e3d029a17d 100644 --- a/sqlmesh/core/engine_adapter/databricks.py +++ b/sqlmesh/core/engine_adapter/databricks.py @@ -5,7 +5,9 @@ from functools import partial from sqlglot import exp + from sqlmesh.core.dialect import to_schema +from sqlmesh.core.engine_adapter.mixins import GrantsFromInfoSchemaMixin from sqlmesh.core.engine_adapter.shared import ( CatalogSupport, DataObject, @@ -28,12 +30,16 @@ logger = logging.getLogger(__name__) -class DatabricksEngineAdapter(SparkEngineAdapter): +class DatabricksEngineAdapter(SparkEngineAdapter, GrantsFromInfoSchemaMixin): DIALECT = "databricks" INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.REPLACE_WHERE SUPPORTS_CLONING = True SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True + SUPPORTS_GRANTS = True + USE_CATALOG_IN_GRANTS = True + # Spark has this set to false for compatibility when mixing with Trino but that isn't a concern with Databricks + QUOTE_IDENTIFIERS_IN_VIEWS = True SCHEMA_DIFFER_KWARGS = { "support_positional_add": True, "nested_support": NestedSupport.ALL, @@ -72,21 +78,21 @@ def can_access_databricks_connect(cls, disable_databricks_connect: bool) -> bool def _use_spark_session(self) -> bool: if self.can_access_spark_session(bool(self._extra_config.get("disable_spark_session"))): return True - return ( - self.can_access_databricks_connect( - bool(self._extra_config.get("disable_databricks_connect")) - ) - and ( - { - "databricks_connect_server_hostname", - "databricks_connect_access_token", - }.issubset(self._extra_config) - ) - and ( - "databricks_connect_cluster_id" in self._extra_config - or "databricks_connect_use_serverless" in self._extra_config - ) - ) + + if self.can_access_databricks_connect( + bool(self._extra_config.get("disable_databricks_connect")) + ): + if self._extra_config.get("databricks_connect_use_serverless"): + return True + + if { + "databricks_connect_cluster_id", + 
"databricks_connect_server_hostname", + "databricks_connect_access_token", + }.issubset(self._extra_config): + return True + + return False @property def is_spark_session_connection(self) -> bool: @@ -102,7 +108,7 @@ def _set_spark_engine_adapter_if_needed(self) -> None: connect_kwargs = dict( host=self._extra_config["databricks_connect_server_hostname"], - token=self._extra_config["databricks_connect_access_token"], + token=self._extra_config.get("databricks_connect_access_token"), ) if "databricks_connect_use_serverless" in self._extra_config: connect_kwargs["serverless"] = True @@ -149,6 +155,28 @@ def spark(self) -> PySparkSession: def catalog_support(self) -> CatalogSupport: return CatalogSupport.FULL_SUPPORT + @staticmethod + def _grant_object_kind(table_type: DataObjectType) -> str: + if table_type == DataObjectType.VIEW: + return "VIEW" + if table_type == DataObjectType.MATERIALIZED_VIEW: + return "MATERIALIZED VIEW" + return "TABLE" + + def _get_grant_expression(self, table: exp.Table) -> exp.Expr: + # We only care about explicitly granted privileges and not inherited ones + # if this is removed you would see grants inherited from the catalog get returned + expression = super()._get_grant_expression(table) + expression.args["where"].set( + "this", + exp.and_( + expression.args["where"].this, + exp.column("inherited_from").eq(exp.Literal.string("NONE")), + wrap=False, + ), + ) + return expression + def _begin_session(self, properties: SessionProperties) -> t.Any: """Begin a new session.""" # Align the different possible connectors to a single catalog @@ -182,7 +210,7 @@ def query_factory() -> Query: return [SourceQuery(query_factory=query_factory)] def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> DF: """Fetches a DataFrame that can be either Pandas or PySpark from the cursor""" if self.is_spark_session_connection: @@ -195,7 +223,7 @@ 
def _fetch_native_df( return self.cursor.fetchall_arrow().to_pandas() def fetchdf( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> pd.DataFrame: """ Returns a Pandas DataFrame from a query or expression. @@ -299,6 +327,7 @@ def clone_table( target_table_name: TableName, source_table_name: TableName, replace: bool = False, + exists: bool = True, clone_kwargs: t.Optional[t.Dict[str, t.Any]] = None, **kwargs: t.Any, ) -> None: @@ -335,10 +364,10 @@ def _build_table_properties_exp( catalog_name: t.Optional[str] = None, table_format: t.Optional[str] = None, storage_format: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, partition_interval_unit: t.Optional[IntervalUnit] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, @@ -365,3 +394,20 @@ def _build_table_properties_exp( expressions.append(clustered_by_exp) properties = exp.Properties(expressions=expressions) return properties + + def _build_column_defs( + self, + target_columns_to_types: t.Dict[str, exp.DataType], + column_descriptions: t.Optional[t.Dict[str, str]] = None, + is_view: bool = False, + materialized: bool = False, + ) -> t.List[exp.ColumnDef]: + # Databricks requires column types to be specified when adding column comments + # in CREATE MATERIALIZED VIEW statements. Override is_view to False to force + # column types to be included when comments are present. 
+ if is_view and materialized and column_descriptions: + is_view = False + + return super()._build_column_defs( + target_columns_to_types, column_descriptions, is_view, materialized + ) diff --git a/sqlmesh/core/engine_adapter/duckdb.py b/sqlmesh/core/engine_adapter/duckdb.py index 3b057219e0..ebfcaa7901 100644 --- a/sqlmesh/core/engine_adapter/duckdb.py +++ b/sqlmesh/core/engine_adapter/duckdb.py @@ -145,7 +145,7 @@ def _get_data_objects( for row in df.itertuples() ] - def _normalize_decimal_value(self, col: exp.Expression, precision: int) -> exp.Expression: + def _normalize_decimal_value(self, col: exp.Expr, precision: int) -> exp.Expr: """ duckdb truncates instead of rounding when casting to decimal. @@ -163,7 +163,7 @@ def _normalize_decimal_value(self, col: exp.Expression, precision: int) -> exp.E def _create_table( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py index a528be3cb4..e1dffe88f4 100644 --- a/sqlmesh/core/engine_adapter/fabric.py +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -10,23 +10,17 @@ from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter from sqlmesh.core.engine_adapter.shared import ( InsertOverwriteStrategy, - SourceQuery, ) -from sqlmesh.core.engine_adapter.base import EngineAdapter from sqlmesh.utils.errors import SQLMeshError from sqlmesh.utils.connection_pool import ConnectionPool +from sqlmesh.core.schema_diff import TableAlterOperation +from sqlmesh.utils import random_id -if t.TYPE_CHECKING: - from sqlmesh.core._typing import TableName - - -from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin - logger = logging.getLogger(__name__) -class FabricEngineAdapter(LogicalMergeMixin, MSSQLEngineAdapter): +class 
FabricEngineAdapter(MSSQLEngineAdapter): """ Adapter for Microsoft Fabric. """ @@ -58,26 +52,6 @@ def _target_catalog(self) -> t.Optional[str]: def _target_catalog(self, value: t.Optional[str]) -> None: self._connection_pool.set_attribute("target_catalog", value) - def _insert_overwrite_by_condition( - self, - table_name: TableName, - source_queries: t.List[SourceQuery], - target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - where: t.Optional[exp.Condition] = None, - insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, - **kwargs: t.Any, - ) -> None: - # Override to avoid MERGE statement which isn't fully supported in Fabric - return EngineAdapter._insert_overwrite_by_condition( - self, - table_name=table_name, - source_queries=source_queries, - target_columns_to_types=target_columns_to_types, - where=where, - insert_overwrite_strategy_override=InsertOverwriteStrategy.DELETE_INSERT, - **kwargs, - ) - @property def api_client(self) -> FabricHttpClient: # the requests Session is not guaranteed to be threadsafe @@ -181,6 +155,113 @@ def set_current_catalog(self, catalog_name: str) -> None: f"Unable to switch catalog to {catalog_name}, catalog ended up as {catalog_after_switch}" ) + def alter_table( + self, alter_expressions: t.Union[t.List[exp.Alter], t.List[TableAlterOperation]] + ) -> None: + """ + Applies alter expressions to a table. Fabric has limited support for ALTER TABLE, + so this method implements a workaround for column type changes. + This method is self-contained and sets its own catalog context. + """ + if not alter_expressions: + return + + # Get the target table from the first expression to determine the correct catalog. 
+ first_op = alter_expressions[0] + expression = first_op.expression if isinstance(first_op, TableAlterOperation) else first_op + if not isinstance(expression, exp.Alter) or not expression.this.catalog: + # Fallback for unexpected scenarios + logger.warning( + "Could not determine catalog from alter expression, executing with current context." + ) + super().alter_table(alter_expressions) + return + + target_catalog = expression.this.catalog + self.set_current_catalog(target_catalog) + + with self.transaction(): + for op in alter_expressions: + expression = op.expression if isinstance(op, TableAlterOperation) else op + + if not isinstance(expression, exp.Alter): + self.execute(expression) + continue + + for action in expression.actions: + table_name = expression.this + + table_name_without_catalog = table_name.copy() + table_name_without_catalog.set("catalog", None) + + is_type_change = isinstance(action, exp.AlterColumn) and action.args.get( + "dtype" + ) + + if is_type_change: + column_to_alter = action.this + new_type = action.args["dtype"] + temp_column_name_str = f"{column_to_alter.name}__{random_id(short=True)}" + temp_column_name = exp.to_identifier(temp_column_name_str) + + logger.info( + "Applying workaround for column '%s' on table '%s' to change type to '%s'.", + column_to_alter.sql(), + table_name.sql(), + new_type.sql(), + ) + + # Step 1: Add a temporary column. + add_column_expr = exp.Alter( + this=table_name_without_catalog.copy(), + kind="TABLE", + actions=[ + exp.ColumnDef(this=temp_column_name.copy(), kind=new_type.copy()) + ], + ) + add_sql = self._to_sql(add_column_expr) + self.execute(add_sql) + + # Step 2: Copy and cast data. + update_sql = self._to_sql( + exp.Update( + this=table_name_without_catalog.copy(), + expressions=[ + exp.EQ( + this=temp_column_name.copy(), + expression=exp.Cast( + this=column_to_alter.copy(), to=new_type.copy() + ), + ) + ], + ) + ) + self.execute(update_sql) + + # Step 3: Drop the original column. 
+ drop_sql = self._to_sql( + exp.Alter( + this=table_name_without_catalog.copy(), + kind="TABLE", + actions=[exp.Drop(this=column_to_alter.copy(), kind="COLUMN")], + ) + ) + self.execute(drop_sql) + + # Step 4: Rename the temporary column. + old_name_qualified = f"{table_name_without_catalog.sql(dialect=self.dialect)}.{temp_column_name.sql(dialect=self.dialect)}" + new_name_unquoted = column_to_alter.sql( + dialect=self.dialect, identify=False + ) + rename_sql = f"EXEC sp_rename '{old_name_qualified}', '{new_name_unquoted}', 'COLUMN'" + self.execute(rename_sql) + else: + # For other alterations, execute directly. + direct_alter_expr = exp.Alter( + this=table_name_without_catalog.copy(), kind="TABLE", actions=[action] + ) + self.execute(direct_alter_expr) + class FabricHttpClient: def __init__(self, tenant_id: str, workspace_id: str, client_id: str, client_secret: str): diff --git a/sqlmesh/core/engine_adapter/mixins.py b/sqlmesh/core/engine_adapter/mixins.py index 865e47fb93..bf4bb970a2 100644 --- a/sqlmesh/core/engine_adapter/mixins.py +++ b/sqlmesh/core/engine_adapter/mixins.py @@ -7,9 +7,10 @@ from sqlglot import exp, parse_one from sqlglot.helper import seq_get +from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlmesh.core.engine_adapter.base import EngineAdapter -from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, SourceQuery +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.node import IntervalUnit from sqlmesh.core.dialect import schema_ from sqlmesh.core.schema_diff import TableAlterOperation @@ -17,7 +18,12 @@ if t.TYPE_CHECKING: from sqlmesh.core._typing import TableName - from sqlmesh.core.engine_adapter._typing import DF + from sqlmesh.core.engine_adapter._typing import ( + DCL, + DF, + GrantsConfig, + QueryOrDF, + ) from sqlmesh.core.engine_adapter.base import QueryOrDF logger = logging.getLogger(__name__) @@ -32,9 +38,9 @@ def merge( target_table: TableName, source_table: 
QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]], - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], when_matched: t.Optional[exp.Whens] = None, - merge_filter: t.Optional[exp.Expression] = None, + merge_filter: t.Optional[exp.Expr] = None, source_columns: t.Optional[t.List[str]] = None, **kwargs: t.Any, ) -> None: @@ -52,18 +58,14 @@ def merge( class PandasNativeFetchDFSupportMixin(EngineAdapter): def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> DF: """Fetches a Pandas DataFrame from a SQL query.""" from warnings import catch_warnings, filterwarnings from pandas.io.sql import read_sql_query - sql = ( - self._to_sql(query, quote=quote_identifiers) - if isinstance(query, exp.Expression) - else query - ) + sql = self._to_sql(query, quote=quote_identifiers) if isinstance(query, exp.Expr) else query logger.debug(f"Executing SQL:\n{sql}") with catch_warnings(), self.transaction(): filterwarnings( @@ -75,59 +77,13 @@ def _fetch_native_df( return df -class InsertOverwriteWithMergeMixin(EngineAdapter): - def _insert_overwrite_by_condition( - self, - table_name: TableName, - source_queries: t.List[SourceQuery], - target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - where: t.Optional[exp.Condition] = None, - insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, - **kwargs: t.Any, - ) -> None: - """ - Some engines do not support `INSERT OVERWRITE` but instead support - doing an "INSERT OVERWRITE" using a Merge expression but with the - predicate being `False`. 
- """ - target_columns_to_types = target_columns_to_types or self.columns(table_name) - for source_query in source_queries: - with source_query as query: - query = self._order_projections_and_filter( - query, target_columns_to_types, where=where - ) - columns = [exp.column(col) for col in target_columns_to_types] - when_not_matched_by_source = exp.When( - matched=False, - source=True, - condition=where, - then=exp.Delete(), - ) - when_not_matched_by_target = exp.When( - matched=False, - source=False, - then=exp.Insert( - this=exp.Tuple(expressions=columns), - expression=exp.Tuple(expressions=columns), - ), - ) - self._merge( - target_table=table_name, - query=query, - on=exp.false(), - whens=exp.Whens( - expressions=[when_not_matched_by_source, when_not_matched_by_target] - ), - ) - - class HiveMetastoreTablePropertiesMixin(EngineAdapter): MAX_TABLE_COMMENT_LENGTH = 4000 MAX_COLUMN_COMMENT_LENGTH = 4000 def _build_partitioned_by_exp( self, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], *, catalog_name: t.Optional[str] = None, **kwargs: t.Any, @@ -160,16 +116,16 @@ def _build_table_properties_exp( catalog_name: t.Optional[str] = None, table_format: t.Optional[str] = None, storage_format: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, partition_interval_unit: t.Optional[IntervalUnit] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] if table_format and self.dialect == "spark": 
properties.append(exp.FileFormatProperty(this=exp.Var(this=table_format))) @@ -206,12 +162,12 @@ def _build_table_properties_exp( def _build_view_properties_exp( self, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, table_description: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: """Creates a SQLGlot table properties expression for view""" - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] if table_description: properties.append( @@ -234,7 +190,7 @@ def _truncate_comment(self, comment: str, length: t.Optional[int]) -> str: class GetCurrentCatalogFromFunctionMixin(EngineAdapter): - CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.func("current_catalog") + CURRENT_CATALOG_EXPRESSION: exp.Expr = exp.func("current_catalog") def get_current_catalog(self) -> t.Optional[str]: """Returns the catalog name of the current connection.""" @@ -280,7 +236,7 @@ def _default_precision_to_max( def _build_create_table_exp( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -362,11 +318,11 @@ def is_destructive(self) -> bool: return False @property - def _alter_actions(self) -> t.List[exp.Expression]: + def _alter_actions(self) -> t.List[exp.Expr]: return [exp.Cluster(expressions=self.cluster_key_expressions)] @property - def cluster_key_expressions(self) -> t.List[exp.Expression]: + def cluster_key_expressions(self) -> t.List[exp.Expr]: # Note: Assumes `clustering_key` as a string like: # - "(col_a)" # - "(col_a, col_b)" @@ -386,14 +342,14 @@ def is_destructive(self) -> bool: return False @property - def _alter_actions(self) -> t.List[exp.Expression]: + def _alter_actions(self) -> t.List[exp.Expr]: return [exp.Command(this="DROP", 
expression="CLUSTERING KEY")] class ClusteredByMixin(EngineAdapter): def _build_clustered_by_exp( self, - clustered_by: t.List[exp.Expression], + clustered_by: t.List[exp.Expr], **kwargs: t.Any, ) -> t.Optional[exp.Cluster]: return exp.Cluster(expressions=[c.copy() for c in clustered_by]) @@ -450,9 +406,9 @@ def logical_merge( target_table: TableName, source_table: QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]], - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], when_matched: t.Optional[exp.Whens] = None, - merge_filter: t.Optional[exp.Expression] = None, + merge_filter: t.Optional[exp.Expr] = None, source_columns: t.Optional[t.List[str]] = None, ) -> None: """ @@ -492,12 +448,12 @@ def concat_columns( decimal_precision: int = 3, timestamp_precision: int = MAX_TIMESTAMP_PRECISION, delimiter: str = ",", - ) -> exp.Expression: + ) -> exp.Expr: """ Produce an expression that generates a string version of a record, that is: - Every column converted to a string representation, joined together into a single string using the specified :delimiter """ - expressions_to_concat: t.List[exp.Expression] = [] + expressions_to_concat: t.List[exp.Expr] = [] for idx, (column, type) in enumerate(columns_to_types.items()): expressions_to_concat.append( exp.func( @@ -515,11 +471,11 @@ def concat_columns( def normalize_value( self, - expr: exp.Expression, + expr: exp.Expr, type: exp.DataType, decimal_precision: int = 3, timestamp_precision: int = MAX_TIMESTAMP_PRECISION, - ) -> exp.Expression: + ) -> exp.Expr: """ Return an expression that converts the values inside the column `col` to a normalized string @@ -530,6 +486,7 @@ def normalize_value( - `boolean` columns -> '1' or '0' - NULLS -> "" (empty string) """ + value: exp.Expr if type.is_type(exp.DataType.Type.BOOLEAN): value = self._normalize_boolean_value(expr) elif type.is_type(*exp.DataType.INTEGER_TYPES): @@ -552,12 +509,12 @@ def normalize_value( return exp.cast(value, 
to=exp.DataType.build("VARCHAR")) - def _normalize_nested_value(self, expr: exp.Expression) -> exp.Expression: + def _normalize_nested_value(self, expr: exp.Expr) -> exp.Expr: return expr def _normalize_timestamp_value( - self, expr: exp.Expression, type: exp.DataType, precision: int - ) -> exp.Expression: + self, expr: exp.Expr, type: exp.DataType, precision: int + ) -> exp.Expr: if precision > self.MAX_TIMESTAMP_PRECISION: raise ValueError( f"Requested timestamp precision '{precision}' exceeds maximum supported precision: {self.MAX_TIMESTAMP_PRECISION}" @@ -587,11 +544,145 @@ def _normalize_timestamp_value( return expr - def _normalize_integer_value(self, expr: exp.Expression) -> exp.Expression: + def _normalize_integer_value(self, expr: exp.Expr) -> exp.Expr: return exp.cast(expr, "BIGINT") - def _normalize_decimal_value(self, expr: exp.Expression, precision: int) -> exp.Expression: + def _normalize_decimal_value(self, expr: exp.Expr, precision: int) -> exp.Expr: return exp.cast(expr, f"DECIMAL(38,{precision})") - def _normalize_boolean_value(self, expr: exp.Expression) -> exp.Expression: + def _normalize_boolean_value(self, expr: exp.Expr) -> exp.Expr: return exp.cast(expr, "INT") + + +class GrantsFromInfoSchemaMixin(EngineAdapter): + CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expr = exp.func("current_user") + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = False + USE_CATALOG_IN_GRANTS = False + GRANT_INFORMATION_SCHEMA_TABLE_NAME = "table_privileges" + + @staticmethod + @abc.abstractmethod + def _grant_object_kind(table_type: DataObjectType) -> t.Optional[str]: + pass + + @abc.abstractmethod + def _get_current_schema(self) -> str: + pass + + def _dcl_grants_config_expr( + self, + dcl_cmd: t.Type[DCL], + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expr]: + expressions: t.List[exp.Expr] = [] + if not grants_config: + return expressions + + object_kind = self._grant_object_kind(table_type) + for 
privilege, principals in grants_config.items(): + args: t.Dict[str, t.Any] = { + "privileges": [exp.GrantPrivilege(this=exp.Var(this=privilege))], + "securable": table.copy(), + } + if object_kind: + args["kind"] = exp.Var(this=object_kind) + if self.SUPPORTS_MULTIPLE_GRANT_PRINCIPALS: + args["principals"] = [ + normalize_identifiers( + parse_one(principal, into=exp.GrantPrincipal, dialect=self.dialect), + dialect=self.dialect, + ) + for principal in principals + ] + expressions.append(dcl_cmd(**args)) # type: ignore[arg-type] + else: + for principal in principals: + args["principals"] = [ + normalize_identifiers( + parse_one(principal, into=exp.GrantPrincipal, dialect=self.dialect), + dialect=self.dialect, + ) + ] + expressions.append(dcl_cmd(**args)) # type: ignore[arg-type] + + return expressions + + def _apply_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expr]: + return self._dcl_grants_config_expr(exp.Grant, table, grants_config, table_type) + + def _revoke_grants_config_expr( + self, + table: exp.Table, + grants_config: GrantsConfig, + table_type: DataObjectType = DataObjectType.TABLE, + ) -> t.List[exp.Expr]: + return self._dcl_grants_config_expr(exp.Revoke, table, grants_config, table_type) + + def _get_grant_expression(self, table: exp.Table) -> exp.Expr: + schema_identifier = table.args.get("db") or normalize_identifiers( + exp.to_identifier(self._get_current_schema(), quoted=True), dialect=self.dialect + ) + schema_name = schema_identifier.this + table_name = table.args.get("this").this # type: ignore + + grant_conditions = [ + exp.column("table_schema").eq(exp.Literal.string(schema_name)), + exp.column("table_name").eq(exp.Literal.string(table_name)), + exp.column("grantor").eq(self.CURRENT_USER_OR_ROLE_EXPRESSION), + exp.column("grantee").neq(self.CURRENT_USER_OR_ROLE_EXPRESSION), + ] + + info_schema_table = normalize_identifiers( + 
exp.table_(self.GRANT_INFORMATION_SCHEMA_TABLE_NAME, db="information_schema"), + dialect=self.dialect, + ) + if self.USE_CATALOG_IN_GRANTS: + catalog_identifier = table.args.get("catalog") + if not catalog_identifier: + catalog_name = self.get_current_catalog() + if not catalog_name: + raise SQLMeshError( + "Current catalog could not be determined for fetching grants. This is unexpected." + ) + catalog_identifier = normalize_identifiers( + exp.to_identifier(catalog_name, quoted=True), dialect=self.dialect + ) + catalog_name = catalog_identifier.this + info_schema_table.set("catalog", catalog_identifier.copy()) + grant_conditions.insert( + 0, exp.column("table_catalog").eq(exp.Literal.string(catalog_name)) + ) + + return ( + exp.select("privilege_type", "grantee") + .from_(info_schema_table) + .where(exp.and_(*grant_conditions)) + ) + + def _get_current_grants_config(self, table: exp.Table) -> GrantsConfig: + grant_expr = self._get_grant_expression(table) + + results = self.fetchall(grant_expr) + + grants_dict: GrantsConfig = {} + for privilege_raw, grantee_raw in results: + if privilege_raw is None or grantee_raw is None: + continue + + privilege = str(privilege_raw) + grantee = str(grantee_raw) + if not privilege or not grantee: + continue + + grantees = grants_dict.setdefault(privilege, []) + if grantee not in grantees: + grantees.append(grantee) + + return grants_dict diff --git a/sqlmesh/core/engine_adapter/mssql.py b/sqlmesh/core/engine_adapter/mssql.py index 50a67b4b37..e381c0a198 100644 --- a/sqlmesh/core/engine_adapter/mssql.py +++ b/sqlmesh/core/engine_adapter/mssql.py @@ -3,6 +3,7 @@ from __future__ import annotations import typing as t +import logging from sqlglot import exp @@ -13,10 +14,10 @@ InsertOverwriteStrategy, MERGE_SOURCE_ALIAS, MERGE_TARGET_ALIAS, + _get_data_object_cache_key, ) from sqlmesh.core.engine_adapter.mixins import ( GetCurrentCatalogFromFunctionMixin, - InsertOverwriteWithMergeMixin, PandasNativeFetchDFSupportMixin, 
VarcharSizeWorkaroundMixin, RowDiffMixin, @@ -37,11 +38,13 @@ from sqlmesh.core.engine_adapter._typing import DF, Query, QueryOrDF +logger = logging.getLogger(__name__) + + @set_catalog() class MSSQLEngineAdapter( EngineAdapterWithIndexSupport, PandasNativeFetchDFSupportMixin, - InsertOverwriteWithMergeMixin, GetCurrentCatalogFromFunctionMixin, VarcharSizeWorkaroundMixin, RowDiffMixin, @@ -53,6 +56,7 @@ class MSSQLEngineAdapter( COMMENT_CREATION_TABLE = CommentCreationTable.UNSUPPORTED COMMENT_CREATION_VIEW = CommentCreationView.UNSUPPORTED SUPPORTS_REPLACE_TABLE = False + MAX_IDENTIFIER_LENGTH = 128 SUPPORTS_QUERY_EXECUTION_TRACKING = True SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { @@ -74,6 +78,7 @@ class MSSQLEngineAdapter( }, } VARIABLE_LENGTH_DATA_TYPES = {"binary", "varbinary", "char", "varchar", "nchar", "nvarchar"} + INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.MERGE @property def catalog_support(self) -> CatalogSupport: @@ -145,6 +150,10 @@ def build_var_length_col( def table_exists(self, table_name: TableName) -> bool: """MsSql doesn't support describe so we query information_schema.""" table = exp.to_table(table_name) + data_object_cache_key = _get_data_object_cache_key(table.catalog, table.db, table.name) + if data_object_cache_key in self._data_object_cache: + logger.debug("Table existence cache hit: %s", data_object_cache_key) + return self._data_object_cache[data_object_cache_key] is not None sql = ( exp.select("1") @@ -167,7 +176,7 @@ def drop_schema( schema_name: SchemaName, ignore_if_not_exists: bool = True, cascade: bool = False, - **drop_args: t.Dict[str, exp.Expression], + **drop_args: t.Dict[str, exp.Expr], ) -> None: """ MsSql doesn't support CASCADE clause and drops schemas unconditionally. 
@@ -196,9 +205,9 @@ def merge( target_table: TableName, source_table: QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]], - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], when_matched: t.Optional[exp.Whens] = None, - merge_filter: t.Optional[exp.Expression] = None, + merge_filter: t.Optional[exp.Expr] = None, source_columns: t.Optional[t.List[str]] = None, **kwargs: t.Any, ) -> None: @@ -392,7 +401,7 @@ def _get_data_objects( for row in dataframe.itertuples() ] - def _to_sql(self, expression: exp.Expression, quote: bool = True, **kwargs: t.Any) -> str: + def _to_sql(self, expression: exp.Expr, quote: bool = True, **kwargs: t.Any) -> str: sql = super()._to_sql(expression, quote=quote, **kwargs) return f"{sql};" @@ -414,7 +423,9 @@ def _insert_overwrite_by_condition( insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, **kwargs: t.Any, ) -> None: - if not where or where == exp.true(): + # note that this is passed as table_properties here rather than physical_properties + use_merge_strategy = kwargs.get("table_properties", {}).get("mssql_merge_exists") + if (not where or where == exp.true()) and not use_merge_strategy: # this is a full table replacement, call the base strategy to do DELETE+INSERT # which will result in TRUNCATE+INSERT due to how we have overridden self.delete_from() return EngineAdapter._insert_overwrite_by_condition( @@ -427,7 +438,7 @@ def _insert_overwrite_by_condition( **kwargs, ) - # For actual conditional overwrites, use MERGE from InsertOverwriteWithMergeMixin + # For conditional overwrites or when mssql_merge_exists is set use MERGE return super()._insert_overwrite_by_condition( table_name=table_name, source_queries=source_queries, @@ -437,7 +448,7 @@ def _insert_overwrite_by_condition( **kwargs, ) - def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expression]) -> None: + def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expr]) 
-> None: if where == exp.true(): # "A TRUNCATE TABLE operation can be rolled back within a transaction." # ref: https://learn.microsoft.com/en-us/sql/t-sql/statements/truncate-table-transact-sql?view=sql-server-ver15#remarks diff --git a/sqlmesh/core/engine_adapter/mysql.py b/sqlmesh/core/engine_adapter/mysql.py index 26cc7c0197..66759dc440 100644 --- a/sqlmesh/core/engine_adapter/mysql.py +++ b/sqlmesh/core/engine_adapter/mysql.py @@ -73,7 +73,7 @@ def drop_schema( schema_name: SchemaName, ignore_if_not_exists: bool = True, cascade: bool = False, - **drop_args: t.Dict[str, exp.Expression], + **drop_args: t.Dict[str, exp.Expr], ) -> None: # MySQL doesn't support CASCADE clause and drops schemas unconditionally. super().drop_schema(schema_name, ignore_if_not_exists=ignore_if_not_exists, cascade=False) @@ -164,11 +164,11 @@ def _create_column_comments( exc_info=True, ) - def create_table_like( + def _create_table_like( self, target_table_name: TableName, source_table_name: TableName, - exists: bool = True, + exists: bool, **kwargs: t.Any, ) -> None: self.execute( diff --git a/sqlmesh/core/engine_adapter/postgres.py b/sqlmesh/core/engine_adapter/postgres.py index e9c212bd5f..6794169322 100644 --- a/sqlmesh/core/engine_adapter/postgres.py +++ b/sqlmesh/core/engine_adapter/postgres.py @@ -12,6 +12,7 @@ PandasNativeFetchDFSupportMixin, RowDiffMixin, logical_merge, + GrantsFromInfoSchemaMixin, ) from sqlmesh.core.engine_adapter.shared import set_catalog @@ -28,14 +29,19 @@ class PostgresEngineAdapter( PandasNativeFetchDFSupportMixin, GetCurrentCatalogFromFunctionMixin, RowDiffMixin, + GrantsFromInfoSchemaMixin, ): DIALECT = "postgres" + SUPPORTS_GRANTS = True SUPPORTS_INDEXES = True HAS_VIEW_BINDING = True CURRENT_CATALOG_EXPRESSION = exp.column("current_catalog") SUPPORTS_REPLACE_TABLE = False - MAX_IDENTIFIER_LENGTH = 63 + MAX_IDENTIFIER_LENGTH: t.Optional[int] = 63 SUPPORTS_QUERY_EXECUTION_TRACKING = True + GRANT_INFORMATION_SCHEMA_TABLE_NAME = "role_table_grants" + 
CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expr = exp.column("current_role") + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = True SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { # DECIMAL without precision is "up to 131072 digits before the decimal point; up to 16383 digits after the decimal point" @@ -67,7 +73,7 @@ class PostgresEngineAdapter( } def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> DF: """ `read_sql_query` when using psycopg will result on a hanging transaction that must be committed @@ -79,11 +85,11 @@ def _fetch_native_df( self._connection_pool.commit() return df - def create_table_like( + def _create_table_like( self, target_table_name: TableName, source_table_name: TableName, - exists: bool = True, + exists: bool, **kwargs: t.Any, ) -> None: self.execute( @@ -107,9 +113,9 @@ def merge( target_table: TableName, source_table: QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]], - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], when_matched: t.Optional[exp.Whens] = None, - merge_filter: t.Optional[exp.Expression] = None, + merge_filter: t.Optional[exp.Expr] = None, source_columns: t.Optional[t.List[str]] = None, **kwargs: t.Any, ) -> None: diff --git a/sqlmesh/core/engine_adapter/redshift.py b/sqlmesh/core/engine_adapter/redshift.py index 7979268473..c2a27954cd 100644 --- a/sqlmesh/core/engine_adapter/redshift.py +++ b/sqlmesh/core/engine_adapter/redshift.py @@ -14,6 +14,7 @@ VarcharSizeWorkaroundMixin, RowDiffMixin, logical_merge, + GrantsFromInfoSchemaMixin, ) from sqlmesh.core.engine_adapter.shared import ( CommentCreationView, @@ -40,12 +41,15 @@ class RedshiftEngineAdapter( NonTransactionalTruncateMixin, VarcharSizeWorkaroundMixin, RowDiffMixin, + GrantsFromInfoSchemaMixin, ): DIALECT = "redshift" CURRENT_CATALOG_EXPRESSION = exp.func("current_database") # Redshift doesn't 
support comments for VIEWs WITH NO SCHEMA BINDING (which we always use) COMMENT_CREATION_VIEW = CommentCreationView.UNSUPPORTED SUPPORTS_REPLACE_TABLE = False + SUPPORTS_GRANTS = True + SUPPORTS_MULTIPLE_GRANT_PRINCIPALS = True SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { @@ -139,7 +143,7 @@ def cursor(self) -> t.Any: return cursor def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> pd.DataFrame: """Fetches a Pandas DataFrame from the cursor""" import pandas as pd @@ -213,7 +217,7 @@ def create_view( materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, table_description: t.Optional[str] = None, column_descriptions: t.Optional[t.Dict[str, str]] = None, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, source_columns: t.Optional[t.List[str]] = None, **create_kwargs: t.Any, ) -> None: @@ -223,7 +227,7 @@ def create_view( swap tables out from under views. Therefore, we create the view as non-binding. 
""" no_schema_binding = True - if isinstance(query_or_df, exp.Expression): + if isinstance(query_or_df, exp.Expr): # We can't include NO SCHEMA BINDING if the query has a recursive CTE has_recursive_cte = any( w.args.get("recursive", False) for w in query_or_df.find_all(exp.With) @@ -363,9 +367,9 @@ def merge( target_table: TableName, source_table: QueryOrDF, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]], - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], when_matched: t.Optional[exp.Whens] = None, - merge_filter: t.Optional[exp.Expression] = None, + merge_filter: t.Optional[exp.Expr] = None, source_columns: t.Optional[t.List[str]] = None, **kwargs: t.Any, ) -> None: @@ -396,12 +400,12 @@ def _merge( self, target_table: TableName, query: Query, - on: exp.Expression, + on: exp.Expr, whens: exp.Whens, ) -> None: # Redshift does not support table aliases in the target table of a MERGE statement. # So we must use the actual table name instead of an alias, as we do with the source table. - def resolve_target_table(expression: exp.Expression) -> exp.Expression: + def resolve_target_table(expression: exp.Expr) -> exp.Expr: if ( isinstance(expression, exp.Column) and expression.table.upper() == MERGE_TARGET_ALIAS @@ -432,7 +436,7 @@ def resolve_target_table(expression: exp.Expression) -> exp.Expression: track_rows_processed=True, ) - def _normalize_decimal_value(self, expr: exp.Expression, precision: int) -> exp.Expression: + def _normalize_decimal_value(self, expr: exp.Expr, precision: int) -> exp.Expr: # Redshift is finicky. It truncates when the data is already in a table, but rounds when the data is generated as part of a SELECT. 
# # The following works: diff --git a/sqlmesh/core/engine_adapter/risingwave.py b/sqlmesh/core/engine_adapter/risingwave.py index fdcee90f0f..61b44f5bbb 100644 --- a/sqlmesh/core/engine_adapter/risingwave.py +++ b/sqlmesh/core/engine_adapter/risingwave.py @@ -32,6 +32,7 @@ class RisingwaveEngineAdapter(PostgresEngineAdapter): SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_TRANSACTIONS = False MAX_IDENTIFIER_LENGTH = None + SUPPORTS_GRANTS = False def columns( self, table_name: TableName, include_pseudo_columns: bool = False diff --git a/sqlmesh/core/engine_adapter/shared.py b/sqlmesh/core/engine_adapter/shared.py index 55f04a995e..ba0e1fa619 100644 --- a/sqlmesh/core/engine_adapter/shared.py +++ b/sqlmesh/core/engine_adapter/shared.py @@ -243,6 +243,8 @@ class InsertOverwriteStrategy(Enum): # Issue a single INSERT query to replace a data range. The assumption is that the query engine will transparently match partition bounds # and replace data rather than append to it. Trino is an example of this when `hive.insert-existing-partitions-behavior=OVERWRITE` is configured INTO_IS_OVERWRITE = 4 + # Do the INSERT OVERWRITE using merge since the engine doesn't support it natively + MERGE = 5 @property def is_delete_insert(self) -> bool: @@ -260,6 +262,10 @@ def is_replace_where(self) -> bool: def is_into_is_overwrite(self) -> bool: return self == InsertOverwriteStrategy.INTO_IS_OVERWRITE + @property + def is_merge(self) -> bool: + return self == InsertOverwriteStrategy.MERGE + class SourceQuery: def __init__( diff --git a/sqlmesh/core/engine_adapter/snowflake.py b/sqlmesh/core/engine_adapter/snowflake.py index 8a6f5e2fcc..09c530b8f3 100644 --- a/sqlmesh/core/engine_adapter/snowflake.py +++ b/sqlmesh/core/engine_adapter/snowflake.py @@ -15,6 +15,7 @@ GetCurrentCatalogFromFunctionMixin, ClusteredByMixin, RowDiffMixin, + GrantsFromInfoSchemaMixin, ) from sqlmesh.core.engine_adapter.shared import ( CatalogSupport, @@ -34,7 +35,12 @@ import pandas as pd from sqlmesh.core._typing 
import SchemaName, SessionProperties, TableName - from sqlmesh.core.engine_adapter._typing import DF, Query, QueryOrDF, SnowparkSession + from sqlmesh.core.engine_adapter._typing import ( + DF, + Query, + QueryOrDF, + SnowparkSession, + ) from sqlmesh.core.node import IntervalUnit @@ -46,7 +52,9 @@ "drop_catalog": CatalogSupport.REQUIRES_SET_CATALOG, # needs a catalog to issue a query to information_schema.databases even though the result is global } ) -class SnowflakeEngineAdapter(GetCurrentCatalogFromFunctionMixin, ClusteredByMixin, RowDiffMixin): +class SnowflakeEngineAdapter( + GetCurrentCatalogFromFunctionMixin, ClusteredByMixin, RowDiffMixin, GrantsFromInfoSchemaMixin +): DIALECT = "snowflake" SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True @@ -54,6 +62,7 @@ class SnowflakeEngineAdapter(GetCurrentCatalogFromFunctionMixin, ClusteredByMixi SUPPORTS_MANAGED_MODELS = True CURRENT_CATALOG_EXPRESSION = exp.func("current_database") SUPPORTS_CREATE_DROP_CATALOG = True + SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS = True SUPPORTED_DROP_CASCADE_OBJECT_KINDS = ["DATABASE", "SCHEMA", "TABLE"] SCHEMA_DIFFER_KWARGS = { "parameterized_type_defaults": { @@ -73,6 +82,9 @@ class SnowflakeEngineAdapter(GetCurrentCatalogFromFunctionMixin, ClusteredByMixi MANAGED_TABLE_KIND = "DYNAMIC TABLE" SNOWPARK = "snowpark" SUPPORTS_QUERY_EXECUTION_TRACKING = True + SUPPORTS_GRANTS = True + CURRENT_USER_OR_ROLE_EXPRESSION: exp.Expr = exp.func("CURRENT_ROLE") + USE_CATALOG_IN_GRANTS = True @contextlib.contextmanager def session(self, properties: SessionProperties) -> t.Iterator[None]: @@ -83,7 +95,7 @@ def session(self, properties: SessionProperties) -> t.Iterator[None]: if isinstance(warehouse, str): warehouse = exp.to_identifier(warehouse) - if not isinstance(warehouse, exp.Expression): + if not isinstance(warehouse, exp.Expr): raise SQLMeshError(f"Invalid warehouse: '{warehouse}'") warehouse_exp = quote_identifiers( @@ -127,6 +139,23 @@ def snowpark(self) -> 
t.Optional[SnowparkSession]: def catalog_support(self) -> CatalogSupport: return CatalogSupport.FULL_SUPPORT + @staticmethod + def _grant_object_kind(table_type: DataObjectType) -> str: + if table_type == DataObjectType.VIEW: + return "VIEW" + if table_type == DataObjectType.MATERIALIZED_VIEW: + return "MATERIALIZED VIEW" + if table_type == DataObjectType.MANAGED_TABLE: + return "DYNAMIC TABLE" + return "TABLE" + + def _get_current_schema(self) -> str: + """Returns the current default schema for the connection.""" + result = self.fetchone("SELECT CURRENT_SCHEMA()") + if not result or not result[0]: + raise SQLMeshError("Unable to determine current schema") + return str(result[0]) + def _create_catalog(self, catalog_name: exp.Identifier) -> None: props = exp.Properties( expressions=[exp.SchemaCommentProperty(this=exp.Literal.string(c.SQLMESH_MANAGED))] @@ -160,7 +189,7 @@ def _drop_catalog(self, catalog_name: exp.Identifier) -> None: def _create_table( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -196,9 +225,9 @@ def create_managed_table( table_name: TableName, query: Query, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, table_description: t.Optional[str] = None, column_descriptions: t.Optional[t.Dict[str, str]] = None, source_columns: t.Optional[t.List[str]] = None, @@ -249,7 +278,7 @@ def create_view( materialized_properties: t.Optional[t.Dict[str, t.Any]] = None, table_description: 
t.Optional[str] = None, column_descriptions: t.Optional[t.Dict[str, str]] = None, - view_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + view_properties: t.Optional[t.Dict[str, exp.Expr]] = None, source_columns: t.Optional[t.List[str]] = None, **create_kwargs: t.Any, ) -> None: @@ -282,16 +311,16 @@ def _build_table_properties_exp( catalog_name: t.Optional[str] = None, table_format: t.Optional[str] = None, storage_format: t.Optional[str] = None, - partitioned_by: t.Optional[t.List[exp.Expression]] = None, + partitioned_by: t.Optional[t.List[exp.Expr]] = None, partition_interval_unit: t.Optional[IntervalUnit] = None, - clustered_by: t.Optional[t.List[exp.Expression]] = None, - table_properties: t.Optional[t.Dict[str, exp.Expression]] = None, + clustered_by: t.Optional[t.List[exp.Expr]] = None, + table_properties: t.Optional[t.Dict[str, exp.Expr]] = None, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, table_description: t.Optional[str] = None, table_kind: t.Optional[str] = None, **kwargs: t.Any, ) -> t.Optional[exp.Properties]: - properties: t.List[exp.Expression] = [] + properties: t.List[exp.Expr] = [] # TODO: there is some overlap with the base class and other engine adapters # we need a better way of filtering table properties relevent to the current engine @@ -378,6 +407,8 @@ def query_factory() -> Query: elif isinstance(df, pd.DataFrame): from snowflake.connector.pandas_tools import write_pandas + ordered_df = df[list(source_columns_to_types)] + # Workaround for https://github.com/snowflakedb/snowflake-connector-python/issues/1034 # The above issue has already been fixed upstream, but we keep the following # line anyway in order to support a wider range of Snowflake versions. 
@@ -388,16 +419,16 @@ def query_factory() -> Query: # See: https://stackoverflow.com/a/75627721 for column, kind in source_columns_to_types.items(): - if is_datetime64_any_dtype(df.dtypes[column]): + if is_datetime64_any_dtype(ordered_df.dtypes[column]): if kind.is_type("date"): # type: ignore - df[column] = pd.to_datetime(df[column]).dt.date # type: ignore - elif getattr(df.dtypes[column], "tz", None) is not None: # type: ignore - df[column] = pd.to_datetime(df[column]).dt.strftime( + ordered_df[column] = pd.to_datetime(ordered_df[column]).dt.date # type: ignore + elif getattr(ordered_df.dtypes[column], "tz", None) is not None: # type: ignore + ordered_df[column] = pd.to_datetime(ordered_df[column]).dt.strftime( "%Y-%m-%d %H:%M:%S.%f%z" ) # type: ignore # https://github.com/snowflakedb/snowflake-connector-python/issues/1677 else: # type: ignore - df[column] = pd.to_datetime(df[column]).dt.strftime( + ordered_df[column] = pd.to_datetime(ordered_df[column]).dt.strftime( "%Y-%m-%d %H:%M:%S.%f" ) # type: ignore @@ -407,7 +438,7 @@ def query_factory() -> Query: write_pandas( self._connection_pool.get(), - df, + ordered_df, temp_table.name, schema=temp_table.db or None, database=database.sql(dialect=self.dialect) if database else None, @@ -440,7 +471,7 @@ def cleanup() -> None: return [SourceQuery(query_factory=query_factory, cleanup_func=cleanup)] def _fetch_native_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> DF: import pandas as pd from snowflake.connector.errors import NotSupportedError @@ -526,16 +557,36 @@ def _get_data_objects( type=DataObjectType.from_str(row.type), # type: ignore clustering_key=row.clustering_key, # type: ignore ) - for row in df.itertuples() + # lowercase the column names for cases where Snowflake might return uppercase column names for certain catalogs + for row in df.rename(columns={col: col.lower() for col in 
df.columns}).itertuples() ] + def _get_grant_expression(self, table: exp.Table) -> exp.Expr: + # Upon execute the catalog in table expressions are properly normalized to handle the case where a user provides + # the default catalog in their connection config. This doesn't though update catalogs in strings like when querying + # the information schema. So we need to manually replace those here. + expression = super()._get_grant_expression(table) + for col_exp in expression.find_all(exp.Column): + if col_exp.this.name == "table_catalog": + and_exp = col_exp.parent + assert and_exp is not None, "Expected column expression to have a parent" + assert and_exp.expression, "Expected AND expression to have an expression" + normalized_catalog = self._normalize_catalog( + exp.table_("placeholder", db="placeholder", catalog=and_exp.expression.this) + ) + and_exp.set( + "expression", + exp.Literal.string(normalized_catalog.args["catalog"].alias_or_name), + ) + return expression + def set_current_catalog(self, catalog: str) -> None: self.execute(exp.Use(this=exp.to_identifier(catalog))) def set_current_schema(self, schema: str) -> None: self.execute(exp.Use(kind="SCHEMA", this=to_schema(schema))) - def _to_sql(self, expression: exp.Expression, quote: bool = True, **kwargs: t.Any) -> str: + def _normalize_catalog(self, expression: exp.Expr) -> exp.Expr: # note: important to use self._default_catalog instead of the self.default_catalog property # otherwise we get RecursionError: maximum recursion depth exceeded # because it calls get_current_catalog(), which executes a query, which needs the default catalog, which calls get_current_catalog()... 
etc @@ -553,7 +604,7 @@ def unquote_and_lower(identifier: str) -> str: self._default_catalog, dialect=self.dialect ) - def catalog_rewriter(node: exp.Expression) -> exp.Expression: + def catalog_rewriter(node: exp.Expr) -> exp.Expr: if isinstance(node, exp.Table): if node.catalog: # only replace the catalog on the model with the target catalog if the two are functionally equivalent @@ -568,8 +619,12 @@ def catalog_rewriter(node: exp.Expression) -> exp.Expression: # Snowflake connection config. This is because the catalog present on the model gets normalized and quoted to match # the source dialect, which isnt always compatible with Snowflake expression = expression.transform(catalog_rewriter) + return expression - return super()._to_sql(expression=expression, quote=quote, **kwargs) + def _to_sql(self, expression: exp.Expr, quote: bool = True, **kwargs: t.Any) -> str: + return super()._to_sql( + expression=self._normalize_catalog(expression), quote=quote, **kwargs + ) def _create_column_comments( self, @@ -610,6 +665,7 @@ def clone_table( target_table_name: TableName, source_table_name: TableName, replace: bool = False, + exists: bool = True, clone_kwargs: t.Optional[t.Dict[str, t.Any]] = None, **kwargs: t.Any, ) -> None: @@ -665,3 +721,18 @@ def close(self) -> t.Any: self._connection_pool.set_attribute(self.SNOWPARK, None) return super().close() + + def get_table_last_modified_ts(self, table_names: t.List[TableName]) -> t.List[int]: + from sqlmesh.utils.date import to_timestamp + + num_tables = len(table_names) + + query = "SELECT LAST_ALTERED FROM INFORMATION_SCHEMA.TABLES WHERE" + for i, table_name in enumerate(table_names): + table = exp.to_table(table_name) + query += f"""(TABLE_NAME = '{table.name}' AND TABLE_SCHEMA = '{table.db}' AND TABLE_CATALOG = '{table.catalog}')""" + if i < num_tables - 1: + query += " OR " + + result = self.fetchall(query) + return [to_timestamp(row[0]) for row in result] diff --git a/sqlmesh/core/engine_adapter/spark.py 
b/sqlmesh/core/engine_adapter/spark.py index 7d6a4d969b..9199aa3bcd 100644 --- a/sqlmesh/core/engine_adapter/spark.py +++ b/sqlmesh/core/engine_adapter/spark.py @@ -340,12 +340,12 @@ def _get_temp_table( return table def fetchdf( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> pd.DataFrame: return self.fetch_pyspark_df(query, quote_identifiers=quote_identifiers).toPandas() def fetch_pyspark_df( - self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False + self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False ) -> PySparkDataFrame: return self._ensure_pyspark_df( self._fetch_native_df(query, quote_identifiers=quote_identifiers) @@ -397,19 +397,21 @@ def get_current_catalog(self) -> t.Optional[str]: def set_current_catalog(self, catalog_name: str) -> None: self.connection.set_current_catalog(catalog_name) - def get_current_database(self) -> str: + def _get_current_schema(self) -> str: if self._use_spark_session: return self.spark.catalog.currentDatabase() return self.fetchone(exp.select(exp.func("current_database")))[0] # type: ignore - def get_data_object(self, target_name: TableName) -> t.Optional[DataObject]: + def get_data_object( + self, target_name: TableName, safe_to_cache: bool = False + ) -> t.Optional[DataObject]: target_table = exp.to_table(target_name) if isinstance(target_table.this, exp.Dot) and target_table.this.expression.name.startswith( f"{self.BRANCH_PREFIX}{self.WAP_PREFIX}" ): # Exclude the branch name target_table.set("this", target_table.this.this) - return super().get_data_object(target_table) + return super().get_data_object(target_table, safe_to_cache=safe_to_cache) def create_state_table( self, @@ -435,7 +437,7 @@ def _native_df_to_pandas_df( def _create_table( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: 
bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -457,12 +459,14 @@ def _create_table( if wap_id.startswith(f"{self.BRANCH_PREFIX}{self.WAP_PREFIX}"): table_name.set("this", table_name.this.this) - wap_supported = ( - kwargs.get("storage_format") or "" - ).lower() == "iceberg" or self.wap_supported(table_name) - do_dummy_insert = ( - False if not wap_supported or not exists else not self.table_exists(table_name) - ) + do_dummy_insert = False + if self.wap_enabled: + wap_supported = ( + kwargs.get("storage_format") or "" + ).lower() == "iceberg" or self.wap_supported(table_name) + do_dummy_insert = ( + False if not wap_supported or not exists else not self.table_exists(table_name) + ) super()._create_table( table_name_or_schema, expression, @@ -535,7 +539,7 @@ def _ensure_fqn(self, table_name: TableName) -> exp.Table: if not table.catalog: table.set("catalog", self.get_current_catalog()) if not table.db: - table.set("db", self.get_current_database()) + table.set("db", self._get_current_schema()) return table def _build_create_comment_column_exp( diff --git a/sqlmesh/core/engine_adapter/trino.py b/sqlmesh/core/engine_adapter/trino.py index 0e6853dd4a..00acddb26c 100644 --- a/sqlmesh/core/engine_adapter/trino.py +++ b/sqlmesh/core/engine_adapter/trino.py @@ -71,9 +71,35 @@ class TrinoEngineAdapter( MAX_TIMESTAMP_PRECISION = 3 @property - def schema_location_mapping(self) -> t.Optional[dict[re.Pattern, str]]: + def schema_location_mapping(self) -> t.Optional[t.Dict[re.Pattern, str]]: return self._extra_config.get("schema_location_mapping") + @property + def timestamp_mapping(self) -> t.Optional[t.Dict[exp.DataType, exp.DataType]]: + return self._extra_config.get("timestamp_mapping") + + def _apply_timestamp_mapping( + self, columns_to_types: t.Dict[str, exp.DataType] + ) -> t.Tuple[t.Dict[str, exp.DataType], t.Set[str]]: + """Apply custom timestamp mapping to column types. 
+ + Returns: + A tuple of (mapped_columns_to_types, mapped_column_names) where mapped_column_names + contains the names of columns that were found in the mapping. + """ + if not self.timestamp_mapping: + return columns_to_types, set() + + result = {} + mapped_columns: t.Set[str] = set() + for column, column_type in columns_to_types.items(): + if column_type in self.timestamp_mapping: + result[column] = self.timestamp_mapping[column_type] + mapped_columns.add(column) + else: + result[column] = column_type + return result, mapped_columns + @property def catalog_support(self) -> CatalogSupport: return CatalogSupport.FULL_SUPPORT @@ -86,6 +112,8 @@ def set_current_catalog(self, catalog: str) -> None: def get_catalog_type(self, catalog: t.Optional[str]) -> str: row: t.Tuple = tuple() if catalog: + if catalog_type_override := self._catalog_type_overrides.get(catalog): + return catalog_type_override row = ( self.fetchone( f"select connector_name from system.metadata.catalogs where catalog_name='{catalog}'" @@ -101,7 +129,7 @@ def session(self, properties: SessionProperties) -> t.Iterator[None]: yield return - if not isinstance(authorization, exp.Expression): + if not isinstance(authorization, exp.Expr): authorization = exp.Literal.string(authorization) if not authorization.is_string: @@ -115,7 +143,7 @@ def session(self, properties: SessionProperties) -> t.Iterator[None]: try: yield finally: - self.execute(f"RESET SESSION AUTHORIZATION") + self.execute("RESET SESSION AUTHORIZATION") def replace_query( self, @@ -282,9 +310,13 @@ def _build_schema_exp( column_descriptions: t.Optional[t.Dict[str, str]] = None, expressions: t.Optional[t.List[exp.PrimaryKey]] = None, is_view: bool = False, + materialized: bool = False, ) -> exp.Schema: + target_columns_to_types, mapped_columns = self._apply_timestamp_mapping( + target_columns_to_types + ) if "delta_lake" in self.get_catalog_type_from_table(table): - target_columns_to_types = self._to_delta_ts(target_columns_to_types) + 
target_columns_to_types = self._to_delta_ts(target_columns_to_types, mapped_columns) return super()._build_schema_exp( table, target_columns_to_types, column_descriptions, expressions, is_view @@ -294,13 +326,13 @@ def _scd_type_2( self, target_table: TableName, source_table: QueryOrDF, - unique_key: t.Sequence[exp.Expression], + unique_key: t.Sequence[exp.Expr], valid_from_col: exp.Column, valid_to_col: exp.Column, execution_time: t.Union[TimeLike, exp.Column], invalidate_hard_deletes: bool = True, updated_at_col: t.Optional[exp.Column] = None, - check_columns: t.Optional[t.Union[exp.Star, t.Sequence[exp.Column]]] = None, + check_columns: t.Optional[t.Union[exp.Star, t.Sequence[exp.Expr]]] = None, updated_at_as_valid_from: bool = False, execution_time_as_valid_from: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, @@ -310,10 +342,15 @@ def _scd_type_2( source_columns: t.Optional[t.List[str]] = None, **kwargs: t.Any, ) -> None: + mapped_columns: t.Set[str] = set() + if target_columns_to_types: + target_columns_to_types, mapped_columns = self._apply_timestamp_mapping( + target_columns_to_types + ) if target_columns_to_types and "delta_lake" in self.get_catalog_type_from_table( target_table ): - target_columns_to_types = self._to_delta_ts(target_columns_to_types) + target_columns_to_types = self._to_delta_ts(target_columns_to_types, mapped_columns) return super()._scd_type_2( target_table, @@ -343,18 +380,21 @@ def _scd_type_2( # - `timestamp(3) with time zone` for timezone-aware # https://trino.io/docs/current/connector/delta-lake.html#delta-lake-to-trino-type-mapping def _to_delta_ts( - self, columns_to_types: t.Dict[str, exp.DataType] + self, + columns_to_types: t.Dict[str, exp.DataType], + skip_columns: t.Optional[t.Set[str]] = None, ) -> t.Dict[str, exp.DataType]: ts6 = exp.DataType.build("timestamp(6)") ts3_tz = exp.DataType.build("timestamp(3) with time zone") + skip = skip_columns or set() delta_columns_to_types = { - k: 
ts6 if v.is_type(exp.DataType.Type.TIMESTAMP) else v + k: ts6 if k not in skip and v.is_type(exp.DataType.Type.TIMESTAMP) else v for k, v in columns_to_types.items() } delta_columns_to_types = { - k: ts3_tz if v.is_type(exp.DataType.Type.TIMESTAMPTZ) else v + k: ts3_tz if k not in skip and v.is_type(exp.DataType.Type.TIMESTAMPTZ) else v for k, v in delta_columns_to_types.items() } @@ -369,7 +409,7 @@ def _create_schema( schema_name: SchemaName, ignore_if_exists: bool, warn_on_error: bool, - properties: t.List[exp.Expression], + properties: t.List[exp.Expr], kind: str, ) -> None: if mapped_location := self._schema_location(schema_name): @@ -386,7 +426,7 @@ def _create_schema( def _create_table( self, table_name_or_schema: t.Union[exp.Schema, TableName], - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], exists: bool = True, replace: bool = False, target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, diff --git a/sqlmesh/core/janitor.py b/sqlmesh/core/janitor.py new file mode 100644 index 0000000000..e050d6ef6c --- /dev/null +++ b/sqlmesh/core/janitor.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import typing as t + +from sqlglot import exp + +from sqlmesh.core.engine_adapter import EngineAdapter +from sqlmesh.core.console import Console +from sqlmesh.core.dialect import schema_ +from sqlmesh.core.environment import Environment +from sqlmesh.core.snapshot import SnapshotEvaluator +from sqlmesh.core.state_sync import StateSync +from sqlmesh.core.state_sync.common import ( + logger, + iter_expired_snapshot_batches, + RowBoundary, + ExpiredBatchRange, +) +from sqlmesh.utils.errors import SQLMeshError + + +def cleanup_expired_views( + default_adapter: EngineAdapter, + engine_adapters: t.Dict[str, EngineAdapter], + environments: t.List[Environment], + warn_on_delete_failure: bool = False, + console: t.Optional[Console] = None, +) -> None: + expired_schema_or_catalog_environments = [ + environment + for environment 
in environments + if environment.suffix_target.is_schema or environment.suffix_target.is_catalog + ] + expired_table_environments = [ + environment for environment in environments if environment.suffix_target.is_table + ] + + # We have to use the corresponding adapter if the virtual layer is gateway managed + def get_adapter(gateway_managed: bool, gateway: t.Optional[str] = None) -> EngineAdapter: + if gateway_managed and gateway: + return engine_adapters.get(gateway, default_adapter) + return default_adapter + + catalogs_to_drop: t.Set[t.Tuple[EngineAdapter, str]] = set() + schemas_to_drop: t.Set[t.Tuple[EngineAdapter, exp.Table]] = set() + + # Collect schemas and catalogs to drop + for engine_adapter, expired_catalog, expired_schema, suffix_target in { + ( + (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)), + snapshot.qualified_view_name.catalog_for_environment( + environment.naming_info, dialect=engine_adapter.dialect + ), + snapshot.qualified_view_name.schema_for_environment( + environment.naming_info, dialect=engine_adapter.dialect + ), + environment.suffix_target, + ) + for environment in expired_schema_or_catalog_environments + for snapshot in environment.snapshots + if snapshot.is_model and not snapshot.is_symbolic + }: + if suffix_target.is_catalog: + if expired_catalog: + catalogs_to_drop.add((engine_adapter, expired_catalog)) + else: + schema = schema_(expired_schema, expired_catalog) + schemas_to_drop.add((engine_adapter, schema)) + + # Drop the views for the expired environments + for engine_adapter, expired_view in { + ( + (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)), + snapshot.qualified_view_name.for_environment( + environment.naming_info, dialect=engine_adapter.dialect + ), + ) + for environment in expired_table_environments + for snapshot in environment.snapshots + if snapshot.is_model and not snapshot.is_symbolic + }: + try: + engine_adapter.drop_view(expired_view, 
ignore_if_not_exists=True) + if console: + console.update_cleanup_progress(expired_view) + except Exception as e: + message = f"Failed to drop the expired environment view '{expired_view}': {e}" + if warn_on_delete_failure: + logger.warning(message) + else: + raise SQLMeshError(message) from e + + # Drop the schemas for the expired environments + for engine_adapter, schema in schemas_to_drop: + try: + engine_adapter.drop_schema( + schema, + ignore_if_not_exists=True, + cascade=True, + ) + if console: + console.update_cleanup_progress(schema.sql(dialect=engine_adapter.dialect)) + except Exception as e: + message = f"Failed to drop the expired environment schema '{schema}': {e}" + if warn_on_delete_failure: + logger.warning(message) + else: + raise SQLMeshError(message) from e + + # Drop any catalogs that were associated with a snapshot where the engine adapter supports dropping catalogs + # catalogs_to_drop is only populated when environment_suffix_target is set to 'catalog' + for engine_adapter, catalog in catalogs_to_drop: + if engine_adapter.SUPPORTS_CREATE_DROP_CATALOG: + try: + engine_adapter.drop_catalog(catalog) + if console: + console.update_cleanup_progress(catalog) + except Exception as e: + message = f"Failed to drop the expired environment catalog '{catalog}': {e}" + if warn_on_delete_failure: + logger.warning(message) + else: + raise SQLMeshError(message) from e + + +def delete_expired_snapshots( + state_sync: StateSync, + snapshot_evaluator: SnapshotEvaluator, + *, + current_ts: int, + ignore_ttl: bool = False, + batch_size: t.Optional[int] = None, + console: t.Optional[Console] = None, +) -> None: + """Delete all expired snapshots in batches. + + This helper function encapsulates the logic for deleting expired snapshots in batches, + eliminating code duplication across different use cases. + + Args: + state_sync: StateSync instance to query and delete expired snapshots from. 
+ snapshot_evaluator: SnapshotEvaluator instance to clean up tables associated with snapshots. + current_ts: Timestamp used to evaluate expiration. + ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced). + batch_size: Maximum number of snapshots to fetch per batch. + console: Optional console for reporting progress. + + Returns: + The total number of deleted expired snapshots. + """ + num_expired_snapshots = 0 + for batch in iter_expired_snapshot_batches( + state_reader=state_sync, + current_ts=current_ts, + ignore_ttl=ignore_ttl, + batch_size=batch_size, + ): + end_info = ( + f"updated_ts={batch.batch_range.end.updated_ts}" + if isinstance(batch.batch_range.end, RowBoundary) + else f"limit={batch.batch_range.end.batch_size}" + ) + logger.info( + "Processing batch of size %s with end %s", + len(batch.expired_snapshot_ids), + end_info, + ) + snapshot_evaluator.cleanup( + target_snapshots=batch.cleanup_tasks, + on_complete=console.update_cleanup_progress if console else None, + ) + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=batch.batch_range.end, + ), + ignore_ttl=ignore_ttl, + ) + logger.info("Cleaned up expired snapshots batch") + num_expired_snapshots += len(batch.expired_snapshot_ids) + logger.info("Cleaned up %s expired snapshots", num_expired_snapshots) diff --git a/sqlmesh/core/lineage.py b/sqlmesh/core/lineage.py index 430a2437b6..8363979034 100644 --- a/sqlmesh/core/lineage.py +++ b/sqlmesh/core/lineage.py @@ -16,7 +16,7 @@ from sqlmesh.core.model import Model -CACHE: t.Dict[str, t.Tuple[int, exp.Expression, Scope]] = {} +CACHE: t.Dict[str, t.Tuple[int, exp.Expr, Scope]] = {} def lineage( @@ -25,8 +25,8 @@ def lineage( trim_selects: bool = True, **kwargs: t.Any, ) -> Node: - query = None - scope = None + query: t.Optional[exp.Expr] = None + scope: t.Optional[Scope] = None if model.name in CACHE: obj_id, query, scope = CACHE[model.name] @@ -66,6 +66,7 @@ 
def lineage( scope=scope, trim_selects=trim_selects, dialect=model.dialect, + copy=False, ) diff --git a/sqlmesh/core/linter/rules/builtin.py b/sqlmesh/core/linter/rules/builtin.py index f6bef4b4ef..4547ac0528 100644 --- a/sqlmesh/core/linter/rules/builtin.py +++ b/sqlmesh/core/linter/rules/builtin.py @@ -129,6 +129,21 @@ def check_model(self, model: Model) -> t.Optional[RuleViolation]: return self.violation() +class NoMissingUnitTest(Rule): + """All models must have a unit test found in the tests/ directory yaml files""" + + def check_model(self, model: Model) -> t.Optional[RuleViolation]: + # External models cannot have unit tests + if isinstance(model, ExternalModel): + return None + + if model.name not in self.context.models_with_tests: + return self.violation( + violation_msg=f"Model {model.name} is missing unit test(s). Please add in the tests/ directory." + ) + return None + + class NoMissingExternalModels(Rule): """All external models must be registered in the external_models.yaml file""" @@ -303,4 +318,4 @@ def check_model(self, model: Model) -> t.Optional[RuleViolation]: return None -BUILTIN_RULES = RuleSet(subclasses(__name__, Rule, (Rule,))) +BUILTIN_RULES = RuleSet(subclasses(__name__, Rule, exclude={Rule})) diff --git a/sqlmesh/core/loader.py b/sqlmesh/core/loader.py index 6647a2edba..4b7b1bac02 100644 --- a/sqlmesh/core/loader.py +++ b/sqlmesh/core/loader.py @@ -35,7 +35,7 @@ from sqlmesh.core.model import model as model_registry from sqlmesh.core.model.common import make_python_env from sqlmesh.core.signal import signal -from sqlmesh.core.test import ModelTestMetadata, filter_tests_by_patterns +from sqlmesh.core.test import ModelTestMetadata from sqlmesh.utils import UniqueKeyDict, sys_path from sqlmesh.utils.errors import ConfigError from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroExtractor @@ -64,6 +64,7 @@ class LoadedProject: excluded_requirements: t.Set[str] environment_statements: t.List[EnvironmentStatements] user_rules: RuleSet + 
model_test_metadata: t.List[ModelTestMetadata] class CacheBase(abc.ABC): @@ -243,6 +244,8 @@ def load(self) -> LoadedProject: user_rules = self._load_linting_rules() + model_test_metadata = self.load_model_tests() + project = LoadedProject( macros=macros, jinja_macros=jinja_macros, @@ -254,6 +257,7 @@ def load(self) -> LoadedProject: excluded_requirements=excluded_requirements, environment_statements=environment_statements, user_rules=user_rules, + model_test_metadata=model_test_metadata, ) return project @@ -423,9 +427,7 @@ def _load_linting_rules(self) -> RuleSet: """Loads user linting rules""" return RuleSet() - def load_model_tests( - self, tests: t.Optional[t.List[str]] = None, patterns: list[str] | None = None - ) -> t.List[ModelTestMetadata]: + def load_model_tests(self) -> t.List[ModelTestMetadata]: """Loads YAML-based model tests""" return [] @@ -838,7 +840,7 @@ def _load_linting_rules(self) -> RuleSet: if os.path.getsize(path): self._track_file(path) module = import_python_file(path, self.config_path) - module_rules = subclasses(module.__name__, Rule, (Rule,)) + module_rules = subclasses(module.__name__, Rule, exclude={Rule}) for user_rule in module_rules: user_rules[user_rule.name] = user_rule @@ -864,38 +866,23 @@ def _load_model_test_file(self, path: Path) -> dict[str, ModelTestMetadata]: return model_test_metadata - def load_model_tests( - self, tests: t.Optional[t.List[str]] = None, patterns: list[str] | None = None - ) -> t.List[ModelTestMetadata]: + def load_model_tests(self) -> t.List[ModelTestMetadata]: """Loads YAML-based model tests""" test_meta_list: t.List[ModelTestMetadata] = [] - if tests: - for test in tests: - filename, test_name = test.split("::", maxsplit=1) if "::" in test else (test, "") - - test_meta = self._load_model_test_file(Path(filename)) - if test_name: - test_meta_list.append(test_meta[test_name]) - else: - test_meta_list.extend(test_meta.values()) - else: - search_path = Path(self.config_path) / c.TESTS + search_path = 
Path(self.config_path) / c.TESTS - for yaml_file in itertools.chain( - search_path.glob("**/test*.yaml"), - search_path.glob("**/test*.yml"), + for yaml_file in itertools.chain( + search_path.glob("**/test*.yaml"), + search_path.glob("**/test*.yml"), + ): + if any( + yaml_file.match(ignore_pattern) + for ignore_pattern in self.config.ignore_patterns or [] ): - if any( - yaml_file.match(ignore_pattern) - for ignore_pattern in self.config.ignore_patterns or [] - ): - continue - - test_meta_list.extend(self._load_model_test_file(yaml_file).values()) + continue - if patterns: - test_meta_list = filter_tests_by_patterns(test_meta_list, patterns) + test_meta_list.extend(self._load_model_test_file(yaml_file).values()) return test_meta_list diff --git a/sqlmesh/core/macros.py b/sqlmesh/core/macros.py index 9e7df5d111..888acbb8eb 100644 --- a/sqlmesh/core/macros.py +++ b/sqlmesh/core/macros.py @@ -110,7 +110,7 @@ def _macro_sql(sql: str, into: t.Optional[str] = None) -> str: return f"self.parse_one({', '.join(args)})" -def _macro_func_sql(self: Generator, e: exp.Expression) -> str: +def _macro_func_sql(self: Generator, e: exp.Expr) -> str: func = e.this if isinstance(func, exp.Anonymous): @@ -128,6 +128,17 @@ def _macro_str_replace(text: str) -> str: return f"self.template({text}, locals())" +class CaseInsensitiveMapping(t.Dict[str, t.Any]): + def __init__(self, data: t.Dict[str, t.Any]) -> None: + super().__init__(data) + + def __getitem__(self, key: str) -> t.Any: + return super().__getitem__(key.lower()) + + def get(self, key: str, default: t.Any = None, /) -> t.Any: + return super().get(key.lower(), default) + + class MacroDialect(Python): class Generator(Python.Generator): TRANSFORMS = { @@ -167,7 +178,7 @@ def __init__( schema: t.Optional[MappingSchema] = None, runtime_stage: RuntimeStage = RuntimeStage.LOADING, resolve_table: t.Optional[t.Callable[[str | exp.Table], str]] = None, - resolve_tables: t.Optional[t.Callable[[exp.Expression], exp.Expression]] = None, + 
resolve_tables: t.Optional[t.Callable[[exp.Expr], exp.Expr]] = None, snapshots: t.Optional[t.Dict[str, Snapshot]] = None, default_catalog: t.Optional[str] = None, path: t.Optional[Path] = None, @@ -226,7 +237,7 @@ def __init__( def send( self, name: str, *args: t.Any, **kwargs: t.Any - ) -> t.Union[None, exp.Expression, t.List[exp.Expression]]: + ) -> t.Union[None, exp.Expr, t.List[exp.Expr]]: func = self.macros.get(normalize_macro_name(name)) if not callable(func): @@ -242,38 +253,36 @@ def send( + format_evaluated_code_exception(e, self.python_env) ) - def transform( - self, expression: exp.Expression - ) -> exp.Expression | t.List[exp.Expression] | None: + def transform(self, expression: exp.Expr) -> exp.Expr | t.List[exp.Expr] | None: changed = False def evaluate_macros( - node: exp.Expression, - ) -> exp.Expression | t.List[exp.Expression] | None: + node: exp.Expr, + ) -> exp.Expr | t.List[exp.Expr] | None: nonlocal changed if isinstance(node, MacroVar): changed = True variables = self.variables - if node.name not in self.locals and node.name.lower() not in variables: + # This makes all variables case-insensitive, e.g. @X is the same as @x. We do this + # for consistency, since `variables` and `blueprint_variables` are normalized. + var_name = node.name.lower() + + if var_name not in self.locals and var_name not in variables: if not isinstance(node.parent, StagedFilePath): raise SQLMeshError(f"Macro variable '{node.name}' is undefined.") return node # Precedence order is locals (e.g. 
@DEF) > blueprint variables > config variables - value = self.locals.get(node.name, variables.get(node.name.lower())) + value = self.locals.get(var_name, variables.get(var_name)) if isinstance(value, list): return exp.convert( - tuple( - self.transform(v) if isinstance(v, exp.Expression) else v for v in value - ) + tuple(self.transform(v) if isinstance(v, exp.Expr) else v for v in value) ) - return exp.convert( - self.transform(value) if isinstance(value, exp.Expression) else value - ) + return exp.convert(self.transform(value) if isinstance(value, exp.Expr) else value) if isinstance(node, exp.Identifier) and "@" in node.this: text = self.template(node.this, {}) if node.this != text: @@ -296,7 +305,7 @@ def evaluate_macros( self.parse_one(node.sql(dialect=self.dialect, copy=False)) for node in transformed ] - if isinstance(transformed, exp.Expression): + if isinstance(transformed, exp.Expr): return self.parse_one(transformed.sql(dialect=self.dialect, copy=False)) return transformed @@ -313,13 +322,18 @@ def template(self, text: t.Any, local_variables: t.Dict[str, t.Any]) -> str: """ # We try to convert all variables into sqlglot expressions because they're going to be converted # into strings; in sql we don't convert strings because that would result in adding quotes - mapping = { - k: convert_sql(v, self.dialect) + base_mapping = { + k.lower(): convert_sql(v, self.dialect) for k, v in chain(self.variables.items(), self.locals.items(), local_variables.items()) + if k.lower() + not in ( + "engine_adapter", + "snapshot", + ) } - return MacroStrTemplate(str(text)).safe_substitute(mapping) + return MacroStrTemplate(str(text)).safe_substitute(CaseInsensitiveMapping(base_mapping)) - def evaluate(self, node: MacroFunc) -> exp.Expression | t.List[exp.Expression] | None: + def evaluate(self, node: MacroFunc) -> exp.Expr | t.List[exp.Expr] | None: if isinstance(node, MacroDef): if isinstance(node.expression, exp.Lambda): _, fn = _norm_var_arg_lambda(self, node.expression) @@ 
-327,11 +341,13 @@ def evaluate(self, node: MacroFunc) -> exp.Expression | t.List[exp.Expression] | args[0] if len(args) == 1 else exp.Tuple(expressions=list(args)) ) else: - self.locals[node.name] = self.transform(node.expression) + # Make variables defined through `@DEF` case-insensitive + self.locals[node.name.lower()] = self.transform(node.expression) + return node if isinstance(node, (MacroSQL, MacroStrReplace)): - result: t.Optional[exp.Expression | t.List[exp.Expression]] = exp.convert( + result: t.Optional[exp.Expr | t.List[exp.Expr]] = exp.convert( self.eval_expression(node) ) else: @@ -399,7 +415,7 @@ def eval_expression(self, node: t.Any) -> t.Any: Returns: The return value of the evaled Python Code. """ - if not isinstance(node, exp.Expression): + if not isinstance(node, exp.Expr): return node code = node.sql() try: @@ -412,8 +428,8 @@ def eval_expression(self, node: t.Any) -> t.Any: ) def parse_one( - self, sql: str | exp.Expression, into: t.Optional[exp.IntoType] = None, **opts: t.Any - ) -> exp.Expression: + self, sql: str | exp.Expr, into: t.Optional[exp.IntoType] = None, **opts: t.Any + ) -> exp.Expr: """Parses the given SQL string and returns a syntax tree for the first parsed SQL statement. 
@@ -475,7 +491,7 @@ def resolve_table(self, table: str | exp.Table) -> str: ) return self._resolve_table(table) - def resolve_tables(self, query: exp.Expression) -> exp.Expression: + def resolve_tables(self, query: exp.Expr) -> exp.Expr: """Resolves queries with references to SQLMesh model names to their physical tables.""" if not self._resolve_tables: raise SQLMeshError( @@ -566,7 +582,7 @@ def variables(self) -> t.Dict[str, t.Any]: **self.locals.get(c.SQLMESH_BLUEPRINT_VARS_METADATA, {}), } - def _coerce(self, expr: exp.Expression, typ: t.Any, strict: bool = False) -> t.Any: + def _coerce(self, expr: exp.Expr, typ: t.Any, strict: bool = False) -> t.Any: """Coerces the given expression to the specified type on a best-effort basis.""" return _coerce(expr, typ, self.dialect, self._path, strict) @@ -626,11 +642,11 @@ def _norm_var_arg_lambda( """ def substitute( - node: exp.Expression, args: t.Dict[str, exp.Expression] - ) -> exp.Expression | t.List[exp.Expression] | None: + node: exp.Expr, args: t.Dict[str, exp.Expr] + ) -> exp.Expr | t.List[exp.Expr] | None: if isinstance(node, (exp.Identifier, exp.Var)): if not isinstance(node.parent, exp.Column): - name = node.name + name = node.name.lower() if name in args: return args[name].copy() if name in evaluator.locals: @@ -663,7 +679,7 @@ def substitute( return expressions, lambda args: func.this.transform( substitute, { - expression.name: arg + expression.name.lower(): arg for expression, arg in zip( func.expressions, args.expressions if isinstance(args, exp.Tuple) else [args] ) @@ -776,8 +792,8 @@ def filter_(evaluator: MacroEvaluator, *args: t.Any) -> t.List[t.Any]: def _optional_expression( evaluator: MacroEvaluator, condition: exp.Condition, - expression: exp.Expression, -) -> t.Optional[exp.Expression]: + expression: exp.Expr, +) -> t.Optional[exp.Expr]: """Inserts expression when the condition is True The following examples express the usage of this function in the context of the macros which wrap it. 
@@ -842,7 +858,7 @@ def star( suffix: exp.Literal = exp.Literal.string(""), quote_identifiers: exp.Boolean = exp.true(), except_: t.Union[exp.Array, exp.Tuple] = exp.Tuple(expressions=[]), -) -> t.List[exp.Alias]: +) -> t.List[exp.Expr]: """Returns a list of projections for the given relation. Args: @@ -917,7 +933,7 @@ def star( @macro() def generate_surrogate_key( evaluator: MacroEvaluator, - *fields: exp.Expression, + *fields: exp.Expr, hash_function: exp.Literal = exp.Literal.string("MD5"), ) -> exp.Func: """Generates a surrogate key (string) for the given fields. @@ -934,7 +950,7 @@ def generate_surrogate_key( >>> MacroEvaluator(dialect="bigquery").transform(parse_one(sql, dialect="bigquery")).sql("bigquery") "SELECT SHA256(CONCAT(COALESCE(CAST(a AS STRING), '_sqlmesh_surrogate_key_null_'), '|', COALESCE(CAST(b AS STRING), '_sqlmesh_surrogate_key_null_'), '|', COALESCE(CAST(c AS STRING), '_sqlmesh_surrogate_key_null_'))) FROM foo" """ - string_fields: t.List[exp.Expression] = [] + string_fields: t.List[exp.Expr] = [] for i, field in enumerate(fields): if i > 0: string_fields.append(exp.Literal.string("|")) @@ -958,7 +974,7 @@ def generate_surrogate_key( @macro() -def safe_add(_: MacroEvaluator, *fields: exp.Expression) -> exp.Case: +def safe_add(_: MacroEvaluator, *fields: exp.Expr) -> exp.Case: """Adds numbers together, substitutes nulls for 0s and only returns null if all fields are null. Example: @@ -976,7 +992,7 @@ def safe_add(_: MacroEvaluator, *fields: exp.Expression) -> exp.Case: @macro() -def safe_sub(_: MacroEvaluator, *fields: exp.Expression) -> exp.Case: +def safe_sub(_: MacroEvaluator, *fields: exp.Expr) -> exp.Case: """Subtract numbers, substitutes nulls for 0s and only returns null if all fields are null. 
Example: @@ -994,7 +1010,7 @@ def safe_sub(_: MacroEvaluator, *fields: exp.Expression) -> exp.Case: @macro() -def safe_div(_: MacroEvaluator, numerator: exp.Expression, denominator: exp.Expression) -> exp.Div: +def safe_div(_: MacroEvaluator, numerator: exp.Expr, denominator: exp.Expr) -> exp.Div: """Divides numbers, returns null if the denominator is 0. Example: @@ -1010,7 +1026,7 @@ def safe_div(_: MacroEvaluator, numerator: exp.Expression, denominator: exp.Expr @macro() def union( evaluator: MacroEvaluator, - *args: exp.Expression, + *args: exp.Expr, ) -> exp.Query: """Returns a UNION of the given tables. Only choosing columns that have the same name and type. @@ -1085,10 +1101,10 @@ def union( @macro() def haversine_distance( _: MacroEvaluator, - lat1: exp.Expression, - lon1: exp.Expression, - lat2: exp.Expression, - lon2: exp.Expression, + lat1: exp.Expr, + lon1: exp.Expr, + lat2: exp.Expr, + lon2: exp.Expr, unit: exp.Literal = exp.Literal.string("mi"), ) -> exp.Mul: """Returns the haversine distance between two points. @@ -1128,17 +1144,17 @@ def haversine_distance( def pivot( evaluator: MacroEvaluator, column: SQL, - values: t.List[SQL], + values: t.List[exp.Expr], alias: bool = True, - agg: exp.Expression = exp.Literal.string("SUM"), - cmp: exp.Expression = exp.Literal.string("="), - prefix: exp.Expression = exp.Literal.string(""), - suffix: exp.Expression = exp.Literal.string(""), + agg: exp.Expr = exp.Literal.string("SUM"), + cmp: exp.Expr = exp.Literal.string("="), + prefix: exp.Expr = exp.Literal.string(""), + suffix: exp.Expr = exp.Literal.string(""), then_value: SQL = SQL("1"), else_value: SQL = SQL("0"), quote: bool = True, distinct: bool = False, -) -> t.List[exp.Expression]: +) -> t.List[exp.Expr]: """Returns a list of projections as a result of pivoting the given column on the given values. 
Example: @@ -1146,23 +1162,23 @@ def pivot( >>> from sqlmesh.core.macros import MacroEvaluator >>> sql = "SELECT date_day, @PIVOT(status, ['cancelled', 'completed']) FROM rides GROUP BY 1" >>> MacroEvaluator().transform(parse_one(sql)).sql() - 'SELECT date_day, SUM(CASE WHEN status = \\'cancelled\\' THEN 1 ELSE 0 END) AS "\\'cancelled\\'", SUM(CASE WHEN status = \\'completed\\' THEN 1 ELSE 0 END) AS "\\'completed\\'" FROM rides GROUP BY 1' + 'SELECT date_day, SUM(CASE WHEN status = \\'cancelled\\' THEN 1 ELSE 0 END) AS "cancelled", SUM(CASE WHEN status = \\'completed\\' THEN 1 ELSE 0 END) AS "completed" FROM rides GROUP BY 1' >>> sql = "SELECT @PIVOT(a, ['v'], then_value := tv, suffix := '_sfx', quote := FALSE)" >>> MacroEvaluator(dialect="bigquery").transform(parse_one(sql)).sql("bigquery") - "SELECT SUM(CASE WHEN a = 'v' THEN tv ELSE 0 END) AS `v_sfx`" + "SELECT SUM(CASE WHEN a = 'v' THEN tv ELSE 0 END) AS v_sfx" """ - aggregates: t.List[exp.Expression] = [] + aggregates: t.List[exp.Expr] = [] for value in values: proj = f"{agg.name}(" if distinct: proj += "DISTINCT " - proj += f"CASE WHEN {column} {cmp.name} {value} THEN {then_value} ELSE {else_value} END) " - node = evaluator.parse_one(proj) + proj += f"CASE WHEN {column} {cmp.name} {value.sql(evaluator.dialect)} THEN {then_value} ELSE {else_value} END) " + node: exp.Expr = evaluator.parse_one(proj) if alias: node = node.as_( - f"{prefix.name}{value}{suffix.name}", + f"{prefix.name}{value.name}{suffix.name}", quoted=quote, copy=False, dialect=evaluator.dialect, @@ -1174,7 +1190,7 @@ def pivot( @macro("AND") -def and_(evaluator: MacroEvaluator, *expressions: t.Optional[exp.Expression]) -> exp.Condition: +def and_(evaluator: MacroEvaluator, *expressions: t.Optional[exp.Expr]) -> exp.Condition: """Returns an AND statement filtering out any NULL expressions.""" conditions = [e for e in expressions if not isinstance(e, exp.Null)] @@ -1185,7 +1201,7 @@ def and_(evaluator: MacroEvaluator, *expressions: 
t.Optional[exp.Expression]) -> @macro("OR") -def or_(evaluator: MacroEvaluator, *expressions: t.Optional[exp.Expression]) -> exp.Condition: +def or_(evaluator: MacroEvaluator, *expressions: t.Optional[exp.Expr]) -> exp.Condition: """Returns an OR statement filtering out any NULL expressions.""" conditions = [e for e in expressions if not isinstance(e, exp.Null)] @@ -1197,8 +1213,8 @@ def or_(evaluator: MacroEvaluator, *expressions: t.Optional[exp.Expression]) -> @macro("VAR") def var( - evaluator: MacroEvaluator, var_name: exp.Expression, default: t.Optional[exp.Expression] = None -) -> exp.Expression: + evaluator: MacroEvaluator, var_name: exp.Expr, default: t.Optional[exp.Expr] = None +) -> exp.Expr: """Returns the value of a variable or the default value if the variable is not set.""" if not var_name.is_string: raise SQLMeshError(f"Invalid variable name '{var_name.sql()}'. Expected a string literal.") @@ -1208,8 +1224,8 @@ def var( @macro("BLUEPRINT_VAR") def blueprint_var( - evaluator: MacroEvaluator, var_name: exp.Expression, default: t.Optional[exp.Expression] = None -) -> exp.Expression: + evaluator: MacroEvaluator, var_name: exp.Expr, default: t.Optional[exp.Expr] = None +) -> exp.Expr: """Returns the value of a blueprint variable or the default value if the variable is not set.""" if not var_name.is_string: raise SQLMeshError( @@ -1222,8 +1238,8 @@ def blueprint_var( @macro() def deduplicate( evaluator: MacroEvaluator, - relation: exp.Expression, - partition_by: t.List[exp.Expression], + relation: exp.Expr, + partition_by: t.List[exp.Expr], order_by: t.List[str], ) -> exp.Query: """Returns a QUERY to deduplicate rows within a table @@ -1279,9 +1295,9 @@ def deduplicate( @macro() def date_spine( evaluator: MacroEvaluator, - datepart: exp.Expression, - start_date: exp.Expression, - end_date: exp.Expression, + datepart: exp.Expr, + start_date: exp.Expr, + end_date: exp.Expr, ) -> exp.Select: """Returns a query that produces a date spine with the given 
datepart, and range of start_date and end_date. Useful for joining as a date lookup table. @@ -1469,7 +1485,7 @@ def _coerce( """Coerces the given expression to the specified type on a best-effort basis.""" base_err_msg = f"Failed to coerce expression '{expr}' to type '{typ}'." try: - if typ is None or typ is t.Any or not isinstance(expr, exp.Expression): + if typ is None or typ is t.Any or not isinstance(expr, exp.Expr): return expr base = t.get_origin(typ) or typ @@ -1481,7 +1497,7 @@ def _coerce( except Exception: pass raise SQLMeshError(base_err_msg) - if base is SQL and isinstance(expr, exp.Expression): + if base is SQL and isinstance(expr, exp.Expr): return expr.sql(dialect) if base is t.Literal: @@ -1506,7 +1522,7 @@ def _coerce( if isinstance(expr, base): return expr - if issubclass(base, exp.Expression): + if issubclass(base, exp.Expr): d = Dialect.get_or_raise(dialect) into = base if base in d.parser_class.EXPRESSION_PARSERS else None if into is None: @@ -1581,7 +1597,7 @@ def _convert_sql(v: t.Any, dialect: DialectType) -> t.Any: except Exception: pass - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): if (isinstance(v, exp.Column) and not v.table) or ( isinstance(v, exp.Identifier) or v.is_string ): diff --git a/sqlmesh/core/metric/definition.py b/sqlmesh/core/metric/definition.py index dd11cfd38d..70f10b2347 100644 --- a/sqlmesh/core/metric/definition.py +++ b/sqlmesh/core/metric/definition.py @@ -16,7 +16,7 @@ def load_metric_ddl( - expression: exp.Expression, dialect: t.Optional[str], path: Path = Path(), **kwargs: t.Any + expression: exp.Expr, dialect: t.Optional[str], path: Path = Path(), **kwargs: t.Any ) -> MetricMeta: """Returns a MetricMeta from raw Metric DDL.""" if not isinstance(expression, d.Metric): @@ -70,7 +70,7 @@ class MetricMeta(PydanticModel, frozen=True): name: str dialect: str - expression: exp.Expression + expression: exp.Expr description: t.Optional[str] = None owner: t.Optional[str] = None @@ -87,11 +87,11 @@ def 
_string_validator(cls, v: t.Any) -> t.Optional[str]: return str_or_exp_to_str(v) @field_validator("expression", mode="before") - def _validate_expression(cls, v: t.Any, info: ValidationInfo) -> exp.Expression: + def _validate_expression(cls, v: t.Any, info: ValidationInfo) -> exp.Expr: if isinstance(v, str): dialect = info.data.get("dialect") return d.parse_one(v, dialect=dialect) - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return v return v @@ -139,7 +139,7 @@ def to_metric( class Metric(MetricMeta, frozen=True): - expanded: exp.Expression + expanded: exp.Expr @property def aggs(self) -> t.Dict[exp.AggFunc, MeasureAndDimTables]: @@ -150,7 +150,7 @@ def aggs(self) -> t.Dict[exp.AggFunc, MeasureAndDimTables]: return { t.cast( exp.AggFunc, - t.cast(exp.Expression, agg.parent).transform( + t.cast(exp.Expr, agg.parent).transform( lambda node: ( exp.column(node.this, table=remove_namespace(node)) if isinstance(node, exp.Column) and node.table @@ -162,7 +162,7 @@ def aggs(self) -> t.Dict[exp.AggFunc, MeasureAndDimTables]: } @property - def formula(self) -> exp.Expression: + def formula(self) -> exp.Expr: """Returns the post aggregation formula of a metric. For simple metrics it is just the metric name. For derived metrics, @@ -181,7 +181,7 @@ def _raise_metric_config_error(msg: str, path: Path) -> None: raise ConfigError(f"{msg}. '{path}'") -def _get_measure_and_dim_tables(expression: exp.Expression) -> MeasureAndDimTables: +def _get_measure_and_dim_tables(expression: exp.Expr) -> MeasureAndDimTables: """Finds all the table references in a metric definition. Additionally ensure than the first table returned is the 'measure' or numeric value being aggregated. 
@@ -190,7 +190,7 @@ def _get_measure_and_dim_tables(expression: exp.Expression) -> MeasureAndDimTabl tables = {} measure_table = None - def is_measure(node: exp.Expression) -> bool: + def is_measure(node: exp.Expr) -> bool: parent = node.parent if isinstance(parent, exp.AggFunc) and node.arg_key == "this": diff --git a/sqlmesh/core/metric/rewriter.py b/sqlmesh/core/metric/rewriter.py index 3519a77e68..6c9ec429a8 100644 --- a/sqlmesh/core/metric/rewriter.py +++ b/sqlmesh/core/metric/rewriter.py @@ -34,13 +34,13 @@ def __init__( self.join_type = join_type self.semantic_name = f"{semantic_schema}.{semantic_table}" - def rewrite(self, expression: exp.Expression) -> exp.Expression: + def rewrite(self, expression: exp.Expr) -> exp.Expr: for select in list(expression.find_all(exp.Select)): self._expand(select) return expression - def _build_sources(self, projections: t.List[exp.Expression]) -> SourceAggsAndJoins: + def _build_sources(self, projections: t.List[exp.Expr]) -> SourceAggsAndJoins: sources: SourceAggsAndJoins = {} for projection in projections: @@ -57,7 +57,7 @@ def _build_sources(self, projections: t.List[exp.Expression]) -> SourceAggsAndJo return sources def _expand(self, select: exp.Select) -> None: - base = select.args["from"].this.find(exp.Table) + base = select.args["from_"].this.find(exp.Table) base_alias = base.alias_or_name base_name = exp.table_name(base) @@ -78,7 +78,7 @@ def _expand(self, select: exp.Select) -> None: explicit_joins = {exp.table_name(join.this): join for join in select.args.pop("joins", [])} for i, (name, (aggs, joins)) in enumerate(sources.items()): - source: exp.Expression = exp.to_table(name) + source: exp.Expr = exp.to_table(name) table_name = remove_namespace(name) if not isinstance(source, exp.Select): @@ -110,7 +110,7 @@ def _expand(self, select: exp.Select) -> None: copy=False, ) - for node in find_all_in_scope(query, (exp.Column, exp.TableAlias)): + for node in find_all_in_scope(query, exp.Column, exp.TableAlias): # type: 
ignore[arg-type,var-annotated] if isinstance(node, exp.Column): if node.table in mapping: node.set("table", exp.to_identifier(mapping[node.table])) @@ -123,7 +123,7 @@ def _add_joins( source: exp.Select, name: str, joins: t.Dict[str, t.Optional[exp.Join]], - group_by: t.List[exp.Expression], + group_by: t.List[exp.Expr], mapping: t.Dict[str, str], ) -> exp.Select: grain = [e.copy() for e in group_by] @@ -177,7 +177,7 @@ def _add_joins( return source.select(*grain, copy=False).group_by(*grain, copy=False) -def _replace_table(node: exp.Expression, table: str, base_alias: str) -> exp.Expression: +def _replace_table(node: exp.Expr, table: str, base_alias: str) -> exp.Expr: for column in find_all_in_scope(node, exp.Column): if column.table == base_alias: column.args["table"] = exp.to_identifier(table) @@ -185,11 +185,11 @@ def _replace_table(node: exp.Expression, table: str, base_alias: str) -> exp.Exp def rewrite( - sql: str | exp.Expression, + sql: str | exp.Expr, graph: ReferenceGraph, metrics: t.Dict[str, Metric], dialect: t.Optional[str] = "", -) -> exp.Expression: +) -> exp.Expr: rewriter = Rewriter(graph=graph, metrics=metrics, dialect=dialect) return optimize( diff --git a/sqlmesh/core/model/cache.py b/sqlmesh/core/model/cache.py index 774bfa402b..1f038c5d79 100644 --- a/sqlmesh/core/model/cache.py +++ b/sqlmesh/core/model/cache.py @@ -81,7 +81,7 @@ def get(self, name: str, entry_id: str = "") -> t.List[Model]: @dataclass class OptimizedQueryCacheEntry: - optimized_rendered_query: t.Optional[exp.Expression] + optimized_rendered_query: t.Optional[exp.Query] renderer_violations: t.Optional[t.Dict[type[Rule], t.Any]] diff --git a/sqlmesh/core/model/common.py b/sqlmesh/core/model/common.py index 0a55f80cee..ccde7624bd 100644 --- a/sqlmesh/core/model/common.py +++ b/sqlmesh/core/model/common.py @@ -33,8 +33,8 @@ def make_python_env( expressions: t.Union[ - exp.Expression, - t.List[t.Union[exp.Expression, t.Tuple[exp.Expression, bool]]], + exp.Expr, + 
t.List[t.Union[exp.Expr, t.Tuple[exp.Expr, bool]]], ], jinja_macro_references: t.Optional[t.Set[MacroReference]], module_path: Path, @@ -71,7 +71,7 @@ def make_python_env( visited_macro_funcs: t.Set[int] = set() def _is_metadata_var( - name: str, expression: exp.Expression, appears_in_metadata_expression: bool + name: str, expression: exp.Expr, appears_in_metadata_expression: bool ) -> t.Optional[bool]: is_metadata_so_far = used_variables.get(name, True) if is_metadata_so_far is False: @@ -202,7 +202,7 @@ def _is_metadata_macro(name: str, appears_in_metadata_expression: bool) -> bool: def _extract_macro_func_variable_references( - macro_func: exp.Expression, + macro_func: exp.Expr, is_metadata: bool, ) -> t.Tuple[t.Set[str], t.Dict[int, bool], t.Set[int]]: var_references = set() @@ -255,7 +255,7 @@ def _add_variables_to_python_env( # - appear in metadata-only expressions, such as `audits (...)`, virtual statements, etc # - appear in the ASTs or definitions of metadata-only macros # - # See also: https://github.com/TobikoData/sqlmesh/pull/4936#issuecomment-3136339936, + # See also: https://github.com/SQLMesh/sqlmesh/pull/4936#issuecomment-3136339936, # specifically the "Terminology" and "Observations" section. metadata_used_variables = { var_name for var_name, is_metadata in used_variables.items() if is_metadata @@ -275,7 +275,7 @@ def _add_variables_to_python_env( if overlapping_variables := (non_metadata_used_variables & metadata_used_variables): raise ConfigError( f"Variables {', '.join(overlapping_variables)} are both metadata and non-metadata, " - "which is unexpected. Please file an issue at https://github.com/TobikoData/sqlmesh/issues/new." + "which is unexpected. Please file an issue at https://github.com/SQLMesh/sqlmesh/issues/new." 
) metadata_variables = { @@ -292,12 +292,12 @@ def _add_variables_to_python_env( if blueprint_variables: metadata_blueprint_variables = { - k: SqlValue(sql=v.sql(dialect=dialect)) if isinstance(v, exp.Expression) else v + k: SqlValue(sql=v.sql(dialect=dialect)) if isinstance(v, exp.Expr) else v for k, v in blueprint_variables.items() if k in metadata_used_variables } blueprint_variables = { - k.lower(): SqlValue(sql=v.sql(dialect=dialect)) if isinstance(v, exp.Expression) else v + k.lower(): SqlValue(sql=v.sql(dialect=dialect)) if isinstance(v, exp.Expr) else v for k, v in blueprint_variables.items() if k in non_metadata_used_variables } @@ -469,9 +469,9 @@ def single_value_or_tuple(values: t.Sequence) -> exp.Identifier | exp.Tuple: def parse_expression( cls: t.Type, - v: t.Union[t.List[str], t.List[exp.Expression], str, exp.Expression, t.Callable, None], + v: t.Union[t.List[str], t.List[exp.Expr], str, exp.Expr, t.Callable, None], info: t.Optional[ValidationInfo], -) -> t.List[exp.Expression] | exp.Expression | t.Callable | None: +) -> t.List[exp.Expr] | exp.Expr | t.Callable | None: """Helper method to deserialize SQLGlot expressions in Pydantic Models.""" if v is None: return None @@ -483,7 +483,7 @@ def parse_expression( if isinstance(v, list): return [ - e if isinstance(e, exp.Expression) else d.parse_one(e, dialect=dialect) + e if isinstance(e, exp.Expr) else d.parse_one(e, dialect=dialect) # type: ignore[misc] for e in v if not isinstance(e, exp.Semicolon) ] @@ -498,7 +498,7 @@ def parse_expression( def parse_bool(v: t.Any) -> bool: - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): if not isinstance(v, exp.Boolean): from sqlglot.optimizer.simplify import simplify @@ -524,7 +524,7 @@ def parse_properties( if isinstance(v, str): v = d.parse_one(v, dialect=dialect) if isinstance(v, (exp.Array, exp.Paren, exp.Tuple)): - eq_expressions: t.List[exp.Expression] = ( + eq_expressions: t.List[exp.Expr] = ( [v.unnest()] if isinstance(v, exp.Paren) else 
v.expressions ) @@ -641,6 +641,7 @@ def parse_strings_with_macro_refs(value: t.Any, dialect: DialectType) -> t.Any: "physical_properties_", "virtual_properties_", "materialization_properties_", + "grants_", mode="before", check_fields=False, )(parse_properties) @@ -662,19 +663,20 @@ def parse_strings_with_macro_refs(value: t.Any, dialect: DialectType) -> t.Any: class ParsableSql(PydanticModel): sql: str + transaction: t.Optional[bool] = None - _parsed: t.Optional[exp.Expression] = None + _parsed: t.Optional[exp.Expr] = None _parsed_dialect: t.Optional[str] = None - def parse(self, dialect: str) -> exp.Expression: + def parse(self, dialect: str) -> exp.Expr: if self._parsed is None or self._parsed_dialect != dialect: self._parsed = d.parse_one(self.sql, dialect=dialect) self._parsed_dialect = dialect - return self._parsed + return self._parsed # type: ignore[return-value] @classmethod def from_parsed_expression( - cls, parsed_expression: exp.Expression, dialect: str, use_meta_sql: bool = False + cls, parsed_expression: exp.Expr, dialect: str, use_meta_sql: bool = False ) -> ParsableSql: sql = ( parsed_expression.meta.get("sql") or parsed_expression.sql(dialect=dialect) @@ -695,7 +697,7 @@ def _validate_parsable_sql( return v if isinstance(v, str): return ParsableSql(sql=v) - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return ParsableSql.from_parsed_expression( v, get_dialect(info.data), use_meta_sql=False ) @@ -705,7 +707,7 @@ def _validate_parsable_sql( ParsableSql(sql=s) if isinstance(s, str) else ParsableSql.from_parsed_expression(s, dialect, use_meta_sql=False) - if isinstance(s, exp.Expression) + if isinstance(s, exp.Expr) else ParsableSql.parse_obj(s) for s in v ] diff --git a/sqlmesh/core/model/decorator.py b/sqlmesh/core/model/decorator.py index 73452cc165..328b763f9f 100644 --- a/sqlmesh/core/model/decorator.py +++ b/sqlmesh/core/model/decorator.py @@ -193,7 +193,7 @@ def model( ) rendered_name = rendered_fields["name"] - if 
isinstance(rendered_name, exp.Expression): + if isinstance(rendered_name, exp.Expr): rendered_fields["name"] = rendered_name.sql(dialect=dialect) rendered_defaults = ( diff --git a/sqlmesh/core/model/definition.py b/sqlmesh/core/model/definition.py index dbbb8ff3a8..d4f23b4fc0 100644 --- a/sqlmesh/core/model/definition.py +++ b/sqlmesh/core/model/definition.py @@ -34,6 +34,7 @@ ) from sqlmesh.core.model.meta import ModelMeta from sqlmesh.core.model.kind import ( + ExternalKind, ModelKindName, SeedKind, ModelKind, @@ -67,6 +68,7 @@ from sqlmesh.core.context import ExecutionContext from sqlmesh.core.engine_adapter import EngineAdapter from sqlmesh.core.engine_adapter._typing import QueryOrDF + from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.linter.rule import Rule from sqlmesh.core.snapshot import DeployabilityIndex, Node, Snapshot from sqlmesh.utils.jinja import MacroReference @@ -213,7 +215,7 @@ def render_definition( include_python: bool = True, include_defaults: bool = False, render_query: bool = False, - ) -> t.List[exp.Expression]: + ) -> t.List[exp.Expr]: """Returns the original list of sql expressions comprising the model definition. Args: @@ -362,8 +364,9 @@ def render_pre_statements( expand: t.Iterable[str] = tuple(), deployability_index: t.Optional[DeployabilityIndex] = None, engine_adapter: t.Optional[EngineAdapter] = None, + inside_transaction: t.Optional[bool] = True, **kwargs: t.Any, - ) -> t.List[exp.Expression]: + ) -> t.List[exp.Expr]: """Renders pre-statements for a model. Pre-statements are statements that preceded the model's SELECT query. @@ -383,7 +386,11 @@ def render_pre_statements( The list of rendered expressions. 
""" return self._render_statements( - self.pre_statements, + [ + stmt + for stmt in self.pre_statements + if stmt.args.get("transaction", True) == inside_transaction + ], start=start, end=end, execution_time=execution_time, @@ -404,8 +411,9 @@ def render_post_statements( expand: t.Iterable[str] = tuple(), deployability_index: t.Optional[DeployabilityIndex] = None, engine_adapter: t.Optional[EngineAdapter] = None, + inside_transaction: t.Optional[bool] = True, **kwargs: t.Any, - ) -> t.List[exp.Expression]: + ) -> t.List[exp.Expr]: """Renders post-statements for a model. Post-statements are statements that follow after the model's SELECT query. @@ -419,13 +427,18 @@ def render_post_statements( that depend on materialized tables. Model definitions are inlined and can thus be run end to end on the fly. deployability_index: Determines snapshots that are deployable in the context of this render. + inside_transaction: Whether to render hooks with transaction=True (inside) or transaction=False (outside). kwargs: Additional kwargs to pass to the renderer. Returns: The list of rendered expressions. 
""" return self._render_statements( - self.post_statements, + [ + stmt + for stmt in self.post_statements + if stmt.args.get("transaction", True) == inside_transaction + ], start=start, end=end, execution_time=execution_time, @@ -447,7 +460,7 @@ def render_on_virtual_update( deployability_index: t.Optional[DeployabilityIndex] = None, engine_adapter: t.Optional[EngineAdapter] = None, **kwargs: t.Any, - ) -> t.List[exp.Expression]: + ) -> t.List[exp.Expr]: return self._render_statements( self.on_virtual_update, start=start, @@ -539,15 +552,15 @@ def render_audit_query( return rendered_query @property - def pre_statements(self) -> t.List[exp.Expression]: + def pre_statements(self) -> t.List[exp.Expr]: return self._get_parsed_statements("pre_statements_") @property - def post_statements(self) -> t.List[exp.Expression]: + def post_statements(self) -> t.List[exp.Expr]: return self._get_parsed_statements("post_statements_") @property - def on_virtual_update(self) -> t.List[exp.Expression]: + def on_virtual_update(self) -> t.List[exp.Expr]: return self._get_parsed_statements("on_virtual_update_") @property @@ -559,22 +572,24 @@ def macro_definitions(self) -> t.List[d.MacroDef]: if isinstance(s, d.MacroDef) ] - def _get_parsed_statements(self, attr_name: str) -> t.List[exp.Expression]: + def _get_parsed_statements(self, attr_name: str) -> t.List[exp.Expr]: value = getattr(self, attr_name) if not value: return [] result = [] for v in value: parsed = v.parse(self.dialect) + if getattr(v, "transaction", None) is not None: + parsed.set("transaction", v.transaction) if not isinstance(parsed, exp.Semicolon): result.append(parsed) return result def _render_statements( self, - statements: t.Iterable[exp.Expression], + statements: t.Iterable[exp.Expr], **kwargs: t.Any, - ) -> t.List[exp.Expression]: + ) -> t.List[exp.Expr]: rendered = ( self._statement_renderer(statement).render(**kwargs) for statement in statements @@ -582,7 +597,7 @@ def _render_statements( ) return [r for 
expressions in rendered if expressions for r in expressions] - def _statement_renderer(self, expression: exp.Expression) -> ExpressionRenderer: + def _statement_renderer(self, expression: exp.Expr) -> ExpressionRenderer: expression_key = id(expression) if expression_key not in self._statement_renderer_cache: self._statement_renderer_cache[expression_key] = ExpressionRenderer( @@ -616,7 +631,7 @@ def render_signals( The list of rendered expressions. """ - def _render(e: exp.Expression) -> str | int | float | bool: + def _render(e: exp.Expr) -> str | int | float | bool: rendered_exprs = ( self._create_renderer(e).render(start=start, end=end, execution_time=execution_time) or [] @@ -661,7 +676,7 @@ def render_merge_filter( start: t.Optional[TimeLike] = None, end: t.Optional[TimeLike] = None, execution_time: t.Optional[TimeLike] = None, - ) -> t.Optional[exp.Expression]: + ) -> t.Optional[exp.Expr]: if self.merge_filter is None: return None rendered_exprs = ( @@ -675,9 +690,9 @@ def render_merge_filter( return rendered_exprs[0].transform(d.replace_merge_table_aliases, dialect=self.dialect) def _render_properties( - self, properties: t.Dict[str, exp.Expression] | SessionProperties, **render_kwargs: t.Any + self, properties: t.Dict[str, exp.Expr] | SessionProperties, **render_kwargs: t.Any ) -> t.Dict[str, t.Any]: - def _render(expression: exp.Expression) -> exp.Expression | None: + def _render(expression: exp.Expr) -> exp.Expr | None: # note: we use the _statement_renderer instead of _create_renderer because it sets model_fqn which # in turn makes @this_model available in the evaluation context rendered_exprs = self._statement_renderer(expression).render(**render_kwargs) @@ -699,7 +714,7 @@ def _render(expression: exp.Expression) -> exp.Expression | None: return { k: rendered for k, v in properties.items() - if (rendered := (_render(v) if isinstance(v, exp.Expression) else v)) + if (rendered := (_render(v) if isinstance(v, exp.Expr) else v)) } def 
render_physical_properties(self, **render_kwargs: t.Any) -> t.Dict[str, t.Any]: @@ -711,7 +726,7 @@ def render_virtual_properties(self, **render_kwargs: t.Any) -> t.Dict[str, t.Any def render_session_properties(self, **render_kwargs: t.Any) -> t.Dict[str, t.Any]: return self._render_properties(properties=self.session_properties, **render_kwargs) - def _create_renderer(self, expression: exp.Expression) -> ExpressionRenderer: + def _create_renderer(self, expression: exp.Expr) -> ExpressionRenderer: return ExpressionRenderer( expression, self.dialect, @@ -738,7 +753,7 @@ def ctas_query(self, **render_kwarg: t.Any) -> exp.Query: query = self.render_query_or_raise(**render_kwarg).limit(0) for select_or_set_op in query.find_all(exp.Select, exp.SetOperation): - if isinstance(select_or_set_op, exp.Select) and select_or_set_op.args.get("from"): + if isinstance(select_or_set_op, exp.Select) and select_or_set_op.args.get("from_"): select_or_set_op.where(exp.false(), copy=False) if self.managed_columns: @@ -807,7 +822,7 @@ def set_time_format(self, default_time_format: str = c.DEFAULT_TIME_COLUMN_FORMA def convert_to_time_column( self, time: TimeLike, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None - ) -> exp.Expression: + ) -> exp.Expr: """Convert a TimeLike object to the same time format and type as the model's time column.""" if self.time_column: if columns_to_types is None: @@ -955,7 +970,7 @@ def validate_definition(self) -> None: col.name for expr in values for col in t.cast( - exp.Expression, exp.maybe_parse(expr, dialect=self.dialect) + exp.Expr, exp.maybe_parse(expr, dialect=self.dialect) ).find_all(exp.Column) ] @@ -1021,6 +1036,13 @@ def validate_definition(self) -> None: # Will raise if the custom materialization points to an invalid class get_custom_materialization_type_or_raise(self.kind.materialization) + # Embedded model kind shouldn't have audits + if self.kind.name == ModelKindName.EMBEDDED and self.audits: + raise_config_error( + "Audits are 
not supported for embedded models", + self._path, + ) + def is_breaking_change(self, previous: Model) -> t.Optional[bool]: """Determines whether this model is a breaking change in relation to the `previous` model. @@ -1186,6 +1208,8 @@ def metadata_hash(self) -> str: gen(self.session_properties_) if self.session_properties_ else None, *[gen(g) for g in self.grains], *self._audit_metadata_hash_values(), + json.dumps(self.grants, sort_keys=True) if self.grants else None, + self.grants_target_layer, ] for key, value in (self.virtual_properties or {}).items(): @@ -1197,6 +1221,9 @@ def metadata_hash(self) -> str: for k, v in sorted(args.items()): metadata.append(f"{k}:{gen(v)}") + if self.dbt_node_info: + metadata.append(self.dbt_node_info.json(sort_keys=True)) + metadata.extend(self._additional_metadata) self._metadata_hash = hash_data(metadata) @@ -1207,6 +1234,24 @@ def is_model(self) -> bool: """Return True if this is a model node""" return True + @property + def grants_table_type(self) -> DataObjectType: + """Get the table type for grants application (TABLE, VIEW, MATERIALIZED_VIEW). + + Returns: + The DataObjectType that should be used when applying grants to this model. 
+ """ + from sqlmesh.core.engine_adapter.shared import DataObjectType + + if self.kind.is_view: + if hasattr(self.kind, "materialized") and getattr(self.kind, "materialized", False): + return DataObjectType.MATERIALIZED_VIEW + return DataObjectType.VIEW + if self.kind.is_managed: + return DataObjectType.MANAGED_TABLE + # All other materialized models are tables + return DataObjectType.TABLE + @property def _additional_metadata(self) -> t.List[str]: additional_metadata = [] @@ -1221,7 +1266,7 @@ def _additional_metadata(self) -> t.List[str]: return additional_metadata - def _is_metadata_statement(self, statement: exp.Expression) -> bool: + def _is_metadata_statement(self, statement: exp.Expr) -> bool: if isinstance(statement, d.MacroDef): return True if isinstance(statement, d.MacroFunc): @@ -1250,7 +1295,7 @@ def full_depends_on(self) -> t.Set[str]: return self._full_depends_on @property - def partitioned_by(self) -> t.List[exp.Expression]: + def partitioned_by(self) -> t.List[exp.Expr]: """Columns to partition the model by, including the time column if it is not already included.""" if self.time_column and not self._is_time_column_in_partitioned_by: # This allows the user to opt out of automatic time_column injection @@ -1278,7 +1323,7 @@ def partition_interval_unit(self) -> t.Optional[IntervalUnit]: return None @property - def audits_with_args(self) -> t.List[t.Tuple[Audit, t.Dict[str, exp.Expression]]]: + def audits_with_args(self) -> t.List[t.Tuple[Audit, t.Dict[str, exp.Expr]]]: from sqlmesh.core.audit.builtin import BUILT_IN_AUDITS audits_by_name = {**BUILT_IN_AUDITS, **self.audit_definitions} @@ -1377,8 +1422,8 @@ def render_definition( include_python: bool = True, include_defaults: bool = False, render_query: bool = False, - ) -> t.List[exp.Expression]: - result = super().render_definition( + ) -> t.List[exp.Expr]: + result: t.List[exp.Expr] = super().render_definition( include_python=include_python, include_defaults=include_defaults ) @@ -1820,6 +1865,12 
@@ def _data_hash_values_no_sql(self) -> t.List[str]: for column_name, column_hash in self.column_hashes.items(): data.append(column_name) data.append(column_hash) + + # Include grants in data hash for seed models to force recreation on grant changes + # since seed models don't support migration + data.append(json.dumps(self.grants, sort_keys=True) if self.grants else "") + data.append(self.grants_target_layer) + return data @@ -1895,7 +1946,7 @@ def render_definition( include_python: bool = True, include_defaults: bool = False, render_query: bool = False, - ) -> t.List[exp.Expression]: + ) -> t.List[exp.Expr]: # Ignore the provided value for the include_python flag, since the Pyhon model's # definition without Python code is meaningless. return super().render_definition( @@ -1919,6 +1970,7 @@ def _data_hash_values_no_sql(self) -> t.List[str]: class ExternalModel(_Model): """The model definition which represents an external source/table.""" + kind: ModelKind = ExternalKind() source_type: t.Literal["external"] = "external" def is_breaking_change(self, previous: Model) -> t.Optional[bool]: @@ -1949,7 +2001,7 @@ class AuditResult(PydanticModel): """The model this audit is for.""" count: t.Optional[int] = None """The number of records returned by the audit query. This could be None if the audit was skipped.""" - query: t.Optional[exp.Expression] = None + query: t.Optional[exp.Expr] = None """The rendered query used by the audit. This could be None if the audit was skipped.""" skipped: bool = False """Whether or not the audit was blocking. 
This can be overriden by the user.""" @@ -1957,7 +2009,7 @@ class AuditResult(PydanticModel): class EvaluatableSignals(PydanticModel): - signals_to_kwargs: t.Dict[str, t.Dict[str, t.Optional[exp.Expression]]] + signals_to_kwargs: t.Dict[str, t.Dict[str, t.Optional[exp.Expr]]] """A mapping of signal names to the kwargs passed to the signal.""" python_env: t.Dict[str, Executable] """The Python environment that should be used to evaluated the rendered signal calls.""" @@ -2002,7 +2054,7 @@ def _extract_blueprint_variables(blueprint: t.Any, path: Path) -> t.Dict[str, t. def create_models_from_blueprints( - gateway: t.Optional[str | exp.Expression], + gateway: t.Optional[str | exp.Expr], blueprints: t.Any, get_variables: t.Callable[[t.Optional[str]], t.Dict[str, str]], loader: t.Callable[..., Model], @@ -2013,7 +2065,9 @@ def create_models_from_blueprints( **loader_kwargs: t.Any, ) -> t.List[Model]: model_blueprints: t.List[Model] = [] + original_default_catalog = loader_kwargs.get("default_catalog") for blueprint in _extract_blueprints(blueprints, path): + loader_kwargs["default_catalog"] = original_default_catalog blueprint_variables = _extract_blueprint_variables(blueprint, path) if gateway: @@ -2031,12 +2085,15 @@ def create_models_from_blueprints( else: gateway_name = None - if ( - default_catalog_per_gateway - and gateway_name - and (catalog := default_catalog_per_gateway.get(gateway_name)) is not None - ): - loader_kwargs["default_catalog"] = catalog + if default_catalog_per_gateway and gateway_name: + catalog = default_catalog_per_gateway.get(gateway_name) + if catalog is not None: + loader_kwargs["default_catalog"] = catalog + else: + # Gateway exists but has no entry in the dict (e.g., catalog-unsupported + # engines like ClickHouse). Clear the default catalog so the global + # default from the primary gateway doesn't leak into this model's name. 
+ loader_kwargs["default_catalog"] = None model_blueprints.append( loader( @@ -2053,7 +2110,7 @@ def create_models_from_blueprints( def load_sql_based_models( - expressions: t.List[exp.Expression], + expressions: t.List[exp.Expr], get_variables: t.Callable[[t.Optional[str]], t.Dict[str, str]], path: Path = Path(), module_path: Path = Path(), @@ -2061,8 +2118,8 @@ def load_sql_based_models( default_catalog_per_gateway: t.Optional[t.Dict[str, str]] = None, **loader_kwargs: t.Any, ) -> t.List[Model]: - gateway: t.Optional[exp.Expression] = None - blueprints: t.Optional[exp.Expression] = None + gateway: t.Optional[exp.Expr] = None + blueprints: t.Optional[exp.Expr] = None model_meta = seq_get(expressions, 0) for prop in (isinstance(model_meta, d.Model) and model_meta.expressions) or []: @@ -2108,7 +2165,7 @@ def load_sql_based_models( def load_sql_based_model( - expressions: t.List[exp.Expression], + expressions: t.List[exp.Expr], *, defaults: t.Optional[t.Dict[str, t.Any]] = None, path: t.Optional[Path] = None, @@ -2254,7 +2311,7 @@ def load_sql_based_model( if kind_prop.name.lower() == "merge_filter": meta_fields["kind"].expressions[idx] = unrendered_merge_filter - if isinstance(meta_fields.get("dialect"), exp.Expression): + if isinstance(meta_fields.get("dialect"), exp.Expr): meta_fields["dialect"] = meta_fields["dialect"].name # The name of the model will be inferred from its path relative to `models/`, if it's not explicitly specified @@ -2315,7 +2372,7 @@ def load_sql_based_model( def create_sql_model( name: TableName, - query: t.Optional[exp.Expression], + query: t.Optional[exp.Expr], **kwargs: t.Any, ) -> Model: """Creates a SQL model. 
@@ -2440,7 +2497,7 @@ def create_python_model( ) depends_on = { dep.sql(dialect=dialect) - for dep in t.cast(t.List[exp.Expression], depends_on_rendered)[0].expressions + for dep in t.cast(t.List[exp.Expr], depends_on_rendered)[0].expressions } used_variables = {k: v for k, v in (variables or {}).items() if k in referenced_variables} @@ -2545,7 +2602,7 @@ def _create_model( if not issubclass(klass, SqlModel): defaults.pop("optimize_query", None) - statements: t.List[t.Union[exp.Expression, t.Tuple[exp.Expression, bool]]] = [] + statements: t.List[t.Union[exp.Expr, t.Tuple[exp.Expr, bool]]] = [] if "query" in kwargs: statements.append(kwargs["query"]) @@ -2562,9 +2619,17 @@ def _create_model( if statement_field in kwargs: # Macros extracted from these statements need to be treated as metadata only is_metadata = statement_field == "on_virtual_update" - statements.extend((stmt, is_metadata) for stmt in kwargs[statement_field]) + for stmt in kwargs[statement_field]: + # Extract the expression if it's ParsableSql already + expr = stmt.parse(dialect) if isinstance(stmt, ParsableSql) else stmt + statements.append((expr, is_metadata)) kwargs[statement_field] = [ - ParsableSql.from_parsed_expression(stmt, dialect, use_meta_sql=use_original_sql) + # this to retain the transaction information + stmt + if isinstance(stmt, ParsableSql) + else ParsableSql.from_parsed_expression( + stmt, dialect, use_meta_sql=use_original_sql + ) for stmt in kwargs[statement_field] ] @@ -2576,11 +2641,11 @@ def _create_model( if isinstance(property_values, exp.Tuple): statements.extend(property_values.expressions) - if isinstance(getattr(kwargs.get("kind"), "merge_filter", None), exp.Expression): + if isinstance(getattr(kwargs.get("kind"), "merge_filter", None), exp.Expr): statements.append(kwargs["kind"].merge_filter) jinja_macro_references, referenced_variables = extract_macro_references_and_variables( - *(gen(e if isinstance(e, exp.Expression) else e[0]) for e in statements) + *(gen(e if 
isinstance(e, exp.Expr) else e[0]) for e in statements) ) if jinja_macros: @@ -2627,7 +2692,7 @@ def _create_model( model.audit_definitions.update(audit_definitions) # Any macro referenced in audits or signals needs to be treated as metadata-only - statements.extend((audit.query, True) for audit in audit_definitions.values()) + statements.extend((audit.query, True) for audit in audit_definitions.values()) # type: ignore[misc] # Ensure that all audits referenced in the model are defined from sqlmesh.core.audit.builtin import BUILT_IN_AUDITS @@ -2683,14 +2748,14 @@ def _create_model( def _split_sql_model_statements( - expressions: t.List[exp.Expression], + expressions: t.List[exp.Expr], path: t.Optional[Path], dialect: t.Optional[str] = None, ) -> t.Tuple[ - t.Optional[exp.Expression], - t.List[exp.Expression], - t.List[exp.Expression], - t.List[exp.Expression], + t.Optional[exp.Expr], + t.List[exp.Expr], + t.List[exp.Expr], + t.List[exp.Expr], UniqueKeyDict[str, ModelAudit], ]: """Extracts the SELECT query from a sequence of expressions. 
@@ -2751,8 +2816,8 @@ def _split_sql_model_statements( def _resolve_properties( default: t.Optional[t.Dict[str, t.Any]], - provided: t.Optional[exp.Expression | t.Dict[str, t.Any]], -) -> t.Optional[exp.Expression]: + provided: t.Optional[exp.Expr | t.Dict[str, t.Any]], +) -> t.Optional[exp.Expr]: if isinstance(provided, dict): properties = {k: exp.Literal.string(k).eq(v) for k, v in provided.items()} elif provided: @@ -2774,7 +2839,7 @@ def _resolve_properties( return None -def _list_of_calls_to_exp(value: t.List[t.Tuple[str, t.Dict[str, t.Any]]]) -> exp.Expression: +def _list_of_calls_to_exp(value: t.List[t.Tuple[str, t.Dict[str, t.Any]]]) -> exp.Expr: return exp.Tuple( expressions=[ exp.Anonymous( @@ -2789,16 +2854,16 @@ def _list_of_calls_to_exp(value: t.List[t.Tuple[str, t.Dict[str, t.Any]]]) -> ex ) -def _is_projection(expr: exp.Expression) -> bool: +def _is_projection(expr: exp.Expr) -> bool: parent = expr.parent return isinstance(parent, exp.Select) and expr.arg_key == "expressions" -def _single_expr_or_tuple(values: t.Sequence[exp.Expression]) -> exp.Expression | exp.Tuple: +def _single_expr_or_tuple(values: t.Sequence[exp.Expr]) -> exp.Expr | exp.Tuple: return values[0] if len(values) == 1 else exp.Tuple(expressions=values) -def _refs_to_sql(values: t.Any) -> exp.Expression: +def _refs_to_sql(values: t.Any) -> exp.Expr: return exp.Tuple(expressions=values) @@ -2814,7 +2879,7 @@ def render_meta_fields( blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, ) -> t.Dict[str, t.Any]: def render_field_value(value: t.Any) -> t.Any: - if isinstance(value, exp.Expression) or (isinstance(value, str) and "@" in value): + if isinstance(value, exp.Expr) or (isinstance(value, str) and "@" in value): expression = exp.maybe_parse(value, dialect=dialect) rendered_expr = render_expression( expression=expression, @@ -2866,6 +2931,13 @@ def render_field_value(value: t.Any) -> t.Any: for key, value in field_value.items(): if key in RUNTIME_RENDERED_MODEL_FIELDS: 
rendered_dict[key] = parse_strings_with_macro_refs(value, dialect) + elif ( + # don't parse kind auto_restatement_cron="@..." kwargs (e.g. @daily) into MacroVar + key == "auto_restatement_cron" + and isinstance(value, str) + and value.lower() in CRON_SHORTCUTS + ): + rendered_dict[key] = value elif (rendered := render_field_value(value)) is not None: rendered_dict[key] = rendered @@ -2944,7 +3016,7 @@ def parse_defaults_properties( def render_expression( - expression: exp.Expression, + expression: exp.Expr, module_path: Path, path: t.Optional[Path], jinja_macros: t.Optional[JinjaMacroRegistry] = None, @@ -2953,7 +3025,7 @@ def render_expression( variables: t.Optional[t.Dict[str, t.Any]] = None, default_catalog: t.Optional[str] = None, blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, -) -> t.Optional[t.List[exp.Expression]]: +) -> t.Optional[t.List[exp.Expr]]: meta_python_env = make_python_env( expressions=expression, jinja_macro_references=None, @@ -3012,6 +3084,9 @@ def render_expression( "formatting": str, "optimize_query": str, "virtual_environment_mode": lambda value: exp.Literal.string(value.value), + "dbt_node_info_": lambda value: value.to_expression(), + "grants_": lambda value: value, + "grants_target_layer": lambda value: exp.Literal.string(value.value), } @@ -3022,8 +3097,8 @@ def get_model_name(path: Path) -> str: # function applied to time column when automatically used for partitioning in INCREMENTAL_BY_TIME_RANGE models def clickhouse_partition_func( - column: exp.Expression, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] -) -> exp.Expression: + column: exp.Expr, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] +) -> exp.Expr: # `toMonday()` function accepts a Date or DateTime type column col_type = (columns_to_types and columns_to_types.get(column.name)) or exp.DataType.build( diff --git a/sqlmesh/core/model/kind.py b/sqlmesh/core/model/kind.py index dc5f533c21..d7a7bb9579 100644 --- a/sqlmesh/core/model/kind.py +++ 
b/sqlmesh/core/model/kind.py @@ -23,7 +23,7 @@ PydanticModel, SQLGlotBool, SQLGlotColumn, - SQLGlotListOfColumnsOrStar, + SQLGlotListOfFieldsOrStar, SQLGlotListOfFields, SQLGlotPositiveInt, SQLGlotString, @@ -119,6 +119,10 @@ def is_custom(self) -> bool: def is_managed(self) -> bool: return self.model_kind_name == ModelKindName.MANAGED + @property + def is_dbt_custom(self) -> bool: + return self.model_kind_name == ModelKindName.DBT_CUSTOM + @property def is_symbolic(self) -> bool: """A symbolic model is one that doesn't execute at all.""" @@ -150,6 +154,11 @@ def full_history_restatement_only(self) -> bool: def supports_python_models(self) -> bool: return True + @property + def supports_grants(self) -> bool: + """Whether this model kind supports grants configuration.""" + return self.is_materialized or self.is_view + class ModelKindName(str, ModelKindMixin, Enum): """The kind of model, determining how this data is computed and stored in the warehouse.""" @@ -170,6 +179,7 @@ class ModelKindName(str, ModelKindMixin, Enum): EXTERNAL = "EXTERNAL" CUSTOM = "CUSTOM" MANAGED = "MANAGED" + DBT_CUSTOM = "DBT_CUSTOM" @property def model_kind_name(self) -> t.Optional[ModelKindName]: @@ -269,7 +279,7 @@ def model_kind_name(self) -> t.Optional[ModelKindName]: return self.name def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: kwargs["expressions"] = expressions return d.ModelKind(this=self.name.value.upper(), **kwargs) @@ -284,7 +294,7 @@ def metadata_hash_values(self) -> t.List[t.Optional[str]]: class TimeColumn(PydanticModel): - column: exp.Expression + column: exp.Expr format: t.Optional[str] = None @classmethod @@ -296,7 +306,7 @@ def _time_column_validator(v: t.Any, info: ValidationInfo) -> TimeColumn: @field_validator("column", mode="before") @classmethod - def _column_validator(cls, v: t.Union[str, exp.Expression]) -> exp.Expression: 
+ def _column_validator(cls, v: t.Union[str, exp.Expr]) -> exp.Expr: if not v: raise ConfigError("Time Column cannot be empty.") if isinstance(v, str): @@ -304,14 +314,14 @@ def _column_validator(cls, v: t.Union[str, exp.Expression]) -> exp.Expression: return v @property - def expression(self) -> exp.Expression: + def expression(self) -> exp.Expr: """Convert this pydantic model into a time_column SQLGlot expression.""" if not self.format: return self.column return exp.Tuple(expressions=[self.column, exp.Literal.string(self.format)]) - def to_expression(self, dialect: str) -> exp.Expression: + def to_expression(self, dialect: str) -> exp.Expr: """Convert this pydantic model into a time_column SQLGlot expression.""" if not self.format: return self.column @@ -336,7 +346,7 @@ def create(cls, v: t.Any, dialect: str) -> Self: exp.column(column_expr) if isinstance(column_expr, exp.Identifier) else column_expr ) format = v.expressions[1].name if len(v.expressions) > 1 else None - elif isinstance(v, exp.Expression): + elif isinstance(v, exp.Expr): column = exp.column(v) if isinstance(v, exp.Identifier) else v format = None elif isinstance(v, str): @@ -390,7 +400,7 @@ def metadata_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -434,7 +444,7 @@ def metadata_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -463,7 +473,7 @@ class IncrementalByTimeRangeKind(_IncrementalBy): _time_column_validator = TimeColumn.validator() def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: 
t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -503,7 +513,7 @@ class IncrementalByUniqueKeyKind(_IncrementalBy): ) unique_key: SQLGlotListOfFields when_matched: t.Optional[exp.Whens] = None - merge_filter: t.Optional[exp.Expression] = None + merge_filter: t.Optional[exp.Expr] = None batch_concurrency: t.Literal[1] = 1 @field_validator("when_matched", mode="before") @@ -533,9 +543,9 @@ def _when_matched_validator( @field_validator("merge_filter", mode="before") def _merge_filter_validator( cls, - v: t.Optional[exp.Expression], + v: t.Optional[exp.Expr], info: ValidationInfo, - ) -> t.Optional[exp.Expression]: + ) -> t.Optional[exp.Expr]: if v is None: return v @@ -558,7 +568,7 @@ def data_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -580,7 +590,7 @@ class IncrementalByPartitionKind(_Incremental): disable_restatement: SQLGlotBool = False @field_validator("forward_only", mode="before") - def _forward_only_validator(cls, v: t.Union[bool, exp.Expression]) -> t.Literal[True]: + def _forward_only_validator(cls, v: t.Union[bool, exp.Expr]) -> t.Literal[True]: if v is not True: raise ConfigError( "Do not specify the `forward_only` configuration key - INCREMENTAL_BY_PARTITION models are always forward_only." 
@@ -596,7 +606,7 @@ def metadata_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -630,7 +640,7 @@ def metadata_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -659,7 +669,7 @@ def supports_python_models(self) -> bool: return False def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -680,7 +690,7 @@ class SeedKind(_ModelKind): def _parse_csv_settings(cls, v: t.Any) -> t.Optional[CsvSettings]: if v is None or isinstance(v, CsvSettings): return v - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): tuple_exp = parse_properties(cls, v, None) if not tuple_exp: return None @@ -690,7 +700,7 @@ def _parse_csv_settings(cls, v: t.Any) -> t.Optional[CsvSettings]: return v def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: """Convert the seed kind into a SQLGlot expression.""" return super().to_expression( @@ -746,13 +756,16 @@ class _SCDType2Kind(_Incremental): @field_validator("time_data_type", mode="before") @classmethod - def _time_data_type_validator( - cls, v: t.Union[str, exp.Expression], values: t.Any - ) -> exp.Expression: - if isinstance(v, exp.Expression) and not isinstance(v, exp.DataType): + def _time_data_type_validator(cls, v: t.Union[str, exp.Expr], values: 
t.Any) -> exp.Expr: + if isinstance(v, exp.Expr) and not isinstance(v, exp.DataType): v = v.name dialect = get_dialect(values) data_type = exp.DataType.build(v, dialect=dialect) + # Clear meta["sql"] (set by our parser extension) so the pydantic encoder + # uses dialect-aware rendering: e.sql(dialect=meta["dialect"]). Without this, + # the raw SQL text takes priority, which can be wrong for dialect-normalized + # types (e.g., default "TIMESTAMP" should render as "DATETIME" in BigQuery). + data_type.meta.pop("sql", None) data_type.meta["dialect"] = dialect return data_type @@ -785,7 +798,7 @@ def metadata_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -825,7 +838,7 @@ def data_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -842,7 +855,7 @@ def to_expression( class SCDType2ByColumnKind(_SCDType2Kind): name: t.Literal[ModelKindName.SCD_TYPE_2_BY_COLUMN] = ModelKindName.SCD_TYPE_2_BY_COLUMN - columns: SQLGlotListOfColumnsOrStar + columns: SQLGlotListOfFieldsOrStar execution_time_as_valid_from: SQLGlotBool = False updated_at_name: t.Optional[SQLGlotColumn] = None @@ -861,7 +874,7 @@ def data_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -887,6 +900,46 @@ def supports_python_models(self) -> bool: return False +class DbtCustomKind(_ModelKind): + name: 
t.Literal[ModelKindName.DBT_CUSTOM] = ModelKindName.DBT_CUSTOM + materialization: str + adapter: str = "default" + definition: str + dialect: t.Optional[str] = Field(None, validate_default=True) + + _dialect_validator = kind_dialect_validator + + @field_validator("materialization", "adapter", "definition", mode="before") + @classmethod + def _validate_fields(cls, v: t.Any) -> str: + return validate_string(v) + + @property + def data_hash_values(self) -> t.List[t.Optional[str]]: + return [ + *super().data_hash_values, + self.materialization, + self.definition, + self.adapter, + self.dialect, + ] + + def to_expression( + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any + ) -> d.ModelKind: + return super().to_expression( + expressions=[ + *(expressions or []), + *_properties( + { + "materialization": exp.Literal.string(self.materialization), + "adapter": exp.Literal.string(self.adapter), + } + ), + ], + ) + + class EmbeddedKind(_ModelKind): name: t.Literal[ModelKindName.EMBEDDED] = ModelKindName.EMBEDDED @@ -955,7 +1008,7 @@ def metadata_hash_values(self) -> t.List[t.Optional[str]]: ] def to_expression( - self, expressions: t.Optional[t.List[exp.Expression]] = None, **kwargs: t.Any + self, expressions: t.Optional[t.List[exp.Expr]] = None, **kwargs: t.Any ) -> d.ModelKind: return super().to_expression( expressions=[ @@ -992,6 +1045,7 @@ def to_expression( SCDType2ByColumnKind, CustomKind, ManagedKind, + DbtCustomKind, ], Field(discriminator="name"), ] @@ -1011,6 +1065,7 @@ def to_expression( ModelKindName.SCD_TYPE_2_BY_COLUMN: SCDType2ByColumnKind, ModelKindName.CUSTOM: CustomKind, ModelKindName.MANAGED: ManagedKind, + ModelKindName.DBT_CUSTOM: DbtCustomKind, } @@ -1053,6 +1108,18 @@ def create_model_kind(v: t.Any, dialect: str, defaults: t.Dict[str, t.Any]) -> M ): props[on_change_property] = defaults.get(on_change_property) + # only pass the batch_concurrency user default to models inheriting from _IncrementalBy + # that don't explicitly set it 
in the model definition, but ignore subclasses of _IncrementalBy + # that hardcode a specific batch_concurrency + if issubclass(kind_type, _IncrementalBy): + BATCH_CONCURRENCY: t.Final = "batch_concurrency" + if ( + props.get(BATCH_CONCURRENCY) is None + and defaults.get(BATCH_CONCURRENCY) is not None + and kind_type.all_field_infos()[BATCH_CONCURRENCY].default is None + ): + props[BATCH_CONCURRENCY] = defaults.get(BATCH_CONCURRENCY) + if kind_type == CustomKind: # load the custom materialization class and check if it uses a custom kind type from sqlmesh.core.snapshot.evaluator import get_custom_materialization_type @@ -1078,7 +1145,7 @@ def create_model_kind(v: t.Any, dialect: str, defaults: t.Dict[str, t.Any]) -> M ) return kind_type(**props) - name = (v.name if isinstance(v, exp.Expression) else str(v)).upper() + name = (v.name if isinstance(v, exp.Expr) else str(v)).upper() return model_kind_type_from_name(name)(name=name) # type: ignore diff --git a/sqlmesh/core/model/meta.py b/sqlmesh/core/model/meta.py index 9208fbdbb5..a73d6d871a 100644 --- a/sqlmesh/core/model/meta.py +++ b/sqlmesh/core/model/meta.py @@ -1,6 +1,7 @@ from __future__ import annotations import typing as t +from enum import Enum from functools import cached_property from typing_extensions import Self @@ -13,6 +14,7 @@ from sqlmesh.core.config.common import VirtualEnvironmentMode from sqlmesh.core.config.linter import LinterConfig from sqlmesh.core.dialect import normalize_model_name +from sqlmesh.utils import classproperty from sqlmesh.core.model.common import ( bool_validator, default_catalog_validator, @@ -46,8 +48,39 @@ if t.TYPE_CHECKING: from sqlmesh.core._typing import CustomMaterializationProperties, SessionProperties + from sqlmesh.core.engine_adapter._typing import GrantsConfig -FunctionCall = t.Tuple[str, t.Dict[str, exp.Expression]] +FunctionCall = t.Tuple[str, t.Dict[str, exp.Expr]] + + +class GrantsTargetLayer(str, Enum): + """Target layer(s) where grants should be applied.""" + + 
ALL = "all" + PHYSICAL = "physical" + VIRTUAL = "virtual" + + @classproperty + def default(cls) -> "GrantsTargetLayer": + return GrantsTargetLayer.VIRTUAL + + @property + def is_all(self) -> bool: + return self == GrantsTargetLayer.ALL + + @property + def is_physical(self) -> bool: + return self == GrantsTargetLayer.PHYSICAL + + @property + def is_virtual(self) -> bool: + return self == GrantsTargetLayer.VIRTUAL + + def __str__(self) -> str: + return self.name + + def __repr__(self) -> str: + return str(self) class ModelMeta(_Node): @@ -59,8 +92,8 @@ class ModelMeta(_Node): retention: t.Optional[int] = None # not implemented yet table_format: t.Optional[str] = None storage_format: t.Optional[str] = None - partitioned_by_: t.List[exp.Expression] = Field(default=[], alias="partitioned_by") - clustered_by: t.List[exp.Expression] = [] + partitioned_by_: t.List[exp.Expr] = Field(default=[], alias="partitioned_by") + clustered_by: t.List[exp.Expr] = [] default_catalog: t.Optional[str] = None depends_on_: t.Optional[t.Set[str]] = Field(default=None, alias="depends_on") columns_to_types_: t.Optional[t.Dict[str, exp.DataType]] = Field(default=None, alias="columns") @@ -68,8 +101,8 @@ class ModelMeta(_Node): default=None, alias="column_descriptions" ) audits: t.List[FunctionCall] = [] - grains: t.List[exp.Expression] = [] - references: t.List[exp.Expression] = [] + grains: t.List[exp.Expr] = [] + references: t.List[exp.Expr] = [] physical_schema_override: t.Optional[str] = None physical_properties_: t.Optional[exp.Tuple] = Field(default=None, alias="physical_properties") virtual_properties_: t.Optional[exp.Tuple] = Field(default=None, alias="virtual_properties") @@ -85,6 +118,8 @@ class ModelMeta(_Node): ) formatting: t.Optional[bool] = Field(default=None, exclude=True) virtual_environment_mode: VirtualEnvironmentMode = VirtualEnvironmentMode.default + grants_: t.Optional[exp.Tuple] = Field(default=None, alias="grants") + grants_target_layer: GrantsTargetLayer = 
GrantsTargetLayer.default _bool_validator = bool_validator _model_kind_validator = model_kind_validator @@ -116,11 +151,11 @@ def _normalize(value: t.Any) -> t.Any: if isinstance(v, (exp.Tuple, exp.Array)): return [_normalize(e).name for e in v.expressions] - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return _normalize(v).name if isinstance(v, str): value = _normalize(v) - return value.name if isinstance(value, exp.Expression) else value + return value.name if isinstance(value, exp.Expr) else value if isinstance(v, (list, tuple)): return [cls._validate_value_or_tuple(elm, data, normalize=normalize) for elm in v] @@ -128,7 +163,7 @@ def _normalize(value: t.Any) -> t.Any: @field_validator("table_format", "storage_format", mode="before") def _format_validator(cls, v: t.Any, info: ValidationInfo) -> t.Optional[str]: - if isinstance(v, exp.Expression) and not (isinstance(v, (exp.Literal, exp.Identifier))): + if isinstance(v, exp.Expr) and not (isinstance(v, (exp.Literal, exp.Identifier))): return v.sql(info.data.get("dialect")) return str_or_exp_to_str(v) @@ -153,9 +188,7 @@ def _gateway_validator(cls, v: t.Any) -> t.Optional[str]: return gateway and gateway.lower() @field_validator("partitioned_by_", "clustered_by", mode="before") - def _partition_and_cluster_validator( - cls, v: t.Any, info: ValidationInfo - ) -> t.List[exp.Expression]: + def _partition_and_cluster_validator(cls, v: t.Any, info: ValidationInfo) -> t.List[exp.Expr]: if ( isinstance(v, list) and all(isinstance(i, str) for i in v) @@ -209,9 +242,33 @@ def _columns_validator( return columns_to_types if isinstance(v, dict): - udt = Dialect.get_or_raise(dialect).SUPPORTS_USER_DEFINED_TYPES + dialect_obj = Dialect.get_or_raise(dialect) + udt = dialect_obj.SUPPORTS_USER_DEFINED_TYPES for k, data_type in v.items(): + is_string_type = isinstance(data_type, str) expr = exp.DataType.build(data_type, dialect=dialect, udt=udt) + # When deserializing from a string (e.g. 
JSON roundtrip), normalize the type + # through the dialect's type system so that aliases (e.g. INT in BigQuery, + # which is an alias for INT64/BIGINT) are resolved to their canonical form. + # This ensures stable data hash computation across serialization/deserialization + # roundtrips. We skip this for DataType objects passed directly (Python API) + # since those should be used as-is. + if ( + is_string_type + and dialect + and expr.this + not in ( + exp.DataType.Type.USERDEFINED, + exp.DataType.Type.UNKNOWN, + ) + ): + sql_repr = expr.sql(dialect=dialect) + try: + normalized = parse_one(sql_repr, read=dialect, into=exp.DataType) + if normalized is not None: + expr = normalized + except Exception: + pass expr.meta["dialect"] = dialect columns_to_types[normalize_identifiers(k, dialect=dialect).name] = expr @@ -260,7 +317,7 @@ def _column_descriptions_validator( return col_descriptions @field_validator("grains", "references", mode="before") - def _refs_validator(cls, vs: t.Any, info: ValidationInfo) -> t.List[exp.Expression]: + def _refs_validator(cls, vs: t.Any, info: ValidationInfo) -> t.List[exp.Expr]: dialect = info.data.get("dialect") if isinstance(vs, exp.Paren): @@ -287,6 +344,14 @@ def _refs_validator(cls, vs: t.Any, info: ValidationInfo) -> t.List[exp.Expressi def ignored_rules_validator(cls, vs: t.Any) -> t.Any: return LinterConfig._validate_rules(vs) + @field_validator("grants_target_layer", mode="before") + def _grants_target_layer_validator(cls, v: t.Any) -> t.Any: + if isinstance(v, exp.Identifier): + return v.this + if isinstance(v, exp.Literal) and v.is_string: + return v.this + return v + @field_validator("session_properties_", mode="before") def session_properties_validator(cls, v: t.Any, info: ValidationInfo) -> t.Any: # use the generic properties validator to parse the session properties @@ -306,7 +371,7 @@ def session_properties_validator(cls, v: t.Any, info: ValidationInfo) -> t.Any: "Invalid value for `session_properties.query_label`. 
Must be an array or tuple." ) - label_tuples: t.List[exp.Expression] = ( + label_tuples: t.List[exp.Expr] = ( [query_label.unnest()] if isinstance(query_label, exp.Paren) else query_label.expressions @@ -394,6 +459,10 @@ def _root_validator(self) -> Self: f"Model {self.name} has `storage_format` set to a table format '{storage_format}' which is deprecated. Please use the `table_format` property instead." ) + # Validate grants configuration for model kind support + if self.grants is not None and not kind.supports_grants: + raise ValueError(f"grants cannot be set for {kind.name} models") + return self @property @@ -402,7 +471,7 @@ def time_column(self) -> t.Optional[TimeColumn]: return getattr(self.kind, "time_column", None) @property - def unique_key(self) -> t.List[exp.Expression]: + def unique_key(self) -> t.List[exp.Expr]: if isinstance( self.kind, (SCDType2ByTimeKind, SCDType2ByColumnKind, IncrementalByUniqueKeyKind) ): @@ -438,14 +507,14 @@ def batch_concurrency(self) -> t.Optional[int]: return getattr(self.kind, "batch_concurrency", None) @cached_property - def physical_properties(self) -> t.Dict[str, exp.Expression]: + def physical_properties(self) -> t.Dict[str, exp.Expr]: """A dictionary of properties that will be applied to the physical layer. 
It replaces table_properties which is deprecated.""" if self.physical_properties_: return {e.this.name: e.expression for e in self.physical_properties_.expressions} return {} @cached_property - def virtual_properties(self) -> t.Dict[str, exp.Expression]: + def virtual_properties(self) -> t.Dict[str, exp.Expr]: """A dictionary of properties that will be applied to the virtual layer.""" if self.virtual_properties_: return {e.this.name: e.expression for e in self.virtual_properties_.expressions} @@ -465,6 +534,30 @@ def custom_materialization_properties(self) -> CustomMaterializationProperties: return self.kind.materialization_properties return {} + @cached_property + def grants(self) -> t.Optional[GrantsConfig]: + """A dictionary of grants mapping permission names to lists of grantees.""" + + if self.grants_ is None: + return None + + if not self.grants_.expressions: + return {} + + grants_dict = {} + for eq_expr in self.grants_.expressions: + try: + permission_name = self._validate_config_expression(eq_expr.left) + grantee_list = self._validate_nested_config_values(eq_expr.expression) + grants_dict[permission_name] = grantee_list + except ConfigError as e: + permission_name = ( + eq_expr.left.name if hasattr(eq_expr.left, "name") else str(eq_expr.left) + ) + raise ConfigError(f"Invalid grants configuration for '{permission_name}': {e}") + + return grants_dict if grants_dict else None + @property def all_references(self) -> t.List[Reference]: """All references including grains.""" @@ -497,7 +590,7 @@ def when_matched(self) -> t.Optional[exp.Whens]: return None @property - def merge_filter(self) -> t.Optional[exp.Expression]: + def merge_filter(self) -> t.Optional[exp.Expr]: if isinstance(self.kind, IncrementalByUniqueKeyKind): return self.kind.merge_filter return None @@ -529,3 +622,33 @@ def on_additive_change(self) -> OnAdditiveChange: @property def ignored_rules(self) -> t.Set[str]: return self.ignored_rules_ or set() + + def _validate_config_expression(self, 
expr: exp.Expr) -> str: + if isinstance(expr, (d.MacroFunc, d.MacroVar)): + raise ConfigError(f"Unresolved macro: {expr.sql(dialect=self.dialect)}") + + if isinstance(expr, exp.Null): + raise ConfigError("NULL value") + + if isinstance(expr, exp.Literal): + return str(expr.this).strip() + if isinstance(expr, (exp.Column, exp.Identifier)): + return expr.name + return expr.sql(dialect=self.dialect).strip() + + def _validate_nested_config_values(self, value_expr: exp.Expr) -> t.List[str]: + result = [] + + def flatten_expr(expr: exp.Expr) -> None: + if isinstance(expr, exp.Array): + for elem in expr.expressions: + flatten_expr(elem) + elif isinstance(expr, (exp.Tuple, exp.Paren)): + expressions = [expr.unnest()] if isinstance(expr, exp.Paren) else expr.expressions + for elem in expressions: + flatten_expr(elem) + else: + result.append(self._validate_config_expression(expr)) + + flatten_expr(value_expr) + return result diff --git a/sqlmesh/core/model/seed.py b/sqlmesh/core/model/seed.py index fe1aa85204..9fd57fe6d3 100644 --- a/sqlmesh/core/model/seed.py +++ b/sqlmesh/core/model/seed.py @@ -49,7 +49,7 @@ def _bool_validator(cls, v: t.Any) -> t.Optional[bool]: ) @classmethod def _str_validator(cls, v: t.Any) -> t.Optional[str]: - if v is None or not isinstance(v, exp.Expression): + if v is None or not isinstance(v, exp.Expr): return v # SQLGlot parses escape sequences like \t as \\t for dialects that don't treat \ as @@ -60,7 +60,7 @@ def _str_validator(cls, v: t.Any) -> t.Optional[str]: @field_validator("na_values", mode="before") @classmethod def _na_values_validator(cls, v: t.Any) -> t.Optional[NaValues]: - if v is None or not isinstance(v, exp.Expression): + if v is None or not isinstance(v, exp.Expr): return v try: diff --git a/sqlmesh/core/node.py b/sqlmesh/core/node.py index b04a59a39f..d3b63312f1 100644 --- a/sqlmesh/core/node.py +++ b/sqlmesh/core/node.py @@ -153,6 +153,101 @@ def milliseconds(self) -> int: return self.seconds * 1000 +class 
DbtNodeInfo(PydanticModel): + """ + Represents dbt-specific model information set by the dbt loader and intended to be made available at the Snapshot level + (as opposed to hidden within the individual model jinja macro registries). + + This allows for things like injecting implementations of variables / functions into the Jinja context that are compatible with + their dbt equivalents but are backed by the sqlmesh snapshots in any given plan / environment + """ + + unique_id: str + """This is the node/resource name/unique_id that's used as the node key in the dbt manifest. + It's prefixed by the resource type and is exposed in context variables like {{ selected_resources }}. + + Examples: + - test.jaffle_shop.unique_stg_orders_order_id.e3b841c71a + - seed.jaffle_shop.raw_payments + - model.jaffle_shop.stg_orders + """ + + name: str + """Name of this object in the dbt global namespace, used by things like {{ ref() }} calls. + + Examples: + - unique_stg_orders_order_id + - raw_payments + - stg_orders + """ + + fqn: str + """Used for selectors in --select/--exclude. + Takes the filesystem into account so may be structured differently to :unique_id. + + Examples: + - jaffle_shop.staging.unique_stg_orders_order_id + - jaffle_shop.raw_payments + - jaffle_shop.staging.stg_orders + """ + + alias: t.Optional[str] = None + """This is dbt's way of overriding the _physical table_ a model is written to. 
+ + It's used in the following situation: + - Say you have two models, "stg_customers" and "customers" + - You want "stg_customers" to be written to the "staging" schema as eg "staging.customers" - NOT "staging.stg_customers" + - But you cant rename the file to "customers" because it will conflict with your other model file "customers" + - Even if you put it in a different folder, eg "staging/customers.sql" - dbt still has a global namespace so it will conflict + when you try to do something like "{{ ref('customers') }}" + - So dbt's solution to this problem is to keep calling it "stg_customers" at the dbt project/model level, + but allow overriding the physical table to "customers" via something like "{{ config(alias='customers', schema='staging') }}" + + Note that if :alias is set, it does *not* replace :name at the model level and cannot be used interchangably with :name. + It also does not affect the :fqn or :unique_id. It's just used to override :name when it comes time to generate the physical table name. 
+ """ + + @model_validator(mode="after") + def post_init(self) -> Self: + # by default, dbt sets alias to the same as :name + # however, we only want to include :alias if it is actually different / actually providing an override + if self.alias == self.name: + self.alias = None + return self + + def to_expression(self) -> exp.Expr: + """Produce a SQLGlot expression representing this object, for use in things like the model/audit definition renderers""" + return exp.tuple_( + *( + exp.PropertyEQ(this=exp.var(k), expression=exp.Literal.string(v)) + for k, v in sorted(self.model_dump(exclude_none=True).items()) + ) + ) + + +class DbtInfoMixin: + """This mixin encapsulates properties that only exist for dbt compatibility and are otherwise not required + for native projects""" + + @property + def dbt_node_info(self) -> t.Optional[DbtNodeInfo]: + raise NotImplementedError() + + @property + def dbt_unique_id(self) -> t.Optional[str]: + """Used for compatibility with jinja context variables such as {{ selected_resources }}""" + if self.dbt_node_info: + return self.dbt_node_info.unique_id + return None + + @property + def dbt_fqn(self) -> t.Optional[str]: + """Used in the selector engine for compatibility with selectors that select models by dbt fqn""" + if self.dbt_node_info: + return self.dbt_node_info.fqn + return None + + # this must be sorted in descending order INTERVAL_SECONDS = { IntervalUnit.YEAR: 60 * 60 * 24 * 365, @@ -165,7 +260,7 @@ def milliseconds(self) -> int: } -class _Node(PydanticModel): +class _Node(DbtInfoMixin, PydanticModel): """ Node is the core abstraction for entity that can be executed within the scheduler. 
@@ -199,7 +294,7 @@ class _Node(PydanticModel): interval_unit_: t.Optional[IntervalUnit] = Field(alias="interval_unit", default=None) tags: t.List[str] = [] stamp: t.Optional[str] = None - dbt_name: t.Optional[str] = None # dbt node name + dbt_node_info_: t.Optional[DbtNodeInfo] = Field(alias="dbt_node_info", default=None) _path: t.Optional[Path] = None _data_hash: t.Optional[str] = None _metadata_hash: t.Optional[str] = None @@ -229,7 +324,7 @@ def copy(self, **kwargs: t.Any) -> Self: def _name_validator(cls, v: t.Any) -> t.Optional[str]: if v is None: return None - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return v.meta["sql"] return str(v) @@ -257,7 +352,7 @@ def _cron_tz_validator(cls, v: t.Any) -> t.Optional[zoneinfo.ZoneInfo]: @field_validator("start", "end", mode="before") @classmethod def _date_validator(cls, v: t.Any) -> t.Optional[TimeLike]: - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): v = v.name if v and not to_datetime(v): raise ConfigError(f"'{v}' needs to be time-like: https://pypi.org/project/dateparser") @@ -446,6 +541,10 @@ def is_audit(self) -> bool: """Return True if this is an audit node""" return False + @property + def dbt_node_info(self) -> t.Optional[DbtNodeInfo]: + return self.dbt_node_info_ + class NodeType(str, Enum): MODEL = "model" @@ -456,6 +555,6 @@ def __str__(self) -> str: def str_or_exp_to_str(v: t.Any) -> t.Optional[str]: - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return v.name return str(v) if v is not None else None diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py index a84b3b60dc..01834594cd 100644 --- a/sqlmesh/core/plan/builder.py +++ b/sqlmesh/core/plan/builder.py @@ -65,6 +65,9 @@ class PlanBuilder: restate_models: A list of models for which the data should be restated for the time range specified in this plan. Note: models defined outside SQLMesh (external) won't be a part of the restatement. 
+ restate_all_snapshots: If restatements are present, this flag indicates whether or not the intervals + being restated should be cleared from state for other versions of this model (typically, versions that are present in other environments). + If set to None, the default behaviour is to not clear anything unless the target environment is prod. backfill_models: A list of fully qualified model names for which the data should be backfilled as part of this plan. no_gaps: Whether to ensure that new snapshots for nodes that are already a part of the target environment have no data gaps when compared against previous @@ -103,6 +106,7 @@ def __init__( execution_time: t.Optional[TimeLike] = None, apply: t.Optional[t.Callable[[Plan], None]] = None, restate_models: t.Optional[t.Iterable[str]] = None, + restate_all_snapshots: bool = False, backfill_models: t.Optional[t.Iterable[str]] = None, no_gaps: bool = False, skip_backfill: bool = False, @@ -154,13 +158,14 @@ def __init__( self._auto_categorization_enabled = auto_categorization_enabled self._include_unmodified = include_unmodified self._restate_models = set(restate_models) if restate_models is not None else None + self._restate_all_snapshots = restate_all_snapshots self._effective_from = effective_from # note: this deliberately doesnt default to now() here. 
# There may be an significant delay between the PlanBuilder producing a Plan and the Plan actually being run # so if execution_time=None is passed to the PlanBuilder, then the resulting Plan should also have execution_time=None # in order to prevent the Plan that was intended to run "as at now" from having "now" fixed to some time in the past - # ref: https://github.com/TobikoData/sqlmesh/pull/4702#discussion_r2140696156 + # ref: https://github.com/SQLMesh/sqlmesh/pull/4702#discussion_r2140696156 self._execution_time = execution_time self._backfill_models = backfill_models @@ -277,7 +282,6 @@ def build(self) -> Plan: if self._latest_plan: return self._latest_plan - self._ensure_no_new_snapshots_with_restatements() self._ensure_new_env_with_changes() self._ensure_valid_date_range() self._ensure_no_broken_references() @@ -338,7 +342,9 @@ def build(self) -> Plan: directly_modified=directly_modified, indirectly_modified=indirectly_modified, deployability_index=deployability_index, + selected_models_to_restate=self._restate_models, restatements=restatements, + restate_all_snapshots=self._restate_all_snapshots, start_override_per_model=self._start_override_per_model, end_override_per_model=end_override_per_model, selected_models_to_backfill=self._backfill_models, @@ -674,6 +680,14 @@ def _categorize_snapshot( if mode == AutoCategorizationMode.FULL: snapshot.categorize_as(SnapshotChangeCategory.BREAKING, forward_only) elif self._context_diff.indirectly_modified(snapshot.name): + if snapshot.is_materialized_view and not forward_only: + # We categorize changes as breaking to allow for instantaneous switches in a virtual layer. + # Otherwise, there might be a potentially long downtime during MVs recreation. + # In the case of forward-only changes this optimization is not applicable because we want to continue + # using the same (existing) table version. 
+ snapshot.categorize_as(SnapshotChangeCategory.INDIRECT_BREAKING, forward_only) + return + all_upstream_forward_only = set() all_upstream_categories = set() direct_parent_categories = set() @@ -858,15 +872,6 @@ def _ensure_no_broken_references(self) -> None: f"""Removed {broken_references_msg} are referenced in '{snapshot.name}'. Please remove broken references before proceeding.""" ) - def _ensure_no_new_snapshots_with_restatements(self) -> None: - if self._restate_models is not None and ( - self._context_diff.new_snapshots or self._context_diff.modified_snapshots - ): - raise PlanError( - "Model changes and restatements can't be a part of the same plan. " - "Revert or apply changes before proceeding with restatements." - ) - def _ensure_new_env_with_changes(self) -> None: if ( self._is_dev diff --git a/sqlmesh/core/plan/common.py b/sqlmesh/core/plan/common.py index 4ae8a3112c..bece17639c 100644 --- a/sqlmesh/core/plan/common.py +++ b/sqlmesh/core/plan/common.py @@ -16,13 +16,25 @@ def should_force_rebuild(old: Snapshot, new: Snapshot) -> bool: if new.is_view and new.is_indirect_non_breaking and not new.is_forward_only: # View models always need to be rebuilt to reflect updated upstream dependencies return True - if new.is_seed: + if new.is_seed and not ( + new.is_metadata + and new.previous_version + and new.previous_version.snapshot_id(new.name) == old.snapshot_id + ): # Seed models always need to be rebuilt to reflect changes in the seed file + # Unless only their metadata has been updated (eg description added) and the seed file has not been touched return True return is_breaking_kind_change(old, new) def is_breaking_kind_change(old: Snapshot, new: Snapshot) -> bool: + if new.is_model != old.is_model: + # If one is a model and the other isn't, then we need to rebuild + return True + if not new.is_model or not old.is_model: + # If neither are models, then we don't need to rebuild + # Note that the remaining checks only apply to model snapshots + return False 
if old.virtual_environment_mode != new.virtual_environment_mode: # If the virtual environment mode has changed, then we need to rebuild return True diff --git a/sqlmesh/core/plan/definition.py b/sqlmesh/core/plan/definition.py index aaf6ec5dc0..866299eff8 100644 --- a/sqlmesh/core/plan/definition.py +++ b/sqlmesh/core/plan/definition.py @@ -58,7 +58,18 @@ class Plan(PydanticModel, frozen=True): indirectly_modified: t.Dict[SnapshotId, t.Set[SnapshotId]] deployability_index: DeployabilityIndex + selected_models_to_restate: t.Optional[t.Set[str]] = None + """Models that have been explicitly selected for restatement by a user""" restatements: t.Dict[SnapshotId, Interval] + """ + All models being restated, which are typically the explicitly selected ones + their downstream dependencies. + + Note that dev previews are also considered restatements, so :selected_models_to_restate can be empty + while :restatements is still populated with dev previews + """ + restate_all_snapshots: bool + """Whether or not to clear intervals from state for other versions of the models listed in :restatements""" + start_override_per_model: t.Optional[t.Dict[str, datetime]] end_override_per_model: t.Optional[t.Dict[str, datetime]] @@ -202,8 +213,8 @@ def environment(self) -> Environment: snapshots_by_name = self.context_diff.snapshots_by_name snapshots = [s.table_info for s in self.snapshots.values()] - promoted_snapshot_ids = None - if self.is_dev and not self.include_unmodified: + promotable_snapshot_ids = None + if self.is_dev: if self.selected_models_to_backfill is not None: # Only promote models that have been explicitly selected for backfill. 
promotable_snapshot_ids = { @@ -214,12 +225,14 @@ def environment(self) -> Environment: if m in snapshots_by_name ], } - else: + elif not self.include_unmodified: promotable_snapshot_ids = self.context_diff.promotable_snapshot_ids.copy() - promoted_snapshot_ids = [ - s.snapshot_id for s in snapshots if s.snapshot_id in promotable_snapshot_ids - ] + promoted_snapshot_ids = ( + [s.snapshot_id for s in snapshots if s.snapshot_id in promotable_snapshot_ids] + if promotable_snapshot_ids is not None + else None + ) previous_finalized_snapshots = ( self.context_diff.environment_snapshots @@ -259,6 +272,7 @@ def to_evaluatable(self) -> EvaluatablePlan: skip_backfill=self.skip_backfill, empty_backfill=self.empty_backfill, restatements={s.name: i for s, i in self.restatements.items()}, + restate_all_snapshots=self.restate_all_snapshots, is_dev=self.is_dev, allow_destructive_models=self.allow_destructive_models, allow_additive_models=self.allow_additive_models, @@ -303,6 +317,7 @@ class EvaluatablePlan(PydanticModel): skip_backfill: bool empty_backfill: bool restatements: t.Dict[str, Interval] + restate_all_snapshots: bool is_dev: bool allow_destructive_models: t.Set[str] allow_additive_models: t.Set[str] diff --git a/sqlmesh/core/plan/evaluator.py b/sqlmesh/core/plan/evaluator.py index 79053e018b..f2f432a97e 100644 --- a/sqlmesh/core/plan/evaluator.py +++ b/sqlmesh/core/plan/evaluator.py @@ -22,7 +22,7 @@ from sqlmesh.core.console import Console, get_console from sqlmesh.core.environment import EnvironmentNamingInfo, execute_environment_statements from sqlmesh.core.macros import RuntimeStage -from sqlmesh.core.snapshot.definition import to_view_mapping +from sqlmesh.core.snapshot.definition import to_view_mapping, SnapshotTableInfo from sqlmesh.core.plan import stages from sqlmesh.core.plan.definition import EvaluatablePlan from sqlmesh.core.scheduler import Scheduler @@ -40,7 +40,7 @@ from sqlmesh.core.plan.common import 
identify_restatement_intervals_across_snapshot_versions from sqlmesh.utils import CorrelationId from sqlmesh.utils.concurrency import NodeExecutionFailedError -from sqlmesh.utils.errors import PlanError, SQLMeshError +from sqlmesh.utils.errors import PlanError, ConflictingPlanError, SQLMeshError from sqlmesh.utils.date import now, to_timestamp logger = logging.getLogger(__name__) @@ -258,6 +258,7 @@ def visit_backfill_stage(self, stage: stages.BackfillStage, plan: EvaluatablePla allow_additive_snapshots=plan.allow_additive_models, selected_snapshot_ids=stage.selected_snapshot_ids, selected_models=plan.selected_models, + is_restatement=bool(plan.restatements), ) if errors: raise PlanError("Plan application failed.") @@ -287,34 +288,78 @@ def visit_audit_only_run_stage( def visit_restatement_stage( self, stage: stages.RestatementStage, plan: EvaluatablePlan ) -> None: - snapshot_intervals_to_restate = { - (s.id_and_version, i) for s, i in stage.snapshot_intervals.items() - } - - # Restating intervals on prod plans should mean that the intervals are cleared across - # all environments, not just the version currently in prod - # This ensures that work done in dev environments can still be promoted to prod - # by forcing dev environments to re-run intervals that changed in prod + # Restating intervals on prod plans means that once the data for the intervals being restated has been backfilled + # (which happens in the backfill stage) then we need to clear those intervals *from state* across all other environments. 
+ # + # This ensures that work done in dev environments can still be promoted to prod by forcing dev environments to + # re-run intervals that changed in prod (because after this stage runs they are cleared from state and thus show as missing) + # + # It also means that any new dev environments created while this restatement plan was running also get the + # correct intervals cleared because we look up matching snapshots as at right now and not as at the time the plan + # was created, which could have been several hours ago if there was a lot of data to restate. # # Without this rule, its possible that promoting a dev table to prod will introduce old data to prod - snapshot_intervals_to_restate.update( - { - (s.snapshot, s.interval) - for s in identify_restatement_intervals_across_snapshot_versions( - state_reader=self.state_sync, - prod_restatements=plan.restatements, - disable_restatement_models=plan.disabled_restatement_models, - loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()}, - current_ts=to_timestamp(plan.execution_time or now()), - ).values() - } - ) - self.state_sync.remove_intervals( - snapshot_intervals=list(snapshot_intervals_to_restate), - remove_shared_versions=plan.is_prod, + intervals_to_clear = identify_restatement_intervals_across_snapshot_versions( + state_reader=self.state_sync, + prod_restatements=plan.restatements, + disable_restatement_models=plan.disabled_restatement_models, + loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()}, + current_ts=to_timestamp(plan.execution_time or now()), ) + if not intervals_to_clear: + # Nothing to do + return + + # While the restatements were being processed, did any of the snapshots being restated get new versions deployed? 
+ # If they did, they will not reflect the data that just got restated, so we need to notify the user + deployed_during_restatement: t.Dict[ + str, t.Tuple[SnapshotTableInfo, SnapshotTableInfo] + ] = {} # tuple of (restated_snapshot, current_prod_snapshot) + + if deployed_env := self.state_sync.get_environment(plan.environment.name): + promoted_snapshots_by_name = {s.name: s for s in deployed_env.snapshots} + + for name in plan.restatements: + snapshot = stage.all_snapshots[name] + version = snapshot.table_info.version + if ( + prod_snapshot := promoted_snapshots_by_name.get(name) + ) and prod_snapshot.version != version: + deployed_during_restatement[name] = ( + snapshot.table_info, + prod_snapshot.table_info, + ) + + # we need to *not* clear the intervals on the snapshots where new versions were deployed while the restatement was running in order to prevent + # subsequent plans from having unexpected intervals to backfill. + # we instead list the affected models and abort the plan with an error so the user can decide what to do + # (either re-attempt the restatement plan or leave things as they are) + filtered_intervals_to_clear = [ + (s.snapshot, s.interval) + for s in intervals_to_clear.values() + if s.snapshot.name not in deployed_during_restatement + ] + + if filtered_intervals_to_clear: + # We still clear intervals in other envs for models that were successfully restated without having new versions promoted during restatement + self.state_sync.remove_intervals( + snapshot_intervals=filtered_intervals_to_clear, + remove_shared_versions=plan.is_prod, + ) + + if deployed_env and deployed_during_restatement: + self.console.log_models_updated_during_restatement( + list(deployed_during_restatement.values()), + plan.environment.naming_info, + self.default_catalog, + ) + raise ConflictingPlanError( + f"Another plan ({deployed_env.summary.plan_id}) deployed new versions of {len(deployed_during_restatement)} models in the target environment '{plan.environment.name}' 
while they were being restated by this plan.\n" + "Please re-apply your plan if these new versions should be restated." + ) + def visit_environment_record_update_stage( self, stage: stages.EnvironmentRecordUpdateStage, plan: EvaluatablePlan ) -> None: diff --git a/sqlmesh/core/plan/explainer.py b/sqlmesh/core/plan/explainer.py index ee829aeac1..f0a1e44aff 100644 --- a/sqlmesh/core/plan/explainer.py +++ b/sqlmesh/core/plan/explainer.py @@ -1,6 +1,10 @@ +from __future__ import annotations + import abc import typing as t import logging +from dataclasses import dataclass +from collections import defaultdict from rich.console import Console as RichConsole from rich.tree import Tree @@ -8,6 +12,10 @@ from sqlmesh.core import constants as c from sqlmesh.core.console import Console, TerminalConsole, get_console from sqlmesh.core.environment import EnvironmentNamingInfo +from sqlmesh.core.plan.common import ( + SnapshotIntervalClearRequest, + identify_restatement_intervals_across_snapshot_versions, +) from sqlmesh.core.plan.definition import EvaluatablePlan, SnapshotIntervals from sqlmesh.core.plan import stages from sqlmesh.core.plan.evaluator import ( @@ -16,6 +24,8 @@ from sqlmesh.core.state_sync import StateReader from sqlmesh.core.snapshot.definition import ( SnapshotInfoMixin, + SnapshotIdAndVersion, + model_display_name, ) from sqlmesh.utils import Verbosity, rich as srich, to_snake_case from sqlmesh.utils.date import to_ts @@ -45,6 +55,15 @@ def evaluate( explainer_console = _get_explainer_console( self.console, plan.environment, self.default_catalog ) + + # add extra metadata that's only needed at this point for better --explain output + plan_stages = [ + ExplainableRestatementStage.from_restatement_stage(stage, self.state_reader, plan) + if isinstance(stage, stages.RestatementStage) + else stage + for stage in plan_stages + ] + explainer_console.explain(plan_stages) @@ -54,6 +73,41 @@ def explain(self, stages: t.List[stages.PlanStage]) -> None: pass +@dataclass 
+class ExplainableRestatementStage(stages.RestatementStage): + """ + This brings forward some calculations that would usually be done in the evaluator so the user can be given a better indication + of what might happen when they ask for the plan to be explained + """ + + snapshot_intervals_to_clear: t.Dict[str, t.List[SnapshotIntervalClearRequest]] + """Which snapshots from other environments would have intervals cleared as part of restatement, grouped by name.""" + + @classmethod + def from_restatement_stage( + cls: t.Type[ExplainableRestatementStage], + stage: stages.RestatementStage, + state_reader: StateReader, + plan: EvaluatablePlan, + ) -> ExplainableRestatementStage: + all_restatement_intervals = identify_restatement_intervals_across_snapshot_versions( + state_reader=state_reader, + prod_restatements=plan.restatements, + disable_restatement_models=plan.disabled_restatement_models, + loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()}, + ) + + # Group the interval clear requests by snapshot name to make them easier to write to the console + snapshot_intervals_to_clear = defaultdict(list) + for clear_request in all_restatement_intervals.values(): + snapshot_intervals_to_clear[clear_request.snapshot.name].append(clear_request) + + return cls( + snapshot_intervals_to_clear=snapshot_intervals_to_clear, + all_snapshots=stage.all_snapshots, + ) + + MAX_TREE_LENGTH = 10 @@ -146,11 +200,37 @@ def visit_audit_only_run_stage(self, stage: stages.AuditOnlyRunStage) -> Tree: tree.add(display_name) return tree - def visit_restatement_stage(self, stage: stages.RestatementStage) -> Tree: - tree = Tree("[bold]Invalidate data intervals as part of restatement[/bold]") - for snapshot_table_info, interval in stage.snapshot_intervals.items(): - display_name = self._display_name(snapshot_table_info) - tree.add(f"{display_name} [{to_ts(interval[0])} - {to_ts(interval[1])}]") + def visit_explainable_restatement_stage(self, stage: ExplainableRestatementStage) -> 
Tree: + return self.visit_restatement_stage(stage) + + def visit_restatement_stage( + self, stage: t.Union[ExplainableRestatementStage, stages.RestatementStage] + ) -> Tree: + tree = Tree( + "[bold]Invalidate data intervals in state for development environments to prevent old data from being promoted[/bold]\n" + "This only affects state and will not clear physical data from the tables until the next plan for each environment" + ) + + if isinstance(stage, ExplainableRestatementStage) and ( + snapshot_intervals := stage.snapshot_intervals_to_clear + ): + for name, clear_requests in snapshot_intervals.items(): + display_name = model_display_name( + name, self.environment_naming_info, self.default_catalog, self.dialect + ) + interval_start = min(cr.interval[0] for cr in clear_requests) + interval_end = max(cr.interval[1] for cr in clear_requests) + + if not interval_start or not interval_end: + continue + + node = tree.add(f"{display_name} [{to_ts(interval_start)} - {to_ts(interval_end)}]") + + all_environment_names = sorted( + set(env_name for cr in clear_requests for env_name in cr.environment_names) + ) + node.add("in environments: " + ", ".join(all_environment_names)) + return tree def visit_backfill_stage(self, stage: stages.BackfillStage) -> Tree: @@ -265,12 +345,14 @@ def visit_finalize_environment_stage( def _display_name( self, - snapshot: SnapshotInfoMixin, + snapshot: t.Union[SnapshotInfoMixin, SnapshotIdAndVersion], environment_naming_info: t.Optional[EnvironmentNamingInfo] = None, ) -> str: return snapshot.display_name( - environment_naming_info or self.environment_naming_info, - self.default_catalog if self.verbosity < Verbosity.VERY_VERBOSE else None, + environment_naming_info=environment_naming_info or self.environment_naming_info, + default_catalog=self.default_catalog + if self.verbosity < Verbosity.VERY_VERBOSE + else None, dialect=self.dialect, ) diff --git a/sqlmesh/core/plan/stages.py b/sqlmesh/core/plan/stages.py index 91c8c6ff14..729e1705b4 
100644 --- a/sqlmesh/core/plan/stages.py +++ b/sqlmesh/core/plan/stages.py @@ -12,8 +12,9 @@ Snapshot, SnapshotTableInfo, SnapshotId, - Interval, + snapshots_to_dag, ) +from sqlmesh.utils.errors import PlanError @dataclass @@ -98,14 +99,19 @@ class AuditOnlyRunStage: @dataclass class RestatementStage: - """Restate intervals for given snapshots. + """Clear intervals from state for snapshots in *other* environments, when restatements are requested in prod. + + This stage is effectively a "marker" stage to trigger the plan evaluator to perform the "clear intervals" logic after the BackfillStage has completed. + The "clear intervals" logic is executed just-in-time using the latest state available in order to pick up new snapshots that may have + been created while the BackfillStage was running, which is why we do not build a list of snapshots to clear at plan time and defer to evaluation time. + + Note that this stage is only present on `prod` plans because dev plans do not need to worry about clearing intervals in other environments. Args: - snapshot_intervals: Intervals to restate. - all_snapshots: All snapshots in the plan by name. + all_snapshots: All snapshots in the plan by name. 
Note that this does not include the snapshots from other environments that will get their + intervals cleared, it's included here as an optimization to prevent having to re-fetch the current plan's snapshots """ - snapshot_intervals: t.Dict[SnapshotTableInfo, Interval] all_snapshots: t.Dict[str, Snapshot] @@ -244,6 +250,7 @@ def build(self, plan: EvaluatablePlan) -> t.List[PlanStage]: stored_snapshots = self.state_reader.get_snapshots(plan.environment.snapshots) snapshots = {**new_snapshots, **stored_snapshots} snapshots_by_name = {s.name: s for s in snapshots.values()} + dag = snapshots_to_dag(snapshots.values()) all_selected_for_backfill_snapshots = { s.snapshot_id for s in snapshots.values() if plan.is_selected_for_backfill(s.name) @@ -261,14 +268,21 @@ def build(self, plan: EvaluatablePlan) -> t.List[PlanStage]: before_promote_snapshots = { s.snapshot_id for s in snapshots.values() - if deployability_index.is_representative(s) + if (deployability_index.is_representative(s) or s.is_seed) and plan.is_selected_for_backfill(s.name) } after_promote_snapshots = all_selected_for_backfill_snapshots - before_promote_snapshots deployability_index = DeployabilityIndex.all_deployable() + snapshot_ids_with_schema_migration = [ + s.snapshot_id for s in snapshots.values() if s.requires_schema_migration_in_prod + ] + # Include all upstream dependencies of snapshots that require schema migration to make sure + # the upstream tables are created before the schema updates are applied snapshots_with_schema_migration = [ - s for s in snapshots.values() if s.requires_schema_migration_in_prod + snapshots[s_id] + for s_id in dag.subdag(*snapshot_ids_with_schema_migration) + if snapshots[s_id].supports_schema_migration_in_prod ] snapshots_to_intervals = self._missing_intervals( @@ -321,10 +335,6 @@ def build(self, plan: EvaluatablePlan) -> t.List[PlanStage]: if audit_only_snapshots: stages.append(AuditOnlyRunStage(snapshots=list(audit_only_snapshots.values()))) - restatement_stage = 
self._get_restatement_stage(plan, snapshots_by_name) - if restatement_stage: - stages.append(restatement_stage) - if missing_intervals_before_promote: stages.append( BackfillStage( @@ -349,6 +359,15 @@ def build(self, plan: EvaluatablePlan) -> t.List[PlanStage]: ) ) + # note: "restatement stage" (which is clearing intervals in state - not actually performing the restatements, that's the backfill stage) + # needs to come *after* the backfill stage so that at no time do other plans / runs see empty prod intervals and compete with this plan to try to fill them. + # in addition, when we update intervals in state, we only clear intervals from dev snapshots to force dev models to be backfilled based on the new prod data. + # we can leave prod intervals alone because by the time this plan finishes, the intervals in state have not actually changed, since restatement replaces + # data for existing intervals and does not produce new ones + restatement_stage = self._get_restatement_stage(plan, snapshots_by_name) + if restatement_stage: + stages.append(restatement_stage) + stages.append( EnvironmentRecordUpdateStage( no_gaps_snapshot_names={s.name for s in before_promote_snapshots} @@ -443,16 +462,18 @@ def _get_after_all_stage( def _get_restatement_stage( self, plan: EvaluatablePlan, snapshots_by_name: t.Dict[str, Snapshot] ) -> t.Optional[RestatementStage]: - snapshot_intervals_to_restate = {} - for name, interval in plan.restatements.items(): - restated_snapshot = snapshots_by_name[name] - restated_snapshot.remove_interval(interval) - snapshot_intervals_to_restate[restated_snapshot.table_info] = interval - if not snapshot_intervals_to_restate or plan.is_dev: - return None - return RestatementStage( - snapshot_intervals=snapshot_intervals_to_restate, all_snapshots=snapshots_by_name - ) + if plan.restate_all_snapshots: + if plan.is_dev: + raise PlanError( + "Clearing intervals from state across dev model versions is only valid for prod plans" + ) + + if plan.restatements: + 
return RestatementStage( + all_snapshots=snapshots_by_name, + ) + + return None def _get_physical_layer_update_stage( self, diff --git a/sqlmesh/core/reference.py b/sqlmesh/core/reference.py index 2bf2c04e98..9e93ce7b38 100644 --- a/sqlmesh/core/reference.py +++ b/sqlmesh/core/reference.py @@ -14,7 +14,7 @@ class Reference(PydanticModel, frozen=True): model_name: str - expression: exp.Expression + expression: exp.Expr unique: bool = False _name: str = "" diff --git a/sqlmesh/core/renderer.py b/sqlmesh/core/renderer.py index 18377e0258..7683956064 100644 --- a/sqlmesh/core/renderer.py +++ b/sqlmesh/core/renderer.py @@ -6,7 +6,7 @@ from functools import partial from pathlib import Path -from sqlglot import exp, parse +from sqlglot import exp, Dialect from sqlglot.errors import SqlglotError from sqlglot.helper import ensure_list from sqlglot.optimizer.annotate_types import annotate_types @@ -48,7 +48,7 @@ class BaseExpressionRenderer: def __init__( self, - expression: exp.Expression, + expression: exp.Expr, dialect: DialectType, macro_definitions: t.List[d.MacroDef], path: t.Optional[Path] = None, @@ -73,7 +73,7 @@ def __init__( self._normalize_identifiers = normalize_identifiers self._quote_identifiers = quote_identifiers self.update_schema({} if schema is None else schema) - self._cache: t.List[t.Optional[exp.Expression]] = [] + self._cache: t.List[t.Optional[exp.Expr]] = [] self._model_fqn = model.fqn if model else None self._optimize_query_flag = optimize_query is not False self._model = model @@ -91,7 +91,7 @@ def _render( deployability_index: t.Optional[DeployabilityIndex] = None, runtime_stage: RuntimeStage = RuntimeStage.LOADING, **kwargs: t.Any, - ) -> t.List[t.Optional[exp.Expression]]: + ) -> t.List[t.Optional[exp.Expr]]: """Renders a expression, expanding macros with provided kwargs Args: @@ -196,9 +196,16 @@ def _resolve_table(table: str | exp.Table) -> str: **kwargs, } + if this_model: + render_kwargs["this_model"] = this_model + + 
macro_evaluator.locals.update(render_kwargs) + variables = kwargs.pop("variables", {}) + if variables: + macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables) - expressions = [self._expression] + expressions: t.List[exp.Expr] = [self._expression] if isinstance(self._expression, d.Jinja): try: jinja_env_kwargs = { @@ -249,23 +256,24 @@ def _resolve_table(table: str | exp.Table) -> str: ) from ex if rendered_expression.strip(): - try: - expressions = [e for e in parse(rendered_expression, read=self._dialect) if e] - - if not expressions: - raise ConfigError(f"Failed to parse an expression:\n{self._expression}") - except Exception as ex: - raise ConfigError( - f"Could not parse the rendered jinja at '{self._path}'.\n{ex}" - ) from ex - - if this_model: - render_kwargs["this_model"] = this_model - - macro_evaluator.locals.update(render_kwargs) - - if variables: - macro_evaluator.locals.setdefault(c.SQLMESH_VARS, {}).update(variables) + # ensure there is actual SQL and not just comments and non-SQL jinja + dialect = Dialect.get_or_raise(self._dialect) + tokens = dialect.tokenize(rendered_expression) + + if tokens: + try: + expressions = [ + e for e in dialect.parser().parse(tokens, rendered_expression) if e + ] + + if not expressions: + raise ConfigError( + f"Failed to parse an expression:\n{rendered_expression}" + ) + except Exception as ex: + raise ConfigError( + f"Could not parse the rendered jinja at '{self._path}'.\n{ex}" + ) from ex for definition in self._macro_definitions: try: @@ -275,7 +283,7 @@ def _resolve_table(table: str | exp.Table) -> str: f"Failed to evaluate macro '{definition}'.\n\n{ex}\n", self._path ) - resolved_expressions: t.List[t.Optional[exp.Expression]] = [] + resolved_expressions: t.List[t.Optional[exp.Expr]] = [] for expression in expressions: try: @@ -286,7 +294,7 @@ def _resolve_table(table: str | exp.Table) -> str: self._path, ) - for expression in t.cast(t.List[exp.Expression], transformed_expressions): + for expression 
in t.cast(t.List[exp.Expr], transformed_expressions): with self._normalize_and_quote(expression) as expression: if hasattr(expression, "selects"): for select in expression.selects: @@ -312,12 +320,12 @@ def _resolve_table(table: str | exp.Table) -> str: self._cache = resolved_expressions return resolved_expressions - def update_cache(self, expression: t.Optional[exp.Expression]) -> None: + def update_cache(self, expression: t.Optional[exp.Expr]) -> None: self._cache = [expression] def _resolve_table( self, - table_name: str | exp.Expression, + table_name: str | exp.Expr, snapshots: t.Optional[t.Dict[str, Snapshot]] = None, table_mapping: t.Optional[t.Dict[str, str]] = None, deployability_index: t.Optional[DeployabilityIndex] = None, @@ -372,7 +380,7 @@ def _resolve_tables( if snapshot.is_model } - def _expand(node: exp.Expression) -> exp.Expression: + def _expand(node: exp.Expr) -> exp.Expr: if isinstance(node, exp.Table) and snapshots: name = exp.table_name(node, identify=True) model = model_mapping.get(name) @@ -441,7 +449,7 @@ def render( deployability_index: t.Optional[DeployabilityIndex] = None, expand: t.Iterable[str] = tuple(), **kwargs: t.Any, - ) -> t.Optional[t.List[exp.Expression]]: + ) -> t.Optional[t.List[exp.Expr]]: try: expressions = super()._render( start=start, @@ -623,7 +631,7 @@ def render( def update_cache( self, - expression: t.Optional[exp.Expression], + expression: t.Optional[exp.Expr], violated_rules: t.Optional[t.Dict[type[Rule], t.Any]] = None, optimized: bool = False, ) -> None: @@ -682,7 +690,7 @@ def _optimize_query(self, query: exp.Query, all_deps: t.Set[str]) -> exp.Query: except Exception as ex: raise_config_error( - f"Failed to optimize query, please file an issue at https://github.com/TobikoData/sqlmesh/issues/new. {ex}", + f"Failed to optimize query, please file an issue at https://github.com/SQLMesh/sqlmesh/issues/new. 
{ex}", self._path, ) diff --git a/sqlmesh/core/scheduler.py b/sqlmesh/core/scheduler.py index fd2e1cf004..5eb0ff40ff 100644 --- a/sqlmesh/core/scheduler.py +++ b/sqlmesh/core/scheduler.py @@ -251,7 +251,9 @@ def evaluate( **kwargs, ) - self.state_sync.add_interval(snapshot, start, end, is_dev=not is_deployable) + self.state_sync.add_interval( + snapshot, start, end, is_dev=not is_deployable, last_altered_ts=now_timestamp() + ) return audit_results def run( @@ -335,6 +337,7 @@ def batch_intervals( deployability_index: t.Optional[DeployabilityIndex], environment_naming_info: EnvironmentNamingInfo, dag: t.Optional[DAG[SnapshotId]] = None, + is_restatement: bool = False, ) -> t.Dict[Snapshot, Intervals]: dag = dag or snapshots_to_dag(merged_intervals) @@ -349,7 +352,7 @@ def batch_intervals( ) for snapshot, intervals in merged_intervals.items() } - snapshot_batches = {} + snapshot_batches: t.Dict[Snapshot, Intervals] = {} all_unready_intervals: t.Dict[str, set[Interval]] = {} for snapshot_id in dag: if snapshot_id not in snapshot_intervals: @@ -361,12 +364,22 @@ def batch_intervals( adapter = self.snapshot_evaluator.get_adapter(snapshot.model_gateway) + parent_intervals: Intervals = [] + for parent_id in snapshot.parents: + parent_snapshot, _ = snapshot_intervals.get(parent_id, (None, [])) + if not parent_snapshot or parent_snapshot.is_external: + continue + + parent_intervals.extend(snapshot_batches[parent_snapshot]) + context = ExecutionContext( adapter, self.snapshots_by_name, deployability_index, default_dialect=adapter.dialect, default_catalog=self.default_catalog, + is_restatement=is_restatement, + parent_intervals=parent_intervals, ) intervals = self._check_ready_intervals( @@ -422,6 +435,7 @@ def run_merged_intervals( run_environment_statements: bool = False, audit_only: bool = False, auto_restatement_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {}, + is_restatement: bool = False, ) -> t.Tuple[t.List[NodeExecutionFailedError[SchedulingUnit]], 
t.List[SchedulingUnit]]: """Runs precomputed batches of missing intervals. @@ -455,9 +469,12 @@ def run_merged_intervals( snapshot_dag = full_dag.subdag(*selected_snapshot_ids_set) batched_intervals = self.batch_intervals( - merged_intervals, deployability_index, environment_naming_info, dag=snapshot_dag + merged_intervals, + deployability_index, + environment_naming_info, + dag=snapshot_dag, + is_restatement=is_restatement, ) - self.console.start_evaluation_progress( batched_intervals, environment_naming_info, @@ -530,6 +547,10 @@ def run_node(node: SchedulingUnit) -> None: execution_time=execution_time, ) else: + # If batch_index > 0, then the target table must exist since the first batch would have created it + target_table_exists = ( + snapshot.snapshot_id not in snapshots_to_create or node.batch_index > 0 + ) audit_results = self.evaluate( snapshot=snapshot, environment_naming_info=environment_naming_info, @@ -540,7 +561,7 @@ def run_node(node: SchedulingUnit) -> None: batch_index=node.batch_index, allow_destructive_snapshots=allow_destructive_snapshots, allow_additive_snapshots=allow_additive_snapshots, - target_table_exists=snapshot.snapshot_id not in snapshots_to_create, + target_table_exists=target_table_exists, selected_models=selected_models, ) @@ -638,6 +659,7 @@ def _dag( } snapshots_to_create = snapshots_to_create or set() original_snapshots_to_create = snapshots_to_create.copy() + upstream_dependencies_cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]] = {} snapshot_dag = snapshot_dag or snapshots_to_dag(batches) dag = DAG[SchedulingUnit]() @@ -649,12 +671,15 @@ def _dag( snapshot = self.snapshots_by_name[snapshot_id.name] intervals = intervals_per_snapshot.get(snapshot.name, []) - upstream_dependencies: t.List[SchedulingUnit] = [] + upstream_dependencies: t.Set[SchedulingUnit] = set() for p_sid in snapshot.parents: - upstream_dependencies.extend( + upstream_dependencies.update( self._find_upstream_dependencies( - p_sid, intervals_per_snapshot, 
original_snapshots_to_create + p_sid, + intervals_per_snapshot, + original_snapshots_to_create, + upstream_dependencies_cache, ) ) @@ -705,29 +730,42 @@ def _find_upstream_dependencies( parent_sid: SnapshotId, intervals_per_snapshot: t.Dict[str, Intervals], snapshots_to_create: t.Set[SnapshotId], - ) -> t.List[SchedulingUnit]: + cache: t.Dict[SnapshotId, t.Set[SchedulingUnit]], + ) -> t.Set[SchedulingUnit]: if parent_sid not in self.snapshots: - return [] + return set() + if parent_sid in cache: + return cache[parent_sid] p_intervals = intervals_per_snapshot.get(parent_sid.name, []) + parent_node: t.Optional[SchedulingUnit] = None if p_intervals: if len(p_intervals) > 1: - return [DummyNode(snapshot_name=parent_sid.name)] - interval = p_intervals[0] - return [EvaluateNode(snapshot_name=parent_sid.name, interval=interval, batch_index=0)] - if parent_sid in snapshots_to_create: - return [CreateNode(snapshot_name=parent_sid.name)] + parent_node = DummyNode(snapshot_name=parent_sid.name) + else: + interval = p_intervals[0] + parent_node = EvaluateNode( + snapshot_name=parent_sid.name, interval=interval, batch_index=0 + ) + elif parent_sid in snapshots_to_create: + parent_node = CreateNode(snapshot_name=parent_sid.name) + + if parent_node is not None: + cache[parent_sid] = {parent_node} + return {parent_node} + # This snapshot has no intervals and doesn't need creation which means # that it can be a transitive dependency - transitive_deps: t.List[SchedulingUnit] = [] + transitive_deps: t.Set[SchedulingUnit] = set() parent_snapshot = self.snapshots[parent_sid] for grandparent_sid in parent_snapshot.parents: - transitive_deps.extend( + transitive_deps.update( self._find_upstream_dependencies( - grandparent_sid, intervals_per_snapshot, snapshots_to_create + grandparent_sid, intervals_per_snapshot, snapshots_to_create, cache ) ) + cache[parent_sid] = transitive_deps return transitive_deps def _run_or_audit( @@ -839,7 +877,9 @@ def _run_or_audit( 
run_environment_statements=run_environment_statements, audit_only=audit_only, auto_restatement_triggers=auto_restatement_triggers, - selected_models={s.node.dbt_name for s in merged_intervals if s.node.dbt_name}, + selected_models={ + s.node.dbt_unique_id for s in merged_intervals if s.node.dbt_unique_id + }, ) return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS @@ -954,6 +994,7 @@ def _check_ready_intervals( python_env=signals.python_env, dialect=snapshot.model.dialect, path=snapshot.model._path, + snapshot=snapshot, kwargs=kwargs, ) except SQLMeshError as e: diff --git a/sqlmesh/core/schema_diff.py b/sqlmesh/core/schema_diff.py index e1f9d72a6c..ecf38b18a8 100644 --- a/sqlmesh/core/schema_diff.py +++ b/sqlmesh/core/schema_diff.py @@ -37,7 +37,7 @@ def is_additive(self) -> bool: @property @abc.abstractmethod - def _alter_actions(self) -> t.List[exp.Expression]: + def _alter_actions(self) -> t.List[exp.Expr]: pass @property @@ -104,7 +104,7 @@ def is_destructive(self) -> bool: return self.is_part_of_destructive_change @property - def _alter_actions(self) -> t.List[exp.Expression]: + def _alter_actions(self) -> t.List[exp.Expr]: column_def = exp.ColumnDef( this=self.column, kind=self.column_type, @@ -127,7 +127,7 @@ def is_destructive(self) -> bool: return True @property - def _alter_actions(self) -> t.List[exp.Expression]: + def _alter_actions(self) -> t.List[exp.Expr]: return [exp.Drop(this=self.column, kind="COLUMN", cascade=self.cascade)] @@ -145,7 +145,7 @@ def is_destructive(self) -> bool: return self.is_part_of_destructive_change @property - def _alter_actions(self) -> t.List[exp.Expression]: + def _alter_actions(self) -> t.List[exp.Expr]: return [ exp.AlterColumn( this=self.column, @@ -363,14 +363,12 @@ class SchemaDiffer(PydanticModel): coerceable_types_: t.Dict[exp.DataType, t.Set[exp.DataType]] = Field( default_factory=dict, alias="coerceable_types" ) - precision_increase_allowed_types: t.Optional[t.Set[exp.DataType.Type]] = None + 
precision_increase_allowed_types: t.Optional[t.Set[exp.DType]] = None support_coercing_compatible_types: bool = False drop_cascade: bool = False - parameterized_type_defaults: t.Dict[ - exp.DataType.Type, t.List[t.Tuple[t.Union[int, float], ...]] - ] = {} - max_parameter_length: t.Dict[exp.DataType.Type, t.Union[int, float]] = {} - types_with_unlimited_length: t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]] = {} + parameterized_type_defaults: t.Dict[exp.DType, t.List[t.Tuple[t.Union[int, float], ...]]] = {} + max_parameter_length: t.Dict[exp.DType, t.Union[int, float]] = {} + types_with_unlimited_length: t.Dict[exp.DType, t.Set[exp.DType]] = {} treat_alter_data_type_as_destructive: bool = False _coerceable_types: t.Dict[exp.DataType, t.Set[exp.DataType]] = {} diff --git a/sqlmesh/core/selector.py b/sqlmesh/core/selector.py index c44065bdc0..9eaf4995c8 100644 --- a/sqlmesh/core/selector.py +++ b/sqlmesh/core/selector.py @@ -3,6 +3,8 @@ import fnmatch import typing as t from pathlib import Path +from itertools import zip_longest +import abc from sqlglot import exp from sqlglot.errors import ParseError @@ -14,6 +16,7 @@ from sqlmesh.core.dialect import normalize_model_name from sqlmesh.core.environment import Environment from sqlmesh.core.model import update_model_schemas +from sqlmesh.core.audit import StandaloneAudit from sqlmesh.utils import UniqueKeyDict from sqlmesh.utils.dag import DAG from sqlmesh.utils.git import GitClient @@ -23,10 +26,11 @@ if t.TYPE_CHECKING: from typing_extensions import Literal as Lit # noqa from sqlmesh.core.model import Model + from sqlmesh.core.node import Node from sqlmesh.core.state_sync import StateReader -class Selector: +class Selector(abc.ABC): def __init__( self, state_reader: StateReader, @@ -165,20 +169,20 @@ def get_model(fqn: str) -> t.Optional[Model]: return models def expand_model_selections( - self, model_selections: t.Iterable[str], models: t.Optional[t.Dict[str, Model]] = None + self, model_selections: 
t.Iterable[str], models: t.Optional[t.Dict[str, Node]] = None ) -> t.Set[str]: - """Expands a set of model selections into a set of model names. + """Expands a set of model selections into a set of model fqns that can be looked up in the Context. Args: model_selections: A set of model selections. Returns: - A set of model names. + A set of model fqns. """ node = parse(" | ".join(f"({s})" for s in model_selections)) - all_models = models or self._models + all_models: t.Dict[str, Node] = models or dict(self._models) models_by_tags: t.Dict[str, t.Set[str]] = {} for fqn, model in all_models.items(): @@ -187,17 +191,16 @@ def expand_model_selections( models_by_tags.setdefault(tag, set()) models_by_tags[tag].add(model.fqn) - def evaluate(node: exp.Expression) -> t.Set[str]: + def evaluate(node: exp.Expr) -> t.Set[str]: if isinstance(node, exp.Var): pattern = node.this if "*" in pattern: return { fqn for fqn, model in all_models.items() - if fnmatch.fnmatchcase(model.name, node.this) + if fnmatch.fnmatchcase(self._model_name(model), node.this) } - fqn = normalize_model_name(pattern, self._default_catalog, self._dialect) - return {fqn} if fqn in all_models else set() + return self._pattern_to_model_fqns(pattern, all_models) if isinstance(node, exp.And): return evaluate(node.left) & evaluate(node.right) if isinstance(node, exp.Or): @@ -225,6 +228,13 @@ def evaluate(node: exp.Expression) -> t.Set[str]: if fnmatch.fnmatchcase(tag, pattern) } return models_by_tags.get(pattern, set()) + if isinstance(node, ResourceType): + resource_type = node.name.lower() + return { + fqn + for fqn, model in all_models.items() + if self._matches_resource_type(resource_type, model) + } if isinstance(node, Direction): selected = set() @@ -241,6 +251,117 @@ def evaluate(node: exp.Expression) -> t.Set[str]: return evaluate(node) + @abc.abstractmethod + def _model_name(self, model: Node) -> str: + """Given a model, return the name that a selector pattern contining wildcards should be fnmatch'd 
on""" + pass + + @abc.abstractmethod + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]: + """Given a pattern, return the keys of the matching models from :all_models""" + pass + + @abc.abstractmethod + def _matches_resource_type(self, resource_type: str, model: Node) -> bool: + """Indicate whether or not the supplied model matches the supplied resource type""" + pass + + +class NativeSelector(Selector): + """Implementation of selectors that matches objects based on SQLMesh native names""" + + def _model_name(self, model: Node) -> str: + return model.name + + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]: + fqn = normalize_model_name(pattern, self._default_catalog, self._dialect) + return {fqn} if fqn in all_models else set() + + def _matches_resource_type(self, resource_type: str, model: Node) -> bool: + if resource_type == "model": + return model.is_model + if resource_type == "audit": + return isinstance(model, StandaloneAudit) + + raise SQLMeshError(f"Unsupported resource type: {resource_type}") + + +class DbtSelector(Selector): + """Implementation of selectors that matches objects based on the DBT names instead of the SQLMesh native names""" + + def _model_name(self, model: Node) -> str: + if dbt_fqn := model.dbt_fqn: + return dbt_fqn + raise SQLMeshError("dbt node information must be populated to use dbt selectors") + + def _pattern_to_model_fqns(self, pattern: str, all_models: t.Dict[str, Node]) -> t.Set[str]: + # a pattern like "staging.customers" should match a model called "jaffle_shop.staging.customers" + # but not a model called "jaffle_shop.customers.staging" + # also a pattern like "aging" should not match "staging" so we need to consider components; not substrings + pattern_components = pattern.split(".") + first_pattern_component = pattern_components[0] + matches = set() + for fqn, model in all_models.items(): + if not model.dbt_fqn: + continue + + 
dbt_fqn_components = model.dbt_fqn.split(".") + try: + starting_idx = dbt_fqn_components.index(first_pattern_component) + except ValueError: + continue + for pattern_component, fqn_component in zip_longest( + pattern_components, dbt_fqn_components[starting_idx:] + ): + if pattern_component and not fqn_component: + # the pattern still goes but we have run out of fqn components to match; no match + break + if fqn_component and not pattern_component: + # all elements of the pattern have matched elements of the fqn; match + matches.add(fqn) + break + if pattern_component != fqn_component: + # the pattern explicitly doesnt match a component; no match + break + else: + # called if no explicit break, indicating all components of the pattern matched all components of the fqn + matches.add(fqn) + return matches + + def _matches_resource_type(self, resource_type: str, model: Node) -> bool: + """ + ref: https://docs.getdbt.com/reference/node-selection/methods#resource_type + + # supported by SQLMesh + "model" + "seed" + "source" # external model + "test" # standalone audit + + # not supported by SQLMesh yet, commented out to throw an error if someone tries to use them + "analysis" + "exposure" + "metric" + "saved_query" + "semantic_model" + "snapshot" + "unit_test" + """ + if resource_type not in ("model", "seed", "source", "test"): + raise SQLMeshError(f"Unsupported resource type: {resource_type}") + + if isinstance(model, StandaloneAudit): + return resource_type == "test" + + if resource_type == "model": + return model.is_model and not model.kind.is_external and not model.kind.is_seed + if resource_type == "source": + return model.kind.is_external + if resource_type == "seed": + return model.kind.is_seed + + return False + class SelectorDialect(Dialect): IDENTIFIERS_CAN_START_WITH_DIGIT = True @@ -271,11 +392,15 @@ class Tag(exp.Expression): pass +class ResourceType(exp.Expression): + pass + + class Direction(exp.Expression): pass -def parse(selector: str, dialect: 
DialectType = None) -> exp.Expression: +def parse(selector: str, dialect: DialectType = None) -> exp.Expr: tokens = SelectorDialect().tokenize(selector) i = 0 @@ -319,11 +444,12 @@ def _parse_kind(kind: str) -> bool: return True return False - def _parse_var() -> exp.Expression: + def _parse_var() -> exp.Expr: upstream = _match(TokenType.PLUS) downstream = None tag = _parse_kind("tag") - git = False if tag else _parse_kind("git") + resource_type = False if tag else _parse_kind("resource_type") + git = False if resource_type else _parse_kind("git") lstar = "*" if _match(TokenType.STAR) else "" directions = {} @@ -331,7 +457,7 @@ def _parse_var() -> exp.Expression: name = _prev().text rstar = "*" if _match(TokenType.STAR) else "" downstream = _match(TokenType.PLUS) - this: exp.Expression = exp.Var(this=f"{lstar}{name}{rstar}") + this: exp.Expr = exp.Var(this=f"{lstar}{name}{rstar}") elif _match(TokenType.L_PAREN): this = exp.Paren(this=_parse_conjunction()) @@ -349,18 +475,20 @@ def _parse_var() -> exp.Expression: if tag: this = Tag(this=this) + if resource_type: + this = ResourceType(this=this) if git: this = Git(this=this) if directions: this = Direction(this=this, **directions) return this - def _parse_unary() -> exp.Expression: + def _parse_unary() -> exp.Expr: if _match(TokenType.CARET): return exp.Not(this=_parse_unary()) return _parse_var() - def _parse_conjunction() -> exp.Expression: + def _parse_conjunction() -> exp.Expr: this = _parse_unary() if _match(TokenType.AMP): diff --git a/sqlmesh/core/signal.py b/sqlmesh/core/signal.py index d9ee670922..554dd60a39 100644 --- a/sqlmesh/core/signal.py +++ b/sqlmesh/core/signal.py @@ -1,7 +1,14 @@ from __future__ import annotations - +import typing as t from sqlmesh.utils import UniqueKeyDict, registry_decorator +from sqlmesh.utils.errors import MissingSourceError + +if t.TYPE_CHECKING: + from sqlmesh.core.context import ExecutionContext + from sqlmesh.core.snapshot.definition import Snapshot + from 
sqlmesh.utils.date import DatetimeRanges + from sqlmesh.core.snapshot.definition import DeployabilityIndex class signal(registry_decorator): @@ -33,3 +40,59 @@ class signal(registry_decorator): SignalRegistry = UniqueKeyDict[str, signal] + + +@signal() +def freshness( + batch: DatetimeRanges, + snapshot: Snapshot, + context: ExecutionContext, +) -> bool: + """ + Implements model freshness as a signal, i.e it considers this model to be fresh if: + - Any upstream SQLMesh model has available intervals to compute i.e is fresh + - Any upstream external model has been altered since the last time the model was evaluated + """ + adapter = context.engine_adapter + if context.is_restatement or not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS: + return True + + deployability_index = context.deployability_index or DeployabilityIndex.all_deployable() + + last_altered_ts = ( + snapshot.last_altered_ts + if deployability_index.is_deployable(snapshot) + else snapshot.dev_last_altered_ts + ) + + if not last_altered_ts: + return True + + parent_snapshots = {context.snapshots[p.name] for p in snapshot.parents} + + upstream_parent_snapshots = {p for p in parent_snapshots if not p.is_external} + external_parents = snapshot.node.depends_on - {p.name for p in upstream_parent_snapshots} + + if context.parent_intervals: + # At least one upstream sqlmesh model has intervals to compute (i.e is fresh), + # so the current model is considered fresh too + return True + + if external_parents: + external_last_altered_timestamps = adapter.get_table_last_modified_ts( + list(external_parents) + ) + + if len(external_last_altered_timestamps) != len(external_parents): + raise MissingSourceError( + f"Expected {len(external_parents)} sources to be present, but got {len(external_last_altered_timestamps)}." 
+ ) + + # Finding new data means that the upstream depedencies have been altered + # since the last time the model was evaluated + return any( + external_last_altered_ts > last_altered_ts + for external_last_altered_ts in external_last_altered_timestamps + ) + + return False diff --git a/sqlmesh/core/snapshot/definition.py b/sqlmesh/core/snapshot/definition.py index c17e94be10..0c9635a7c2 100644 --- a/sqlmesh/core/snapshot/definition.py +++ b/sqlmesh/core/snapshot/definition.py @@ -185,6 +185,8 @@ class SnapshotIntervals(PydanticModel): intervals: Intervals = [] dev_intervals: Intervals = [] pending_restatement_intervals: Intervals = [] + last_altered_ts: t.Optional[int] = None + dev_last_altered_ts: t.Optional[int] = None @property def snapshot_id(self) -> t.Optional[SnapshotId]: @@ -205,6 +207,12 @@ def add_dev_interval(self, start: int, end: int) -> None: def add_pending_restatement_interval(self, start: int, end: int) -> None: self._add_interval(start, end, "pending_restatement_intervals") + def update_last_altered_ts(self, last_altered_ts: t.Optional[int]) -> None: + self._update_last_altered_ts(last_altered_ts, "last_altered_ts") + + def update_dev_last_altered_ts(self, last_altered_ts: t.Optional[int]) -> None: + self._update_last_altered_ts(last_altered_ts, "dev_last_altered_ts") + def remove_interval(self, start: int, end: int) -> None: self._remove_interval(start, end, "intervals") @@ -224,6 +232,13 @@ def _add_interval(self, start: int, end: int, interval_attr: str) -> None: target_intervals = merge_intervals([*target_intervals, (start, end)]) setattr(self, interval_attr, target_intervals) + def _update_last_altered_ts( + self, last_altered_ts: t.Optional[int], last_altered_attr: str + ) -> None: + if last_altered_ts: + existing_last_altered_ts = getattr(self, last_altered_attr) + setattr(self, last_altered_attr, max(existing_last_altered_ts or 0, last_altered_ts)) + def _remove_interval(self, start: int, end: int, interval_attr: str) -> None: 
target_intervals = getattr(self, interval_attr) target_intervals = remove_interval(target_intervals, start, end) @@ -638,6 +653,16 @@ def dev_version(self) -> str: def model_kind_name(self) -> t.Optional[ModelKindName]: return self.kind_name_ + def display_name( + self, + environment_naming_info: EnvironmentNamingInfo, + default_catalog: t.Optional[str], + dialect: DialectType = None, + ) -> str: + return model_display_name( + self.name, environment_naming_info, default_catalog, dialect=dialect + ) + class Snapshot(PydanticModel, SnapshotInfoMixin): """A snapshot represents a node at a certain point in time. @@ -703,6 +728,10 @@ class Snapshot(PydanticModel, SnapshotInfoMixin): dev_table_suffix: str = "dev" table_naming_convention: TableNamingConvention = TableNamingConvention.default forward_only: bool = False + # Physical table last modified timestamp, not to be confused with the "updated_ts" field + # which is for the snapshot record itself + last_altered_ts: t.Optional[int] = None + dev_last_altered_ts: t.Optional[int] = None @field_validator("ttl") @classmethod @@ -741,6 +770,7 @@ def hydrate_with_intervals_by_version( ) for interval in snapshot_intervals: snapshot.merge_intervals(interval) + result.append(snapshot) return result @@ -947,12 +977,20 @@ def merge_intervals(self, other: t.Union[Snapshot, SnapshotIntervals]) -> None: if not apply_effective_from or end <= effective_from_ts: self.add_interval(start, end) + if other.last_altered_ts: + self.last_altered_ts = max(self.last_altered_ts or 0, other.last_altered_ts) + if self.dev_version == other.dev_version: # Merge dev intervals if the dev versions match which would mean # that this and the other snapshot are pointing to the same dev table. 
for start, end in other.dev_intervals: self.add_interval(start, end, is_dev=True) + if other.dev_last_altered_ts: + self.dev_last_altered_ts = max( + self.dev_last_altered_ts or 0, other.dev_last_altered_ts + ) + self.pending_restatement_intervals = merge_intervals( [*self.pending_restatement_intervals, *other.pending_restatement_intervals] ) @@ -1071,6 +1109,7 @@ def check_ready_intervals( python_env=signals.python_env, dialect=self.model.dialect, path=self.model._path, + snapshot=self, kwargs=kwargs, ) except SQLMeshError as e: @@ -1467,19 +1506,19 @@ def expiration_ts(self) -> int: check_categorical_relative_expression=False, ) + @property + def supports_schema_migration_in_prod(self) -> bool: + """Returns whether or not this snapshot supports schema migration when deployed to production.""" + return self.is_paused and self.is_model and not self.is_symbolic and not self.is_seed + @property def requires_schema_migration_in_prod(self) -> bool: """Returns whether or not this snapshot requires a schema migration when deployed to production.""" - return ( - self.is_paused - and self.is_model - and self.is_materialized - and ( - (self.previous_version and self.previous_version.version == self.version) - or self.model.forward_only - or bool(self.model.physical_version) - or not self.virtual_environment_mode.is_full - ) + return self.supports_schema_migration_in_prod and ( + (self.previous_version and self.previous_version.version == self.version) + or self.model.forward_only + or bool(self.model.physical_version) + or not self.virtual_environment_mode.is_full ) @property @@ -1788,7 +1827,19 @@ def display_name( """ if snapshot_info_like.is_audit: return snapshot_info_like.name - view_name = exp.to_table(snapshot_info_like.name) + + return model_display_name( + snapshot_info_like.name, environment_naming_info, default_catalog, dialect + ) + + +def model_display_name( + node_name: str, + environment_naming_info: EnvironmentNamingInfo, + default_catalog: t.Optional[str], 
+ dialect: DialectType = None, +) -> str: + view_name = exp.to_table(node_name) catalog = ( None @@ -2030,16 +2081,20 @@ def missing_intervals( continue snapshot_end_date = existing_interval_end + snapshot_start_date = max( + to_datetime(snapshot_start_date), + to_datetime(start_date(snapshot, snapshots, cache, relative_to=snapshot_end_date)), + ) + if snapshot_start_date > to_datetime(snapshot_end_date): + continue + missing_interval_end_date = snapshot_end_date node_end_date = snapshot.node.end if node_end_date and (to_datetime(node_end_date) < to_datetime(snapshot_end_date)): missing_interval_end_date = node_end_date intervals = snapshot.missing_intervals( - max( - to_datetime(snapshot_start_date), - to_datetime(start_date(snapshot, snapshots, cache, relative_to=snapshot_end_date)), - ), + snapshot_start_date, missing_interval_end_date, execution_time=execution_time, deployability_index=deployability_index, @@ -2244,14 +2299,16 @@ def start_date( if not isinstance(snapshots, dict): snapshots = {snapshot.snapshot_id: snapshot for snapshot in snapshots} - earliest = snapshot.node.cron_prev(snapshot.node.cron_floor(relative_to or now())) - - for parent in snapshot.parents: - if parent in snapshots: - earliest = min( - earliest, - start_date(snapshots[parent], snapshots, cache=cache, relative_to=relative_to), - ) + parent_starts = [ + start_date(snapshots[parent], snapshots, cache=cache, relative_to=relative_to) + for parent in snapshot.parents + if parent in snapshots + ] + earliest = ( + min(parent_starts) + if parent_starts + else snapshot.node.cron_prev(snapshot.node.cron_floor(relative_to or now())) + ) cache[key] = earliest return earliest @@ -2399,6 +2456,7 @@ def check_ready_intervals( python_env: t.Dict[str, Executable], dialect: DialectType = None, path: t.Optional[Path] = None, + snapshot: t.Optional[Snapshot] = None, kwargs: t.Optional[t.Dict] = None, ) -> Intervals: checked_intervals: Intervals = [] @@ -2414,6 +2472,7 @@ def check_ready_intervals( 
provided_args=(batch,), provided_kwargs=(kwargs or {}), context=context, + snapshot=snapshot, ) except Exception as ex: raise SignalEvalError(format_evaluated_code_exception(ex, python_env)) diff --git a/sqlmesh/core/snapshot/evaluator.py b/sqlmesh/core/snapshot/evaluator.py index 961062fe45..b1ffd4dc26 100644 --- a/sqlmesh/core/snapshot/evaluator.py +++ b/sqlmesh/core/snapshot/evaluator.py @@ -32,12 +32,14 @@ from sqlglot import exp, select from sqlglot.executor import execute +from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_not_exception_type from sqlmesh.core import constants as c from sqlmesh.core import dialect as d from sqlmesh.core.audit import Audit, StandaloneAudit from sqlmesh.core.dialect import schema_ from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, DataObjectType, DataObject +from sqlmesh.core.model.meta import GrantsTargetLayer from sqlmesh.core.macros import RuntimeStage from sqlmesh.core.model import ( AuditResult, @@ -49,7 +51,7 @@ ViewKind, CustomKind, ) -from sqlmesh.core.model.kind import _Incremental +from sqlmesh.core.model.kind import _Incremental, DbtCustomKind from sqlmesh.utils import CompletionStatus, columns_to_types_all_known from sqlmesh.core.schema_diff import ( has_drop_alteration, @@ -66,7 +68,7 @@ SnapshotTableCleanupTask, ) from sqlmesh.core.snapshot.execution_tracker import QueryExecutionTracker -from sqlmesh.utils import random_id, CorrelationId +from sqlmesh.utils import random_id, CorrelationId, AttributeDict from sqlmesh.utils.concurrency import ( concurrent_apply_to_snapshots, concurrent_apply_to_values, @@ -76,11 +78,13 @@ from sqlmesh.utils.errors import ( ConfigError, DestructiveChangeError, + MigrationNotSupportedError, SQLMeshError, format_destructive_change_msg, format_additive_change_msg, AdditiveChangeError, ) +from sqlmesh.utils.jinja import MacroReturnVal if sys.version_info >= (3, 12): from importlib import metadata @@ -245,7 +249,7 @@ def evaluate_and_fetch( 
query_or_df = next(queries_or_dfs) if isinstance(query_or_df, pd.DataFrame): return query_or_df.head(limit) - if not isinstance(query_or_df, exp.Expression): + if not isinstance(query_or_df, exp.Expr): # We assume that if this branch is reached, `query_or_df` is a pyspark / snowpark / bigframe dataframe, # so we use `limit` instead of `head` to get back a dataframe instead of List[Row] # https://spark.apache.org/docs/3.1.1/api/python/reference/api/pyspark.sql.DataFrame.head.html#pyspark.sql.DataFrame.head @@ -304,6 +308,9 @@ def promote( ] self._create_schemas(gateway_table_pairs=gateway_table_pairs) + # Fetch the view data objects for the promoted snapshots to get them cached + self._get_virtual_data_objects(target_snapshots, environment_naming_info) + deployability_index = deployability_index or DeployabilityIndex.all_deployable() with self.concurrent_context(): concurrent_apply_to_snapshots( @@ -422,7 +429,9 @@ def get_snapshots_to_create( target_snapshots: Target snapshots. deployability_index: Determines snapshots that are deployable / representative in the context of this creation. """ - existing_data_objects = self._get_data_objects(target_snapshots, deployability_index) + existing_data_objects = self._get_physical_data_objects( + target_snapshots, deployability_index + ) snapshots_to_create = [] for snapshot in target_snapshots: if not snapshot.is_model or snapshot.is_symbolic: @@ -479,7 +488,7 @@ def migrate( deployability_index: Determines snapshots that are deployable in the context of this evaluation. 
""" deployability_index = deployability_index or DeployabilityIndex.all_deployable() - target_data_objects = self._get_data_objects(target_snapshots, deployability_index) + target_data_objects = self._get_physical_data_objects(target_snapshots, deployability_index) if not target_data_objects: return @@ -489,15 +498,14 @@ def migrate( allow_destructive_snapshots = allow_destructive_snapshots or set() allow_additive_snapshots = allow_additive_snapshots or set() snapshots_by_name = {s.name: s for s in snapshots.values()} - snapshots_with_data_objects = [snapshots[s_id] for s_id in target_data_objects] with self.concurrent_context(): # Only migrate snapshots for which there's an existing data object concurrent_apply_to_snapshots( - snapshots_with_data_objects, + target_snapshots, lambda s: self._migrate_snapshot( s, snapshots_by_name, - target_data_objects[s.snapshot_id], + target_data_objects.get(s.snapshot_id), allow_destructive_snapshots, allow_additive_snapshots, self.get_adapter(s.model_gateway), @@ -517,10 +525,12 @@ def cleanup( target_snapshots: Snapshots to cleanup. on_complete: A callback to call on each successfully deleted database object. 
""" + target_snapshots = [ + t for t in target_snapshots if t.snapshot.is_model and not t.snapshot.is_symbolic + ] snapshots_to_dev_table_only = { t.snapshot.snapshot_id: t.dev_table_only for t in target_snapshots } - with self.concurrent_context(): concurrent_apply_to_snapshots( [t.snapshot for t in target_snapshots], @@ -704,7 +714,7 @@ def _evaluate_snapshot( deployability_index = deployability_index or DeployabilityIndex.all_deployable() is_snapshot_deployable = deployability_index.is_deployable(snapshot) target_table_name = snapshot.table_name(is_deployable=is_snapshot_deployable) - # https://github.com/TobikoData/sqlmesh/issues/2609 + # https://github.com/SQLMesh/sqlmesh/issues/2609 # If there are no existing intervals yet; only consider this a first insert for the first snapshot in the batch if target_table_exists is None: target_table_exists = adapter.table_exists(target_table_name) @@ -740,38 +750,51 @@ def _evaluate_snapshot( **render_statements_kwargs ) + evaluation_strategy = _evaluation_strategy(snapshot, adapter) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, + render_kwargs={**render_statements_kwargs, "inside_transaction": False}, + ) + with ( adapter.transaction(), adapter.session(snapshot.model.render_session_properties(**render_statements_kwargs)), ): - adapter.execute(model.render_pre_statements(**render_statements_kwargs)) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, + render_kwargs={**render_statements_kwargs, "inside_transaction": True}, + ) if not target_table_exists or (model.is_seed and not snapshot.intervals): - columns_to_types_provided = ( + # Only create the empty table if the columns were provided explicitly by the user + should_create_empty_table = ( model.kind.is_materialized and model.columns_to_types_ and columns_to_types_all_known(model.columns_to_types_) ) + if not should_create_empty_table: + # Or if the model is self-referential and its query is fully annotated with types + 
should_create_empty_table = model.depends_on_self and model.annotated if self._can_clone(snapshot, deployability_index): self._clone_snapshot_in_dev( snapshot=snapshot, snapshots=snapshots, deployability_index=deployability_index, render_kwargs=create_render_kwargs, - rendered_physical_properties=rendered_physical_properties, + rendered_physical_properties=rendered_physical_properties.copy(), allow_destructive_snapshots=allow_destructive_snapshots, allow_additive_snapshots=allow_additive_snapshots, ) runtime_stage = RuntimeStage.EVALUATING target_table_exists = True - elif columns_to_types_provided or model.is_seed or model.kind.is_scd_type_2: + elif should_create_empty_table or model.is_seed or model.kind.is_scd_type_2: self._execute_create( snapshot=snapshot, table_name=target_table_name, is_table_deployable=is_snapshot_deployable, deployability_index=deployability_index, create_render_kwargs=create_render_kwargs, - rendered_physical_properties=rendered_physical_properties, + rendered_physical_properties=rendered_physical_properties.copy(), dry_run=False, run_pre_post_statements=False, ) @@ -788,6 +811,7 @@ def _evaluate_snapshot( if ( snapshot.is_materialized and target_table_exists + and adapter.wap_enabled and (model.wap_supported or adapter.wap_supported(target_table_name)) ): wap_id = random_id()[0:8] @@ -809,9 +833,17 @@ def _evaluate_snapshot( batch_index=batch_index, ) - adapter.execute(model.render_post_statements(**render_statements_kwargs)) + evaluation_strategy.run_post_statements( + snapshot=snapshot, + render_kwargs={**render_statements_kwargs, "inside_transaction": True}, + ) - return wap_id + evaluation_strategy.run_post_statements( + snapshot=snapshot, + render_kwargs={**render_statements_kwargs, "inside_transaction": False}, + ) + + return wap_id def create_snapshot( self, @@ -845,6 +877,11 @@ def create_snapshot( deployability_index=deployability_index, ) + evaluation_strategy = _evaluation_strategy(snapshot, adapter) + 
evaluation_strategy.run_pre_statements( + snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} + ) + with ( adapter.transaction(), adapter.session(snapshot.model.render_session_properties(**create_render_kwargs)), @@ -862,6 +899,7 @@ def create_snapshot( rendered_physical_properties=rendered_physical_properties, allow_destructive_snapshots=allow_destructive_snapshots, allow_additive_snapshots=allow_additive_snapshots, + run_pre_post_statements=True, ) else: is_table_deployable = deployability_index.is_deployable(snapshot) @@ -875,6 +913,10 @@ def create_snapshot( dry_run=True, ) + evaluation_strategy.run_post_statements( + snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} + ) + if on_complete is not None: on_complete(snapshot) @@ -898,7 +940,7 @@ def _render_and_insert_snapshot( snapshots: t.Dict[str, Snapshot], render_kwargs: t.Dict[str, t.Any], create_render_kwargs: t.Dict[str, t.Any], - rendered_physical_properties: t.Dict[str, exp.Expression], + rendered_physical_properties: t.Dict[str, exp.Expr], deployability_index: DeployabilityIndex, target_table_name: str, is_first_insert: bool, @@ -912,6 +954,7 @@ def _render_and_insert_snapshot( model = snapshot.model adapter = self.get_adapter(model.gateway) evaluation_strategy = _evaluation_strategy(snapshot, adapter) + is_snapshot_deployable = deployability_index.is_deployable(snapshot) queries_or_dfs = self._render_snapshot_for_evaluation( snapshot, @@ -935,6 +978,7 @@ def apply(query_or_df: QueryOrDF, index: int = 0) -> None: execution_time=execution_time, physical_properties=rendered_physical_properties, render_kwargs=create_render_kwargs, + is_snapshot_deployable=is_snapshot_deployable, ) else: logger.info( @@ -957,6 +1001,7 @@ def apply(query_or_df: QueryOrDF, index: int = 0) -> None: execution_time=execution_time, physical_properties=rendered_physical_properties, render_kwargs=create_render_kwargs, + 
is_snapshot_deployable=is_snapshot_deployable, ) # DataFrames, unlike SQL expressions, can provide partial results by yielding dataframes. As a result, @@ -976,6 +1021,11 @@ def apply(query_or_df: QueryOrDF, index: int = 0) -> None: ): import pandas as pd + try: + first_query_or_df = next(queries_or_dfs) + except StopIteration: + return + query_or_df = reduce( lambda a, b: ( pd.concat([a, b], ignore_index=True) # type: ignore @@ -983,6 +1033,7 @@ def apply(query_or_df: QueryOrDF, index: int = 0) -> None: else a.union_all(b) # type: ignore ), # type: ignore queries_or_dfs, + first_query_or_df, ) apply(query_or_df, index=0) else: @@ -1018,9 +1069,10 @@ def _clone_snapshot_in_dev( snapshots: t.Dict[str, Snapshot], deployability_index: DeployabilityIndex, render_kwargs: t.Dict[str, t.Any], - rendered_physical_properties: t.Dict[str, exp.Expression], + rendered_physical_properties: t.Dict[str, exp.Expr], allow_destructive_snapshots: t.Set[str], allow_additive_snapshots: t.Set[str], + run_pre_post_statements: bool = False, ) -> None: adapter = self.get_adapter(snapshot.model.gateway) @@ -1032,7 +1084,6 @@ def _clone_snapshot_in_dev( adapter.clone_table( target_table_name, snapshot.table_name(), - replace=True, rendered_physical_properties=rendered_physical_properties, ) self._migrate_target_table( @@ -1044,7 +1095,9 @@ def _clone_snapshot_in_dev( rendered_physical_properties=rendered_physical_properties, allow_destructive_snapshots=allow_destructive_snapshots, allow_additive_snapshots=allow_additive_snapshots, + run_pre_post_statements=run_pre_post_statements, ) + except Exception: adapter.drop_table(target_table_name) raise @@ -1059,7 +1112,7 @@ def _migrate_snapshot( adapter: EngineAdapter, deployability_index: DeployabilityIndex, ) -> None: - if not snapshot.requires_schema_migration_in_prod: + if not snapshot.is_model or snapshot.is_symbolic: return deployability_index = DeployabilityIndex.all_deployable() @@ -1071,6 +1124,11 @@ def _migrate_snapshot( ) 
target_table_name = snapshot.table_name() + evaluation_strategy = _evaluation_strategy(snapshot, adapter) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} + ) + with ( adapter.transaction(), adapter.session(snapshot.model.render_session_properties(**render_kwargs)), @@ -1081,6 +1139,10 @@ def _migrate_snapshot( ): table_exists = False + rendered_physical_properties = snapshot.model.render_physical_properties( + **render_kwargs + ) + if table_exists: self._migrate_target_table( target_table_name=target_table_name, @@ -1088,14 +1150,35 @@ def _migrate_snapshot( snapshots=snapshots, deployability_index=deployability_index, render_kwargs=render_kwargs, - rendered_physical_properties=snapshot.model.render_physical_properties( - **render_kwargs - ), + rendered_physical_properties=rendered_physical_properties, allow_destructive_snapshots=allow_destructive_snapshots, allow_additive_snapshots=allow_additive_snapshots, run_pre_post_statements=True, ) + else: + self._execute_create( + snapshot=snapshot, + table_name=snapshot.table_name(is_deployable=True), + is_table_deployable=True, + deployability_index=deployability_index, + create_render_kwargs=render_kwargs, + rendered_physical_properties=rendered_physical_properties, + dry_run=True, + ) + evaluation_strategy.run_post_statements( + snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} + ) + + # Retry in case when the table is migrated concurrently from another plan application + @retry( + reraise=True, + stop=stop_after_attempt(5), + wait=wait_exponential(min=1, max=16), + retry=retry_if_not_exception_type( + (DestructiveChangeError, AdditiveChangeError, MigrationNotSupportedError) + ), + ) def _migrate_target_table( self, target_table_name: str, @@ -1103,14 +1186,17 @@ def _migrate_target_table( snapshots: t.Dict[str, Snapshot], deployability_index: DeployabilityIndex, render_kwargs: t.Dict[str, t.Any], - 
rendered_physical_properties: t.Dict[str, exp.Expression], + rendered_physical_properties: t.Dict[str, exp.Expr], allow_destructive_snapshots: t.Set[str], allow_additive_snapshots: t.Set[str], run_pre_post_statements: bool = False, ) -> None: adapter = self.get_adapter(snapshot.model.gateway) - tmp_table_name = f"{target_table_name}_schema_tmp" + tmp_table = exp.to_table(target_table_name) + tmp_table.this.set("this", f"{tmp_table.name}_schema_tmp") + tmp_table_name = tmp_table.sql() + if snapshot.is_materialized: self._execute_create( snapshot=snapshot, @@ -1121,6 +1207,7 @@ def _migrate_target_table( rendered_physical_properties=rendered_physical_properties, dry_run=False, run_pre_post_statements=run_pre_post_statements, + skip_grants=True, # skip grants for tmp table ) try: evaluation_strategy = _evaluation_strategy(snapshot, adapter) @@ -1138,6 +1225,7 @@ def _migrate_target_table( allow_additive_snapshots=allow_additive_snapshots, ignore_destructive=snapshot.model.on_destructive_change.is_ignore, ignore_additive=snapshot.model.on_additive_change.is_ignore, + deployability_index=deployability_index, ) finally: if snapshot.is_materialized: @@ -1187,6 +1275,7 @@ def _promote_snapshot( model=snapshot.model, environment=environment_naming_info.name, snapshots=snapshots, + snapshot=snapshot, **render_kwargs, ) @@ -1383,9 +1472,10 @@ def _execute_create( is_table_deployable: bool, deployability_index: DeployabilityIndex, create_render_kwargs: t.Dict[str, t.Any], - rendered_physical_properties: t.Dict[str, exp.Expression], + rendered_physical_properties: t.Dict[str, exp.Expr], dry_run: bool, run_pre_post_statements: bool = True, + skip_grants: bool = False, ) -> None: adapter = self.get_adapter(snapshot.model.gateway) evaluation_strategy = _evaluation_strategy(snapshot, adapter) @@ -1399,19 +1489,28 @@ def _execute_create( "table_mapping": {snapshot.name: table_name}, } if run_pre_post_statements: - 
adapter.execute(snapshot.model.render_pre_statements(**create_render_kwargs)) + evaluation_strategy.run_pre_statements( + snapshot=snapshot, + render_kwargs={**create_render_kwargs, "inside_transaction": True}, + ) evaluation_strategy.create( table_name=table_name, model=snapshot.model, is_table_deployable=is_table_deployable, + skip_grants=skip_grants, render_kwargs=create_render_kwargs, is_snapshot_deployable=is_snapshot_deployable, is_snapshot_representative=is_snapshot_representative, dry_run=dry_run, physical_properties=rendered_physical_properties, + snapshot=snapshot, + deployability_index=deployability_index, ) if run_pre_post_statements: - adapter.execute(snapshot.model.render_post_statements(**create_render_kwargs)) + evaluation_strategy.run_post_statements( + snapshot=snapshot, + render_kwargs={**create_render_kwargs, "inside_transaction": True}, + ) def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex) -> bool: adapter = self.get_adapter(snapshot.model.gateway) @@ -1420,13 +1519,15 @@ def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex and snapshot.is_materialized and bool(snapshot.previous_versions) and adapter.SUPPORTS_CLONING - # managed models cannot have their schema mutated because theyre based on queries, so clone + alter wont work + # managed models cannot have their schema mutated because they're based on queries, so clone + alter won't work and not snapshot.is_managed - # If the deployable table is missing we can't clone it + and not snapshot.is_dbt_custom and not deployability_index.is_deployable(snapshot) + # If the deployable table is missing we can't clone it + and adapter.table_exists(snapshot.table_name()) ) - def _get_data_objects( + def _get_physical_data_objects( self, target_snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex, @@ -1442,18 +1543,70 @@ def _get_data_objects( A dictionary of snapshot IDs to existing data objects of their physical tables. 
If the data object for a snapshot is not found, it will not be included in the dictionary. """ + return self._get_data_objects( + target_snapshots, + lambda s: exp.to_table( + s.table_name(deployability_index.is_deployable(s)), dialect=s.model.dialect + ), + ) + + def _get_virtual_data_objects( + self, + target_snapshots: t.Iterable[Snapshot], + environment_naming_info: EnvironmentNamingInfo, + ) -> t.Dict[SnapshotId, DataObject]: + """Returns a dictionary of snapshot IDs to existing data objects of their virtual views. + + Args: + target_snapshots: Target snapshots. + environment_naming_info: The environment naming info of the target virtual environment. + + Returns: + A dictionary of snapshot IDs to existing data objects of their virtual views. If the data object + for a snapshot is not found, it will not be included in the dictionary. + """ + + def _get_view_name(s: Snapshot) -> exp.Table: + adapter = ( + self.get_adapter(s.model_gateway) + if environment_naming_info.gateway_managed + else self.adapter + ) + return exp.to_table( + s.qualified_view_name.for_environment( + environment_naming_info, dialect=adapter.dialect + ), + dialect=adapter.dialect, + ) + + return self._get_data_objects(target_snapshots, _get_view_name) + + def _get_data_objects( + self, + target_snapshots: t.Iterable[Snapshot], + table_name_callable: t.Callable[[Snapshot], exp.Table], + ) -> t.Dict[SnapshotId, DataObject]: + """Returns a dictionary of snapshot IDs to existing data objects. + + Args: + target_snapshots: Target snapshots. + table_name_callable: A function that takes a snapshot and returns the table to look for. + + Returns: + A dictionary of snapshot IDs to existing data objects. If the data object for a snapshot is not found, + it will not be included in the dictionary. 
+ """ tables_by_gateway_and_schema: t.Dict[t.Union[str, None], t.Dict[exp.Table, set[str]]] = ( defaultdict(lambda: defaultdict(set)) ) - snapshots_by_table_name: t.Dict[str, Snapshot] = {} + snapshots_by_table_name: t.Dict[exp.Table, t.Dict[str, Snapshot]] = defaultdict(dict) for snapshot in target_snapshots: if not snapshot.is_model or snapshot.is_symbolic: continue - is_deployable = deployability_index.is_deployable(snapshot) - table = exp.to_table(snapshot.table_name(is_deployable), dialect=snapshot.model.dialect) + table = table_name_callable(snapshot) table_schema = d.schema_(table.db, catalog=table.catalog) tables_by_gateway_and_schema[snapshot.model_gateway][table_schema].add(table.name) - snapshots_by_table_name[table.name] = snapshot + snapshots_by_table_name[table_schema][table.name] = snapshot def _get_data_objects_in_schema( schema: exp.Table, @@ -1461,26 +1614,30 @@ def _get_data_objects_in_schema( gateway: t.Optional[str] = None, ) -> t.List[DataObject]: logger.info("Listing data objects in schema %s", schema.sql()) - return self.get_adapter(gateway).get_data_objects(schema, object_names) + return self.get_adapter(gateway).get_data_objects( + schema, object_names, safe_to_cache=True + ) with self.concurrent_context(): - existing_objects: t.List[DataObject] = [] + snapshot_id_to_obj: t.Dict[SnapshotId, DataObject] = {} # A schema can be shared across multiple engines, so we need to group tables by both gateway and schema for gateway, tables_by_schema in tables_by_gateway_and_schema.items(): - objs_for_gateway = [ - obj - for objs in concurrent_apply_to_values( - list(tables_by_schema), - lambda s: _get_data_objects_in_schema( - schema=s, object_names=tables_by_schema.get(s), gateway=gateway - ), - self.ddl_concurrent_tasks, - ) - for obj in objs - ] - existing_objects.extend(objs_for_gateway) + schema_list = list(tables_by_schema.keys()) + results = concurrent_apply_to_values( + schema_list, + lambda s: _get_data_objects_in_schema( + schema=s, 
object_names=tables_by_schema.get(s), gateway=gateway + ), + self.ddl_concurrent_tasks, + ) + + for schema, objs in zip(schema_list, results): + snapshots_by_name = snapshots_by_table_name.get(schema, {}) + for obj in objs: + if obj.name in snapshots_by_name: + snapshot_id_to_obj[snapshots_by_name[obj.name].snapshot_id] = obj - return {snapshots_by_table_name[obj.name].snapshot_id: obj for obj in existing_objects} + return snapshot_id_to_obj def _evaluation_strategy(snapshot: SnapshotInfoLike, adapter: EngineAdapter) -> EvaluationStrategy: @@ -1505,6 +1662,19 @@ def _evaluation_strategy(snapshot: SnapshotInfoLike, adapter: EngineAdapter) -> klass = ViewStrategy elif snapshot.is_scd_type_2: klass = SCDType2Strategy + elif snapshot.is_dbt_custom: + if hasattr(snapshot, "model") and isinstance( + (model_kind := snapshot.model.kind), DbtCustomKind + ): + return DbtCustomMaterializationStrategy( + adapter=adapter, + materialization_name=model_kind.materialization, + materialization_template=model_kind.definition, + ) + + raise SQLMeshError( + f"Expected DbtCustomKind for dbt custom materialization in model '{snapshot.name}'" + ) elif snapshot.is_custom: if snapshot.custom_materialization is None: raise SQLMeshError( @@ -1572,6 +1742,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: """Creates the target table or view. @@ -1644,6 +1815,84 @@ def demote(self, view_name: str, **kwargs: t.Any) -> None: view_name: The name of the target view in the virtual layer. """ + @abc.abstractmethod + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + """Executes the snapshot's pre statements. + + Args: + snapshot: The target snapshot. + render_kwargs: Additional key-value arguments to pass when rendering the statements. 
+ """ + + @abc.abstractmethod + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + """Executes the snapshot's post statements. + + Args: + snapshot: The target snapshot. + render_kwargs: Additional key-value arguments to pass when rendering the statements. + """ + + def _apply_grants( + self, + model: Model, + table_name: str, + target_layer: GrantsTargetLayer, + is_snapshot_deployable: bool = False, + ) -> None: + """Apply grants for a model if grants are configured. + + This method provides consistent grants application across all evaluation strategies. + It ensures that whenever a physical database object (table, view, materialized view) + is created or modified, the appropriate grants are applied. + + Args: + model: The SQLMesh model containing grants configuration + table_name: The target table/view name to apply grants to + target_layer: The grants application layer (physical or virtual) + is_snapshot_deployable: Whether the snapshot is deployable (targeting production) + """ + grants_config = model.grants + if grants_config is None: + return + + if not self.adapter.SUPPORTS_GRANTS: + logger.warning( + f"Engine {self.adapter.__class__.__name__} does not support grants. 
" + f"Skipping grants application for model {model.name}" + ) + return + + model_grants_target_layer = model.grants_target_layer + deployable_vde_dev_only = ( + is_snapshot_deployable and model.virtual_environment_mode.is_dev_only + ) + + # table_type is always a VIEW in the virtual layer unless model is deployable and VDE is dev_only + # in which case we fall back to the model's model_grants_table_type + if target_layer == GrantsTargetLayer.VIRTUAL and not deployable_vde_dev_only: + model_grants_table_type = DataObjectType.VIEW + else: + model_grants_table_type = model.grants_table_type + + if ( + model_grants_target_layer.is_all + or model_grants_target_layer == target_layer + # Always apply grants in production when VDE is dev_only regardless of target_layer + # since only physical tables are created in production + or deployable_vde_dev_only + ): + logger.info(f"Applying grants for model {model.name} to table {table_name}") + self.adapter.sync_grants_config( + exp.to_table(table_name, dialect=self.adapter.dialect), + grants_config, + model_grants_table_type, + ) + else: + logger.debug( + f"Skipping grants application for model {model.name} in {target_layer} layer" + ) + class SymbolicStrategy(EvaluationStrategy): def insert( @@ -1673,6 +1922,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: pass @@ -1705,6 +1955,12 @@ def promote( def demote(self, view_name: str, **kwargs: t.Any) -> None: pass + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: + pass + + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: + pass + class EmbeddedStrategy(SymbolicStrategy): def promote( @@ -1748,10 +2004,27 @@ def promote( view_properties=model.render_virtual_properties(**render_kwargs), ) + snapshot = kwargs.get("snapshot") + deployability_index = kwargs.get("deployability_index") + 
is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) + if snapshot and deployability_index + else False + ) + + # Apply grants to the virtual layer (view) after promotion + self._apply_grants(model, view_name, GrantsTargetLayer.VIRTUAL, is_snapshot_deployable) + def demote(self, view_name: str, **kwargs: t.Any) -> None: logger.info("Dropping view '%s'", view_name) self.adapter.drop_view(view_name, cascade=False) + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + self.adapter.execute(snapshot.model.render_pre_statements(**render_kwargs)) + + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + self.adapter.execute(snapshot.model.render_post_statements(**render_kwargs)) + class MaterializableStrategy(PromotableStrategy, abc.ABC): def create( @@ -1760,6 +2033,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: ctas_query = model.ctas_query(**render_kwargs) @@ -1804,6 +2078,13 @@ def create( column_descriptions=model.column_descriptions if is_table_deployable else None, ) + # Apply grants after table creation (unless explicitly skipped by caller) + if not skip_grants: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def migrate( self, target_table_name: str, @@ -1829,6 +2110,15 @@ def migrate( ) self.adapter.alter_table(alter_operations) + # Apply grants after schema migration + deployability_index = kwargs.get("deployability_index") + is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) if deployability_index else False + ) + self._apply_grants( + snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def delete(self, name: str, **kwargs: t.Any) -> None: _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) 
self.adapter.drop_table(name, cascade=kwargs.pop("cascade", False)) @@ -1840,6 +2130,7 @@ def _replace_query_for_model( name: str, query_or_df: QueryOrDF, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool = False, **kwargs: t.Any, ) -> None: """Replaces the table for the given model. @@ -1876,6 +2167,11 @@ def _replace_query_for_model( source_columns=source_columns, ) + # Apply grants after table replacement (unless explicitly skipped by caller) + if not skip_grants: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants(model, name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) + def _get_target_and_source_columns( self, model: Model, @@ -1897,7 +2193,13 @@ def _get_target_and_source_columns( if model.on_destructive_change.is_ignore or model.on_additive_change.is_ignore: # We need to identify the columns that are only in the source so we create an empty table with # the user query to determine that - with self.adapter.temp_table(model.ctas_query(**render_kwargs)) as temp_table: + temp_table_name = exp.table_( + "diff", + db=model.physical_schema, + ) + with self.adapter.temp_table( + model.ctas_query(**render_kwargs), name=temp_table_name + ) as temp_table: source_columns = list(self.adapter.columns(temp_table)) else: source_columns = None @@ -2123,6 +2425,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: model = t.cast(SeedModel, model) @@ -2136,20 +2439,53 @@ def create( ) return - super().create(table_name, model, is_table_deployable, render_kwargs, **kwargs) + super().create( + table_name, + model, + is_table_deployable, + render_kwargs, + skip_grants=True, # Skip grants; they're applied after data insertion + **kwargs, + ) # For seeds we insert data at the time of table creation. 
try: for index, df in enumerate(model.render_seed()): if index == 0: - self._replace_query_for_model(model, table_name, df, render_kwargs, **kwargs) + self._replace_query_for_model( + model, + table_name, + df, + render_kwargs, + skip_grants=True, # Skip grants; they're applied after data insertion + **kwargs, + ) else: self.adapter.insert_append( table_name, df, target_columns_to_types=model.columns_to_types ) + + if not skip_grants: + # Apply grants after seed table creation and data insertion + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) except Exception: self.adapter.drop_table(table_name) raise + def migrate( + self, + target_table_name: str, + source_table_name: str, + snapshot: Snapshot, + *, + ignore_destructive: bool, + ignore_additive: bool, + **kwargs: t.Any, + ) -> None: + raise NotImplementedError("Seeds do not support migrations.") + def insert( self, table_name: str, @@ -2181,6 +2517,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: assert isinstance(model.kind, (SCDType2ByTimeKind, SCDType2ByColumnKind)) @@ -2210,9 +2547,17 @@ def create( model, is_table_deployable, render_kwargs, + skip_grants, **kwargs, ) + if not skip_grants: + # Apply grants after SCD Type 2 table creation + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def insert( self, table_name: str, @@ -2280,6 +2625,10 @@ def insert( f"Unexpected SCD Type 2 kind: {model.kind}. This is not expected and please report this as a bug." 
) + # Apply grants after SCD Type 2 table recreation + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) + def append( self, table_name: str, @@ -2336,6 +2685,10 @@ def insert( column_descriptions=model.column_descriptions, ) + # Apply grants after view creation / replacement + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) + def append( self, table_name: str, @@ -2352,12 +2705,21 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + if self.adapter.table_exists(table_name): # Make sure we don't recreate the view to prevent deletion of downstream views in engines with no late # binding support (because of DROP CASCADE). 
logger.info("View '%s' already exists", table_name) + + if not skip_grants: + # Always apply grants when present, even if view exists, to handle grants updates + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) return logger.info("Creating view '%s'", table_name) @@ -2381,6 +2743,12 @@ def create( column_descriptions=model.column_descriptions if is_table_deployable else None, ) + if not skip_grants: + # Apply grants after view creation + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def migrate( self, target_table_name: str, @@ -2407,6 +2775,15 @@ def migrate( column_descriptions=model.column_descriptions, ) + # Apply grants after view migration + deployability_index = kwargs.get("deployability_index") + is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) if deployability_index else False + ) + self._apply_grants( + snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def delete(self, name: str, **kwargs: t.Any) -> None: cascade = kwargs.pop("cascade", False) try: @@ -2546,6 +2923,169 @@ def get_custom_materialization_type_or_raise( raise SQLMeshError(f"Custom materialization '{name}' not present in the Python environment") +class DbtCustomMaterializationStrategy(MaterializableStrategy): + def __init__( + self, + adapter: EngineAdapter, + materialization_name: str, + materialization_template: str, + ): + super().__init__(adapter) + self.materialization_name = materialization_name + self.materialization_template = materialization_template + + def create( + self, + table_name: str, + model: Model, + is_table_deployable: bool, + render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, + **kwargs: t.Any, + ) -> None: + original_query = model.render_query_or_raise(**render_kwargs) + self._execute_materialization( + table_name=table_name, + query_or_df=original_query.limit(0), + model=model, + is_first_insert=True, 
+ render_kwargs=render_kwargs, + create_only=True, + **kwargs, + ) + + # Apply grants after dbt custom materialization table creation + if not skip_grants: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + + def insert( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + is_first_insert: bool, + render_kwargs: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + self._execute_materialization( + table_name=table_name, + query_or_df=query_or_df, + model=model, + is_first_insert=is_first_insert, + render_kwargs=render_kwargs, + **kwargs, + ) + + # Apply grants after custom materialization insert (only on first insert) + if is_first_insert: + is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + + def append( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + render_kwargs: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + return self.insert( + table_name, + query_or_df, + model, + is_first_insert=False, + render_kwargs=render_kwargs, + **kwargs, + ) + + def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + # in dbt custom materialisations it's up to the user to run the pre hooks inside the transaction + if not render_kwargs.get("inside_transaction", True): + super().run_pre_statements( + snapshot=snapshot, + render_kwargs=render_kwargs, + ) + + def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: + # in dbt custom materialisations it's up to the user to run the post hooks inside the transaction + if not render_kwargs.get("inside_transaction", True): + super().run_post_statements( + snapshot=snapshot, + render_kwargs=render_kwargs, + ) + + def _execute_materialization( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + 
is_first_insert: bool, + render_kwargs: t.Dict[str, t.Any], + create_only: bool = False, + **kwargs: t.Any, + ) -> None: + jinja_macros = model.jinja_macros + + # For vdes we need to use the table, since we don't know the schema/table at parse time + parts = exp.to_table(table_name, dialect=self.adapter.dialect) + + existing_globals = jinja_macros.global_objs + relation_info = existing_globals.get("this") + if isinstance(relation_info, dict): + relation_info["database"] = parts.catalog + relation_info["identifier"] = parts.name + relation_info["name"] = parts.name + + jinja_globals = { + **existing_globals, + "this": relation_info, + "database": parts.catalog, + "schema": parts.db, + "identifier": parts.name, + "target": existing_globals.get("target", {"type": self.adapter.dialect}), + "execution_dt": kwargs.get("execution_time"), + "engine_adapter": self.adapter, + "sql": str(query_or_df), + "is_first_insert": is_first_insert, + "create_only": create_only, + "pre_hooks": [ + AttributeDict({"sql": s.this.this, "transaction": transaction}) + for s in model.pre_statements + if (transaction := s.args.get("transaction", True)) + ], + "post_hooks": [ + AttributeDict({"sql": s.this.this, "transaction": transaction}) + for s in model.post_statements + if (transaction := s.args.get("transaction", True)) + ], + "model_instance": model, + **kwargs, + } + + try: + jinja_env = jinja_macros.build_environment(**jinja_globals) + template = jinja_env.from_string(self.materialization_template) + + try: + template.render() + except MacroReturnVal as ret: + # this is a successful return from a macro call (dbt uses this list of Relations to update their relation cache) + returned_relations = ret.value.get("relations", []) + logger.info( + f"Materialization {self.materialization_name} returned relations: {returned_relations}" + ) + + except Exception as e: + raise SQLMeshError( + f"Failed to execute dbt materialization '{self.materialization_name}': {e}" + ) from e + + class 
EngineManagedStrategy(MaterializableStrategy): def create( self, @@ -2553,6 +3093,7 @@ def create( model: Model, is_table_deployable: bool, render_kwargs: t.Dict[str, t.Any], + skip_grants: bool, **kwargs: t.Any, ) -> None: is_snapshot_deployable: bool = kwargs["is_snapshot_deployable"] @@ -2565,12 +3106,19 @@ def create( query=model.render_query_or_raise(**render_kwargs), target_columns_to_types=model.columns_to_types, partitioned_by=model.partitioned_by, - clustered_by=model.clustered_by, + clustered_by=model.clustered_by, # type: ignore[arg-type] table_properties=kwargs.get("physical_properties", model.physical_properties), table_description=model.description, column_descriptions=model.column_descriptions, table_format=model.table_format, ) + + # Apply grants after managed table creation + if not skip_grants: + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + elif not is_table_deployable: # Only create the dev preview table as a normal table. # For the main table, if the snapshot is cant be deployed to prod (eg upstream is forward-only) do nothing. 
@@ -2581,6 +3129,7 @@ def create( model=model, is_table_deployable=is_table_deployable, render_kwargs=render_kwargs, + skip_grants=skip_grants, **kwargs, ) @@ -2596,19 +3145,21 @@ def insert( deployability_index: DeployabilityIndex = kwargs["deployability_index"] snapshot: Snapshot = kwargs["snapshot"] is_snapshot_deployable = deployability_index.is_deployable(snapshot) - if is_first_insert and is_snapshot_deployable and not self.adapter.table_exists(table_name): self.adapter.create_managed_table( table_name=table_name, query=query_or_df, # type: ignore target_columns_to_types=model.columns_to_types, partitioned_by=model.partitioned_by, - clustered_by=model.clustered_by, + clustered_by=model.clustered_by, # type: ignore[arg-type] table_properties=kwargs.get("physical_properties", model.physical_properties), table_description=model.description, column_descriptions=model.column_descriptions, table_format=model.table_format, ) + self._apply_grants( + model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) elif not is_snapshot_deployable: # Snapshot isnt deployable; update the preview table instead # If the snapshot was deployable, then data would have already been loaded in create() because a managed table would have been created @@ -2653,10 +3204,19 @@ def migrate( ) if len(potential_alter_operations) > 0: # this can happen if a user changes a managed model and deliberately overrides a plan to be forward only, eg `sqlmesh plan --forward-only` - raise SQLMeshError( + raise MigrationNotSupportedError( f"The schema of the managed model '{target_table_name}' cannot be updated in a forward-only fashion." 
) + # Apply grants after verifying no schema changes + deployability_index = kwargs.get("deployability_index") + is_snapshot_deployable = ( + deployability_index.is_deployable(snapshot) if deployability_index else False + ) + self._apply_grants( + snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable + ) + def delete(self, name: str, **kwargs: t.Any) -> None: # a dev preview table is created as a normal table, so it needs to be dropped as a normal table _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) diff --git a/sqlmesh/core/state_sync/__init__.py b/sqlmesh/core/state_sync/__init__.py index 1585d6211f..12ea77ac8f 100644 --- a/sqlmesh/core/state_sync/__init__.py +++ b/sqlmesh/core/state_sync/__init__.py @@ -20,5 +20,4 @@ Versions as Versions, ) from sqlmesh.core.state_sync.cache import CachingStateSync as CachingStateSync -from sqlmesh.core.state_sync.common import cleanup_expired_views as cleanup_expired_views from sqlmesh.core.state_sync.db import EngineAdapterStateSync as EngineAdapterStateSync diff --git a/sqlmesh/core/state_sync/base.py b/sqlmesh/core/state_sync/base.py index 450d6f7408..3c8c72845d 100644 --- a/sqlmesh/core/state_sync/base.py +++ b/sqlmesh/core/state_sync/base.py @@ -11,7 +11,6 @@ from sqlmesh import migrations from sqlmesh.core.environment import ( Environment, - EnvironmentNamingInfo, EnvironmentStatements, EnvironmentSummary, ) @@ -21,8 +20,6 @@ SnapshotIdLike, SnapshotIdAndVersionLike, SnapshotInfoLike, - SnapshotTableCleanupTask, - SnapshotTableInfo, SnapshotNameVersion, SnapshotIdAndVersion, ) @@ -30,8 +27,13 @@ from sqlmesh.utils import major_minor from sqlmesh.utils.date import TimeLike from sqlmesh.utils.errors import SQLMeshError -from sqlmesh.utils.pydantic import PydanticModel, ValidationInfo, field_validator -from sqlmesh.core.state_sync.common import StateStream +from sqlmesh.utils.pydantic import PydanticModel, field_validator +from sqlmesh.core.state_sync.common import ( + 
StateStream, + ExpiredSnapshotBatch, + PromotionResult, + ExpiredBatchRange, +) logger = logging.getLogger(__name__) @@ -72,20 +74,6 @@ def _schema_version_validator(cls, v: t.Any) -> int: SCHEMA_VERSION: int = MIN_SCHEMA_VERSION + len(MIGRATIONS) - 1 -class PromotionResult(PydanticModel): - added: t.List[SnapshotTableInfo] - removed: t.List[SnapshotTableInfo] - removed_environment_naming_info: t.Optional[EnvironmentNamingInfo] - - @field_validator("removed_environment_naming_info") - def _validate_removed_environment_naming_info( - cls, v: t.Optional[EnvironmentNamingInfo], info: ValidationInfo - ) -> t.Optional[EnvironmentNamingInfo]: - if v and not info.data.get("removed"): - raise ValueError("removed_environment_naming_info must be None if removed is empty") - return v - - class StateReader(abc.ABC): """Abstract base class for read-only operations on snapshot and environment state.""" @@ -315,15 +303,21 @@ def export(self, environment_names: t.Optional[t.List[str]] = None) -> StateStre @abc.abstractmethod def get_expired_snapshots( - self, current_ts: t.Optional[int] = None, ignore_ttl: bool = False - ) -> t.List[SnapshotTableCleanupTask]: - """Aggregates the id's of the expired snapshots and creates a list of table cleanup tasks. + self, + *, + batch_range: ExpiredBatchRange, + current_ts: t.Optional[int] = None, + ignore_ttl: bool = False, + ) -> t.Optional[ExpiredSnapshotBatch]: + """Returns a single batch of expired snapshots ordered by (updated_ts, name, identifier). - Expired snapshots are snapshots that have exceeded their time-to-live - and are no longer in use within an environment. + Args: + current_ts: Timestamp used to evaluate expiration. + ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced). + batch_range: The range of the batch to fetch. Returns: - The list of table cleanup tasks. + A batch describing expired snapshots or None if no snapshots are pending cleanup. 
""" @abc.abstractmethod @@ -363,7 +357,10 @@ def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None: @abc.abstractmethod def delete_expired_snapshots( - self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None + self, + batch_range: ExpiredBatchRange, + ignore_ttl: bool = False, + current_ts: t.Optional[int] = None, ) -> None: """Removes expired snapshots. @@ -371,8 +368,10 @@ def delete_expired_snapshots( and are no longer in use within an environment. Args: + batch_range: The range of snapshots to delete in this batch. ignore_ttl: Ignore the TTL on the snapshot when considering it expired. This has the effect of deleting all snapshots that are not referenced in any environment + current_ts: Timestamp used to evaluate expiration. """ @abc.abstractmethod @@ -496,6 +495,7 @@ def add_interval( start: TimeLike, end: TimeLike, is_dev: bool = False, + last_altered_ts: t.Optional[int] = None, ) -> None: """Add an interval to a snapshot and sync it to the store. @@ -504,6 +504,7 @@ def add_interval( start: The start of the interval to add. end: The end of the interval to add. 
is_dev: Indicates whether the given interval is being added while in development mode + last_altered_ts: The timestamp of the last modification of the physical table """ start_ts, end_ts = snapshot.inclusive_exclusive(start, end, strict=False, expand=False) if not snapshot.version: @@ -516,6 +517,8 @@ def add_interval( dev_version=snapshot.dev_version, intervals=intervals if not is_dev else [], dev_intervals=intervals if is_dev else [], + last_altered_ts=last_altered_ts if not is_dev else None, + dev_last_altered_ts=last_altered_ts if is_dev else None, ) self.add_snapshots_intervals([snapshot_intervals]) diff --git a/sqlmesh/core/state_sync/cache.py b/sqlmesh/core/state_sync/cache.py index 3de4e7bf51..77f3fc6ba5 100644 --- a/sqlmesh/core/state_sync/cache.py +++ b/sqlmesh/core/state_sync/cache.py @@ -12,6 +12,7 @@ ) from sqlmesh.core.snapshot.definition import Interval, SnapshotIntervals from sqlmesh.core.state_sync.base import DelegatingStateSync, StateSync +from sqlmesh.core.state_sync.common import ExpiredBatchRange from sqlmesh.utils.date import TimeLike, now_timestamp @@ -108,11 +109,17 @@ def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None: self.state_sync.delete_snapshots(snapshot_ids) def delete_expired_snapshots( - self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None + self, + batch_range: ExpiredBatchRange, + ignore_ttl: bool = False, + current_ts: t.Optional[int] = None, ) -> None: - current_ts = current_ts or now_timestamp() self.snapshot_cache.clear() - self.state_sync.delete_expired_snapshots(current_ts=current_ts, ignore_ttl=ignore_ttl) + self.state_sync.delete_expired_snapshots( + batch_range=batch_range, + ignore_ttl=ignore_ttl, + current_ts=current_ts, + ) def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None: for snapshot_intervals in snapshots_intervals: diff --git a/sqlmesh/core/state_sync/common.py b/sqlmesh/core/state_sync/common.py index cd8c389e33..d1208c5213 
100644 --- a/sqlmesh/core/state_sync/common.py +++ b/sqlmesh/core/state_sync/common.py @@ -7,124 +7,25 @@ import abc from dataclasses import dataclass + +from pydantic_core.core_schema import ValidationInfo from sqlglot import exp -from sqlmesh.core.console import Console -from sqlmesh.core.dialect import schema_ -from sqlmesh.utils.pydantic import PydanticModel -from sqlmesh.core.environment import Environment, EnvironmentStatements -from sqlmesh.utils.errors import SQLMeshError -from sqlmesh.core.snapshot import Snapshot +from sqlmesh.utils.pydantic import PydanticModel, field_validator +from sqlmesh.core.environment import Environment, EnvironmentStatements, EnvironmentNamingInfo +from sqlmesh.core.snapshot import ( + Snapshot, + SnapshotId, + SnapshotTableCleanupTask, + SnapshotTableInfo, +) if t.TYPE_CHECKING: - from sqlmesh.core.engine_adapter.base import EngineAdapter - from sqlmesh.core.state_sync.base import Versions + from sqlmesh.core.state_sync.base import Versions, StateReader logger = logging.getLogger(__name__) - -def cleanup_expired_views( - default_adapter: EngineAdapter, - engine_adapters: t.Dict[str, EngineAdapter], - environments: t.List[Environment], - warn_on_delete_failure: bool = False, - console: t.Optional[Console] = None, -) -> None: - expired_schema_or_catalog_environments = [ - environment - for environment in environments - if environment.suffix_target.is_schema or environment.suffix_target.is_catalog - ] - expired_table_environments = [ - environment for environment in environments if environment.suffix_target.is_table - ] - - # We have to use the corresponding adapter if the virtual layer is gateway managed - def get_adapter(gateway_managed: bool, gateway: t.Optional[str] = None) -> EngineAdapter: - if gateway_managed and gateway: - return engine_adapters.get(gateway, default_adapter) - return default_adapter - - catalogs_to_drop: t.Set[t.Tuple[EngineAdapter, str]] = set() - schemas_to_drop: t.Set[t.Tuple[EngineAdapter, exp.Table]] = 
set() - - # Collect schemas and catalogs to drop - for engine_adapter, expired_catalog, expired_schema, suffix_target in { - ( - (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)), - snapshot.qualified_view_name.catalog_for_environment( - environment.naming_info, dialect=engine_adapter.dialect - ), - snapshot.qualified_view_name.schema_for_environment( - environment.naming_info, dialect=engine_adapter.dialect - ), - environment.suffix_target, - ) - for environment in expired_schema_or_catalog_environments - for snapshot in environment.snapshots - if snapshot.is_model and not snapshot.is_symbolic - }: - if suffix_target.is_catalog: - if expired_catalog: - catalogs_to_drop.add((engine_adapter, expired_catalog)) - else: - schema = schema_(expired_schema, expired_catalog) - schemas_to_drop.add((engine_adapter, schema)) - - # Drop the views for the expired environments - for engine_adapter, expired_view in { - ( - (engine_adapter := get_adapter(environment.gateway_managed, snapshot.model_gateway)), - snapshot.qualified_view_name.for_environment( - environment.naming_info, dialect=engine_adapter.dialect - ), - ) - for environment in expired_table_environments - for snapshot in environment.snapshots - if snapshot.is_model and not snapshot.is_symbolic - }: - try: - engine_adapter.drop_view(expired_view, ignore_if_not_exists=True) - if console: - console.update_cleanup_progress(expired_view) - except Exception as e: - message = f"Failed to drop the expired environment view '{expired_view}': {e}" - if warn_on_delete_failure: - logger.warning(message) - else: - raise SQLMeshError(message) from e - - # Drop the schemas for the expired environments - for engine_adapter, schema in schemas_to_drop: - try: - engine_adapter.drop_schema( - schema, - ignore_if_not_exists=True, - cascade=True, - ) - if console: - console.update_cleanup_progress(schema.sql(dialect=engine_adapter.dialect)) - except Exception as e: - message = f"Failed to drop the 
expired environment schema '{schema}': {e}" - if warn_on_delete_failure: - logger.warning(message) - else: - raise SQLMeshError(message) from e - - # Drop any catalogs that were associated with a snapshot where the engine adapter supports dropping catalogs - # catalogs_to_drop is only populated when environment_suffix_target is set to 'catalog' - for engine_adapter, catalog in catalogs_to_drop: - if engine_adapter.SUPPORTS_CREATE_DROP_CATALOG: - try: - engine_adapter.drop_catalog(catalog) - if console: - console.update_cleanup_progress(catalog) - except Exception as e: - message = f"Failed to drop the expired environment catalog '{catalog}': {e}" - if warn_on_delete_failure: - logger.warning(message) - else: - raise SQLMeshError(message) from e +EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE = 200 def transactional() -> t.Callable[[t.Callable], t.Callable]: @@ -215,3 +116,207 @@ def __iter__(self) -> t.Iterator[StateStreamContents]: yield EnvironmentsChunk(environments) return _StateStream() + + +class ExpiredBatchRange(PydanticModel): + start: RowBoundary + end: t.Union[RowBoundary, LimitBoundary] + + @classmethod + def init_batch_range(cls, batch_size: int) -> ExpiredBatchRange: + return ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=LimitBoundary(batch_size=batch_size), + ) + + @classmethod + def all_batch_range(cls) -> ExpiredBatchRange: + return ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=RowBoundary.highest_boundary(), + ) + + @classmethod + def _expanded_tuple_comparison( + cls, + columns: t.List[exp.Column], + values: t.List[t.Union[exp.Literal, exp.Neg]], + operator: t.Type[exp.Expr], + ) -> exp.Condition: + """Generate expanded tuple comparison that works across all SQL engines. + + Converts tuple comparisons like (a, b, c) OP (x, y, z) into an expanded form + that's compatible with all SQL engines, since native tuple comparisons have + inconsistent support across engines (especially DuckDB, MySQL, SQLite). 
+ + Repro of problem with DuckDB: + "SELECT * FROM VALUES(1,'2') as test(a,b) WHERE ((a, b) > (1, 'foo')) AND ((a, b) <= (10, 'baz'))" + + Args: + columns: List of column expressions to compare + values: List of value expressions to compare against + operator: The comparison operator class (exp.GT, exp.GTE, exp.LT, exp.LTE) + + Examples: + (a, b, c) > (x, y, z) expands to: + a > x OR (a = x AND b > y) OR (a = x AND b = y AND c > z) + + (a, b, c) <= (x, y, z) expands to: + a < x OR (a = x AND b < y) OR (a = x AND b = y AND c <= z) + + (a, b, c) >= (x, y, z) expands to: + a > x OR (a = x AND b > y) OR (a = x AND b = y AND c >= z) + + Returns: + An expanded OR expression representing the tuple comparison + """ + if operator not in (exp.GT, exp.GTE, exp.LT, exp.LTE): + raise ValueError(f"Unsupported operator: {operator}. Use GT, GTE, LT, or LTE.") + + # For <= and >=, we use the strict operator for all but the last column + # e.g., (a, b) <= (x, y) becomes: a < x OR (a = x AND b <= y) + # For < and >, we use the strict operator throughout + # e.g., (a, b) > (x, y) becomes: a > x OR (a = x AND b > x) + strict_operator: t.Type[exp.Expr] + final_operator: t.Type[exp.Expr] + + if operator in (exp.LTE, exp.GTE): + # For inclusive operators (<=, >=), use strict form for intermediate columns + # but keep inclusive form for the last column + strict_operator = exp.LT if operator == exp.LTE else exp.GT + final_operator = operator # Keep LTE/GTE for last column + else: + # For strict operators (<, >), use them throughout + strict_operator = operator + final_operator = operator + + conditions: t.List[exp.Expr] = [] + for i in range(len(columns)): + # Build equality conditions for all columns before current + equality_conditions = [exp.EQ(this=columns[j], expression=values[j]) for j in range(i)] + + # Use the final operator for the last column, strict for others + comparison_op = final_operator if i == len(columns) - 1 else strict_operator + comparison_condition = 
comparison_op(this=columns[i], expression=values[i]) + + if equality_conditions: + conditions.append(exp.and_(*equality_conditions, comparison_condition)) + else: + conditions.append(comparison_condition) + + return exp.or_(*conditions) if len(conditions) > 1 else t.cast(exp.Condition, conditions[0]) + + @property + def where_filter(self) -> exp.Condition: + # Use expanded tuple comparisons for cross-engine compatibility + # Native tuple comparisons like (a, b) > (x, y) don't work reliably across all SQL engines + columns = [ + exp.column("updated_ts"), + exp.column("name"), + exp.column("identifier"), + ] + start_values = [ + exp.Literal.number(self.start.updated_ts), + exp.Literal.string(self.start.name), + exp.Literal.string(self.start.identifier), + ] + + start_condition = self._expanded_tuple_comparison(columns, start_values, exp.GT) + + range_filter: exp.Condition + if isinstance(self.end, RowBoundary): + end_values = [ + exp.Literal.number(self.end.updated_ts), + exp.Literal.string(self.end.name), + exp.Literal.string(self.end.identifier), + ] + end_condition = self._expanded_tuple_comparison(columns, end_values, exp.LTE) + range_filter = exp.and_(start_condition, end_condition) + else: + range_filter = start_condition + return range_filter + + +class RowBoundary(PydanticModel): + updated_ts: int + name: str + identifier: str + + @classmethod + def lowest_boundary(cls) -> RowBoundary: + return RowBoundary(updated_ts=0, name="", identifier="") + + @classmethod + def highest_boundary(cls) -> RowBoundary: + # 9999-12-31T23:59:59.999Z in epoch milliseconds + return RowBoundary(updated_ts=253_402_300_799_999, name="", identifier="") + + +class LimitBoundary(PydanticModel): + batch_size: int + + @classmethod + def init_batch_boundary(cls, batch_size: int) -> LimitBoundary: + return LimitBoundary(batch_size=batch_size) + + +class PromotionResult(PydanticModel): + added: t.List[SnapshotTableInfo] + removed: t.List[SnapshotTableInfo] + 
removed_environment_naming_info: t.Optional[EnvironmentNamingInfo] + + @field_validator("removed_environment_naming_info") + def _validate_removed_environment_naming_info( + cls, v: t.Optional[EnvironmentNamingInfo], info: ValidationInfo + ) -> t.Optional[EnvironmentNamingInfo]: + if v and not info.data.get("removed"): + raise ValueError("removed_environment_naming_info must be None if removed is empty") + return v + + +class ExpiredSnapshotBatch(PydanticModel): + """A batch of expired snapshots to be cleaned up.""" + + expired_snapshot_ids: t.Set[SnapshotId] + cleanup_tasks: t.List[SnapshotTableCleanupTask] + batch_range: ExpiredBatchRange + + +def iter_expired_snapshot_batches( + state_reader: StateReader, + *, + current_ts: int, + ignore_ttl: bool = False, + batch_size: t.Optional[int] = None, +) -> t.Iterator[ExpiredSnapshotBatch]: + """Yields expired snapshot batches. + + Args: + state_reader: StateReader instance to query expired snapshots from. + current_ts: Timestamp used to evaluate expiration. + ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced). + batch_size: Maximum number of snapshots to fetch per batch. 
+ """ + + batch_size = batch_size if batch_size is not None else EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE + batch_range = ExpiredBatchRange.init_batch_range(batch_size=batch_size) + + while True: + batch = state_reader.get_expired_snapshots( + current_ts=current_ts, + ignore_ttl=ignore_ttl, + batch_range=batch_range, + ) + + if batch is None: + return + + yield batch + + assert isinstance(batch.batch_range.end, RowBoundary), ( + "Only RowBoundary is supported for pagination currently" + ) + batch_range = ExpiredBatchRange( + start=batch.batch_range.end, + end=LimitBoundary(batch_size=batch_size), + ) diff --git a/sqlmesh/core/state_sync/db/environment.py b/sqlmesh/core/state_sync/db/environment.py index e3f1d1ec9e..713ce0193e 100644 --- a/sqlmesh/core/state_sync/db/environment.py +++ b/sqlmesh/core/state_sync/db/environment.py @@ -296,7 +296,7 @@ def _environment_summmary_from_row(self, row: t.Tuple[str, ...]) -> EnvironmentS def _environments_query( self, - where: t.Optional[str | exp.Expression] = None, + where: t.Optional[str | exp.Expr] = None, lock_for_update: bool = False, required_fields: t.Optional[t.List[str]] = None, ) -> exp.Select: @@ -310,7 +310,7 @@ def _environments_query( return query.lock(copy=False) return query - def _create_expiration_filter_expr(self, current_ts: int) -> exp.Expression: + def _create_expiration_filter_expr(self, current_ts: int) -> exp.Expr: """Creates a SQLGlot filter expression to find expired environments. 
Args: @@ -322,7 +322,7 @@ def _create_expiration_filter_expr(self, current_ts: int) -> exp.Expression: ) def _fetch_environment_summaries( - self, where: t.Optional[str | exp.Expression] = None + self, where: t.Optional[str | exp.Expr] = None ) -> t.List[EnvironmentSummary]: return [ self._environment_summmary_from_row(row) diff --git a/sqlmesh/core/state_sync/db/facade.py b/sqlmesh/core/state_sync/db/facade.py index 29fc9f1740..64042624f3 100644 --- a/sqlmesh/core/state_sync/db/facade.py +++ b/sqlmesh/core/state_sync/db/facade.py @@ -35,7 +35,6 @@ SnapshotInfoLike, SnapshotIntervals, SnapshotNameVersion, - SnapshotTableCleanupTask, SnapshotTableInfo, start_date, ) @@ -43,7 +42,6 @@ Interval, ) from sqlmesh.core.state_sync.base import ( - PromotionResult, StateSync, Versions, ) @@ -55,6 +53,9 @@ StateStream, chunk_iterable, EnvironmentWithStatements, + ExpiredSnapshotBatch, + PromotionResult, + ExpiredBatchRange, ) from sqlmesh.core.state_sync.db.interval import IntervalState from sqlmesh.core.state_sync.db.environment import EnvironmentState @@ -261,11 +262,18 @@ def invalidate_environment(self, name: str, protect_prod: bool = True) -> None: self.environment_state.invalidate_environment(name, protect_prod) def get_expired_snapshots( - self, current_ts: t.Optional[int] = None, ignore_ttl: bool = False - ) -> t.List[SnapshotTableCleanupTask]: + self, + *, + batch_range: ExpiredBatchRange, + current_ts: t.Optional[int] = None, + ignore_ttl: bool = False, + ) -> t.Optional[ExpiredSnapshotBatch]: current_ts = current_ts or now_timestamp() return self.snapshot_state.get_expired_snapshots( - self.environment_state.get_environments(), current_ts=current_ts, ignore_ttl=ignore_ttl + environments=self.environment_state.get_environments(), + current_ts=current_ts, + ignore_ttl=ignore_ttl, + batch_range=batch_range, ) def get_expired_environments(self, current_ts: int) -> t.List[EnvironmentSummary]: @@ -273,14 +281,19 @@ def get_expired_environments(self, current_ts: int) -> 
t.List[EnvironmentSummary @transactional() def delete_expired_snapshots( - self, ignore_ttl: bool = False, current_ts: t.Optional[int] = None + self, + batch_range: ExpiredBatchRange, + ignore_ttl: bool = False, + current_ts: t.Optional[int] = None, ) -> None: - current_ts = current_ts or now_timestamp() - for expired_snapshot_ids, cleanup_targets in self.snapshot_state._get_expired_snapshots( - self.environment_state.get_environments(), ignore_ttl=ignore_ttl, current_ts=current_ts - ): - self.snapshot_state.delete_snapshots(expired_snapshot_ids) - self.interval_state.cleanup_intervals(cleanup_targets, expired_snapshot_ids) + batch = self.get_expired_snapshots( + ignore_ttl=ignore_ttl, + current_ts=current_ts, + batch_range=batch_range, + ) + if batch and batch.expired_snapshot_ids: + self.snapshot_state.delete_snapshots(batch.expired_snapshot_ids) + self.interval_state.cleanup_intervals(batch.cleanup_tasks, batch.expired_snapshot_ids) @transactional() def delete_expired_environments( @@ -381,8 +394,9 @@ def add_interval( start: TimeLike, end: TimeLike, is_dev: bool = False, + last_altered_ts: t.Optional[int] = None, ) -> None: - super().add_interval(snapshot, start, end, is_dev) + super().add_interval(snapshot, start, end, is_dev, last_altered_ts) @transactional() def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None: @@ -455,7 +469,7 @@ def migrate( ) -> None: """Migrate the state sync to the latest SQLMesh / SQLGlot version.""" self.migrator.migrate( - self, + self.schema, skip_backup=skip_backup, promoted_snapshots_only=promoted_snapshots_only, ) diff --git a/sqlmesh/core/state_sync/db/interval.py b/sqlmesh/core/state_sync/db/interval.py index b15ad2d57b..8ccdc58fa0 100644 --- a/sqlmesh/core/state_sync/db/interval.py +++ b/sqlmesh/core/state_sync/db/interval.py @@ -60,6 +60,7 @@ def __init__( "is_removed": exp.DataType.build("boolean"), "is_compacted": exp.DataType.build("boolean"), "is_pending_restatement": 
exp.DataType.build("boolean"), + "last_altered_ts": exp.DataType.build("bigint"), } def add_snapshots_intervals(self, snapshots_intervals: t.Sequence[SnapshotIntervals]) -> None: @@ -215,13 +216,23 @@ def _push_snapshot_intervals( for start_ts, end_ts in snapshot.intervals: new_intervals.append( _interval_to_df( - snapshot, start_ts, end_ts, is_dev=False, is_compacted=is_compacted + snapshot, + start_ts, + end_ts, + is_dev=False, + is_compacted=is_compacted, + last_altered_ts=snapshot.last_altered_ts, ) ) for start_ts, end_ts in snapshot.dev_intervals: new_intervals.append( _interval_to_df( - snapshot, start_ts, end_ts, is_dev=True, is_compacted=is_compacted + snapshot, + start_ts, + end_ts, + is_dev=True, + is_compacted=is_compacted, + last_altered_ts=snapshot.dev_last_altered_ts, ) ) @@ -236,6 +247,7 @@ def _push_snapshot_intervals( is_dev=False, is_compacted=is_compacted, is_pending_restatement=True, + last_altered_ts=snapshot.last_altered_ts, ) ) @@ -284,6 +296,7 @@ def _get_snapshot_intervals( is_dev, is_removed, is_pending_restatement, + last_altered_ts, ) in rows: interval_ids.add(interval_id) merge_key = (name, version, dev_version, identifier) @@ -318,8 +331,10 @@ def _get_snapshot_intervals( else: if is_dev: intervals[merge_key].add_dev_interval(start, end) + intervals[merge_key].update_dev_last_altered_ts(last_altered_ts) else: intervals[merge_key].add_interval(start, end) + intervals[merge_key].update_last_altered_ts(last_altered_ts) # Remove all pending restatement intervals recorded before the current interval has been added intervals[ pending_restatement_interval_merge_key @@ -340,6 +355,7 @@ def _get_snapshot_intervals_query(self, uncompacted_only: bool) -> exp.Select: "is_dev", "is_removed", "is_pending_restatement", + "last_altered_ts", ) .from_(exp.to_table(self.intervals_table).as_("intervals")) .order_by( @@ -460,6 +476,7 @@ def _interval_to_df( is_removed: bool = False, is_compacted: bool = False, is_pending_restatement: bool = False, + 
last_altered_ts: t.Optional[int] = None, ) -> t.Dict[str, t.Any]: return { "id": random_id(), @@ -474,4 +491,5 @@ def _interval_to_df( "is_removed": is_removed, "is_compacted": is_compacted, "is_pending_restatement": is_pending_restatement, + "last_altered_ts": last_altered_ts, } diff --git a/sqlmesh/core/state_sync/db/migrator.py b/sqlmesh/core/state_sync/db/migrator.py index b803a5cc40..8d73e1d395 100644 --- a/sqlmesh/core/state_sync/db/migrator.py +++ b/sqlmesh/core/state_sync/db/migrator.py @@ -30,7 +30,6 @@ MIN_SCHEMA_VERSION, MIN_SQLMESH_VERSION, ) -from sqlmesh.core.state_sync.base import StateSync from sqlmesh.core.state_sync.db.environment import EnvironmentState from sqlmesh.core.state_sync.db.interval import IntervalState from sqlmesh.core.state_sync.db.snapshot import SnapshotState @@ -85,7 +84,7 @@ def __init__( def migrate( self, - state_sync: StateSync, + schema: t.Optional[str], skip_backup: bool = False, promoted_snapshots_only: bool = True, ) -> None: @@ -94,7 +93,7 @@ def migrate( migration_start_ts = time.perf_counter() try: - migrate_rows = self._apply_migrations(state_sync, skip_backup) + migrate_rows = self._apply_migrations(schema, skip_backup) if not migrate_rows and major_minor(SQLMESH_VERSION) == versions.minor_sqlmesh_version: return @@ -153,7 +152,7 @@ def rollback(self) -> None: def _apply_migrations( self, - state_sync: StateSync, + schema: t.Optional[str], skip_backup: bool, ) -> bool: versions = self.version_state.get_versions() @@ -184,10 +183,10 @@ def _apply_migrations( for migration in migrations: logger.info(f"Applying migration {migration}") - migration.migrate_schemas(state_sync) + migration.migrate_schemas(engine_adapter=self.engine_adapter, schema=schema) if state_table_exist: # No need to run DML for the initial migration since all tables are empty - migration.migrate_rows(state_sync) + migration.migrate_rows(engine_adapter=self.engine_adapter, schema=schema) snapshot_count_after = self.snapshot_state.count() @@ -196,7 
+195,7 @@ def _apply_migrations( raise SQLMeshError( f"Number of snapshots before ({snapshot_count_before}) and after " f"({snapshot_count_after}) applying migration scripts {scripts} does not match. " - "Please file an issue issue at https://github.com/TobikoData/sqlmesh/issues/new." + "Please file an issue issue at https://github.com/SQLMesh/sqlmesh/issues/new." ) migrate_snapshots_and_environments = ( @@ -229,6 +228,7 @@ def _migrate_snapshot_rows( "updated_ts": updated_ts, "unpaused_ts": unpaused_ts, "unrestorable": unrestorable, + "forward_only": forward_only, } for where in ( snapshot_id_filter( @@ -237,10 +237,16 @@ def _migrate_snapshot_rows( if snapshots is not None else [None] ) - for name, identifier, raw_snapshot, updated_ts, unpaused_ts, unrestorable in fetchall( + for name, identifier, raw_snapshot, updated_ts, unpaused_ts, unrestorable, forward_only in fetchall( self.engine_adapter, exp.select( - "name", "identifier", "snapshot", "updated_ts", "unpaused_ts", "unrestorable" + "name", + "identifier", + "snapshot", + "updated_ts", + "unpaused_ts", + "unrestorable", + "forward_only", ) .from_(self.snapshot_state.snapshots_table) .where(where) diff --git a/sqlmesh/core/state_sync/db/snapshot.py b/sqlmesh/core/state_sync/db/snapshot.py index 4a8b2c44c5..8ca98f2d48 100644 --- a/sqlmesh/core/state_sync/db/snapshot.py +++ b/sqlmesh/core/state_sync/db/snapshot.py @@ -14,7 +14,6 @@ snapshot_id_filter, fetchone, fetchall, - create_batches, ) from sqlmesh.core.environment import Environment from sqlmesh.core.model import SeedModel, ModelKindName @@ -30,6 +29,12 @@ SnapshotId, SnapshotFingerprint, ) +from sqlmesh.core.state_sync.common import ( + RowBoundary, + ExpiredSnapshotBatch, + ExpiredBatchRange, + LimitBoundary, +) from sqlmesh.utils.migration import index_text_type, blob_text_type from sqlmesh.utils.date import now_timestamp, TimeLike, to_timestamp from sqlmesh.utils import unique @@ -43,9 +48,6 @@ class SnapshotState: SNAPSHOT_BATCH_SIZE = 1000 - # Use a 
smaller batch size for expired snapshots to account for fetching - # of all snapshots that share the same version. - EXPIRED_SNAPSHOT_BATCH_SIZE = 200 def __init__( self, @@ -166,53 +168,62 @@ def get_expired_snapshots( self, environments: t.Iterable[Environment], current_ts: int, - ignore_ttl: bool = False, - ) -> t.List[SnapshotTableCleanupTask]: - """Aggregates the id's of the expired snapshots and creates a list of table cleanup tasks. - - Expired snapshots are snapshots that have exceeded their time-to-live - and are no longer in use within an environment. - - Returns: - The set of expired snapshot ids. - The list of table cleanup tasks. - """ - all_cleanup_targets = [] - for _, cleanup_targets in self._get_expired_snapshots( - environments=environments, - current_ts=current_ts, - ignore_ttl=ignore_ttl, - ): - all_cleanup_targets.extend(cleanup_targets) - return all_cleanup_targets - - def _get_expired_snapshots( - self, - environments: t.Iterable[Environment], - current_ts: int, - ignore_ttl: bool = False, - ) -> t.Iterator[t.Tuple[t.Set[SnapshotId], t.List[SnapshotTableCleanupTask]]]: - expired_query = exp.select("name", "identifier", "version").from_(self.snapshots_table) + ignore_ttl: bool, + batch_range: ExpiredBatchRange, + ) -> t.Optional[ExpiredSnapshotBatch]: + expired_query = exp.select("name", "identifier", "version", "updated_ts").from_( + self.snapshots_table + ) if not ignore_ttl: expired_query = expired_query.where( (exp.column("updated_ts") + exp.column("ttl_ms")) <= current_ts ) + expired_query = expired_query.where(batch_range.where_filter) + + promoted_snapshot_ids = { + snapshot.snapshot_id + for environment in environments + for snapshot in ( + environment.snapshots + if environment.finalized_ts is not None + # If the environment is not finalized, check both the current snapshots and the previous finalized snapshots + else [*environment.snapshots, *(environment.previous_finalized_snapshots or [])] + ) + } + + if promoted_snapshot_ids: + 
not_in_conditions = [ + exp.not_(condition) + for condition in snapshot_id_filter( + self.engine_adapter, + promoted_snapshot_ids, + batch_size=self.SNAPSHOT_BATCH_SIZE, + ) + ] + expired_query = expired_query.where(exp.and_(*not_in_conditions)) + + expired_query = expired_query.order_by( + exp.column("updated_ts"), exp.column("name"), exp.column("identifier") + ) + + if isinstance(batch_range.end, LimitBoundary): + expired_query = expired_query.limit(batch_range.end.batch_size) + + rows = fetchall(self.engine_adapter, expired_query) + + if not rows: + return None + expired_candidates = { SnapshotId(name=name, identifier=identifier): SnapshotNameVersion( name=name, version=version ) - for name, identifier, version in fetchall(self.engine_adapter, expired_query) + for name, identifier, version, _ in rows } if not expired_candidates: - return - - promoted_snapshot_ids = { - snapshot.snapshot_id - for environment in environments - for snapshot in environment.snapshots - } + return None def _is_snapshot_used(snapshot: SnapshotIdAndVersion) -> bool: return ( @@ -220,57 +231,73 @@ def _is_snapshot_used(snapshot: SnapshotIdAndVersion) -> bool: or snapshot.snapshot_id not in expired_candidates ) - unique_expired_versions = unique(expired_candidates.values()) - version_batches = create_batches( - unique_expired_versions, batch_size=self.EXPIRED_SNAPSHOT_BATCH_SIZE + # Extract cursor values from last row for pagination + last_row = rows[-1] + last_row_boundary = RowBoundary( + updated_ts=last_row[3], + name=last_row[0], + identifier=last_row[1], ) - for versions_batch in version_batches: - snapshots = self._get_snapshots_with_same_version(versions_batch) - - snapshots_by_version = defaultdict(set) - snapshots_by_dev_version = defaultdict(set) - for s in snapshots: - snapshots_by_version[(s.name, s.version)].add(s.snapshot_id) - snapshots_by_dev_version[(s.name, s.dev_version)].add(s.snapshot_id) - - expired_snapshots = [s for s in snapshots if not _is_snapshot_used(s)] - 
all_expired_snapshot_ids = {s.snapshot_id for s in expired_snapshots} - - cleanup_targets: t.List[t.Tuple[SnapshotId, bool]] = [] - for snapshot in expired_snapshots: - shared_version_snapshots = snapshots_by_version[(snapshot.name, snapshot.version)] - shared_version_snapshots.discard(snapshot.snapshot_id) - - shared_dev_version_snapshots = snapshots_by_dev_version[ - (snapshot.name, snapshot.dev_version) - ] - shared_dev_version_snapshots.discard(snapshot.snapshot_id) - - if not shared_dev_version_snapshots: - dev_table_only = bool(shared_version_snapshots) - cleanup_targets.append((snapshot.snapshot_id, dev_table_only)) - - snapshot_ids_to_cleanup = [snapshot_id for snapshot_id, _ in cleanup_targets] - for snapshot_id_batch in create_batches( - snapshot_ids_to_cleanup, batch_size=self.SNAPSHOT_BATCH_SIZE - ): - snapshot_id_batch_set = set(snapshot_id_batch) - full_snapshots = self._get_snapshots(snapshot_id_batch_set) - cleanup_tasks = [ + # The returned batch_range represents the actual range of rows in this batch + result_batch_range = ExpiredBatchRange( + start=batch_range.start, + end=last_row_boundary, + ) + + unique_expired_versions = unique(expired_candidates.values()) + expired_snapshot_ids: t.Set[SnapshotId] = set() + cleanup_tasks: t.List[SnapshotTableCleanupTask] = [] + + snapshots = self._get_snapshots_with_same_version(unique_expired_versions) + + snapshots_by_version = defaultdict(set) + snapshots_by_dev_version = defaultdict(set) + for s in snapshots: + snapshots_by_version[(s.name, s.version)].add(s.snapshot_id) + snapshots_by_dev_version[(s.name, s.dev_version)].add(s.snapshot_id) + + expired_snapshots = [s for s in snapshots if not _is_snapshot_used(s)] + all_expired_snapshot_ids = {s.snapshot_id for s in expired_snapshots} + + cleanup_targets: t.List[t.Tuple[SnapshotId, bool]] = [] + for snapshot in expired_snapshots: + shared_version_snapshots = snapshots_by_version[(snapshot.name, snapshot.version)] + 
shared_version_snapshots.discard(snapshot.snapshot_id) + + shared_dev_version_snapshots = snapshots_by_dev_version[ + (snapshot.name, snapshot.dev_version) + ] + shared_dev_version_snapshots.discard(snapshot.snapshot_id) + + if not shared_dev_version_snapshots: + dev_table_only = bool(shared_version_snapshots) + cleanup_targets.append((snapshot.snapshot_id, dev_table_only)) + + snapshot_ids_to_cleanup = [snapshot_id for snapshot_id, _ in cleanup_targets] + full_snapshots = self._get_snapshots(snapshot_ids_to_cleanup) + for snapshot_id, dev_table_only in cleanup_targets: + if snapshot_id in full_snapshots: + cleanup_tasks.append( SnapshotTableCleanupTask( snapshot=full_snapshots[snapshot_id].table_info, dev_table_only=dev_table_only, ) - for snapshot_id, dev_table_only in cleanup_targets - if snapshot_id in full_snapshots - ] - all_expired_snapshot_ids -= snapshot_id_batch_set - yield snapshot_id_batch_set, cleanup_tasks + ) + expired_snapshot_ids.add(snapshot_id) + all_expired_snapshot_ids.discard(snapshot_id) + + # Add any remaining expired snapshots that don't require cleanup + if all_expired_snapshot_ids: + expired_snapshot_ids.update(all_expired_snapshot_ids) + + if expired_snapshot_ids or cleanup_tasks: + return ExpiredSnapshotBatch( + expired_snapshot_ids=expired_snapshot_ids, + cleanup_tasks=cleanup_tasks, + batch_range=result_batch_range, + ) - if all_expired_snapshot_ids: - # Remaining expired snapshots for which there are no tables - # to cleanup - yield all_expired_snapshot_ids, [] + return None def delete_snapshots(self, snapshot_ids: t.Iterable[SnapshotIdLike]) -> None: """Deletes snapshots. 
@@ -596,7 +623,7 @@ def _get_snapshots_expressions( self, snapshot_ids: t.Iterable[SnapshotIdLike], lock_for_update: bool = False, - ) -> t.Iterator[exp.Expression]: + ) -> t.Iterator[exp.Expr]: for where in snapshot_id_filter( self.engine_adapter, snapshot_ids, diff --git a/sqlmesh/core/state_sync/db/utils.py b/sqlmesh/core/state_sync/db/utils.py index 87c259f5d6..b0f321e21f 100644 --- a/sqlmesh/core/state_sync/db/utils.py +++ b/sqlmesh/core/state_sync/db/utils.py @@ -123,11 +123,9 @@ def create_batches(l: t.List[T], batch_size: int) -> t.List[t.List[T]]: return [l[i : i + batch_size] for i in range(0, len(l), batch_size)] -def fetchone( - engine_adapter: EngineAdapter, query: t.Union[exp.Expression, str] -) -> t.Optional[t.Tuple]: +def fetchone(engine_adapter: EngineAdapter, query: t.Union[exp.Expr, str]) -> t.Optional[t.Tuple]: return engine_adapter.fetchone(query, ignore_unsupported_errors=True, quote_identifiers=True) -def fetchall(engine_adapter: EngineAdapter, query: t.Union[exp.Expression, str]) -> t.List[t.Tuple]: +def fetchall(engine_adapter: EngineAdapter, query: t.Union[exp.Expr, str]) -> t.List[t.Tuple]: return engine_adapter.fetchall(query, ignore_unsupported_errors=True, quote_identifiers=True) diff --git a/sqlmesh/core/state_sync/export_import.py b/sqlmesh/core/state_sync/export_import.py index 3a63351ddb..2461ee50fa 100644 --- a/sqlmesh/core/state_sync/export_import.py +++ b/sqlmesh/core/state_sync/export_import.py @@ -29,7 +29,7 @@ class SQLMeshJSONStreamEncoder(JSONStreamEncoder): def default(self, obj: t.Any) -> t.Any: - if isinstance(obj, exp.Expression): + if isinstance(obj, exp.Expr): return _expression_encoder(obj) return super().default(obj) diff --git a/sqlmesh/core/table_diff.py b/sqlmesh/core/table_diff.py index b9dfadc075..df99227f89 100644 --- a/sqlmesh/core/table_diff.py +++ b/sqlmesh/core/table_diff.py @@ -224,9 +224,9 @@ def __init__( adapter: EngineAdapter, source: TableName, target: TableName, - on: t.List[str] | exp.Condition, + 
on: t.List[str] | exp.Expr, skip_columns: t.List[str] | None = None, - where: t.Optional[str | exp.Condition] = None, + where: t.Optional[str | exp.Expr] = None, limit: int = 20, source_alias: t.Optional[str] = None, target_alias: t.Optional[str] = None, @@ -305,18 +305,18 @@ def key_columns(self) -> t.Tuple[t.List[exp.Column], t.List[exp.Column], t.List[ return s_index, t_index, index_cols @property - def source_key_expression(self) -> exp.Expression: + def source_key_expression(self) -> exp.Expr: s_index, _, _ = self.key_columns return self._key_expression(s_index, self.source_schema) @property - def target_key_expression(self) -> exp.Expression: + def target_key_expression(self) -> exp.Expr: _, t_index, _ = self.key_columns return self._key_expression(t_index, self.target_schema) def _key_expression( self, cols: t.List[exp.Column], schema: t.Dict[str, exp.DataType] - ) -> exp.Expression: + ) -> exp.Expr: # if there is a single column, dont do anything fancy to it in order to allow existing indexes to be hit if len(cols) == 1: return exp.to_column(cols[0].name) @@ -363,12 +363,12 @@ def row_diff( s_index_names = [c.name for c in s_index] t_index_names = [t.name for t in t_index] - def _column_expr(name: str, table: str) -> exp.Expression: + def _column_expr(name: str, table: str) -> exp.Expr: column_type = matched_columns[name] qualified_column = exp.column(name, table) - if column_type.is_type(*exp.DataType.FLOAT_TYPES): - return exp.func("ROUND", qualified_column, exp.Literal.number(self.decimals)) + if column_type.is_type(*exp.DataType.REAL_TYPES): + return self.adapter._normalize_decimal_value(qualified_column, self.decimals) if column_type.is_type(*exp.DataType.NESTED_TYPES): return self.adapter._normalize_nested_value(qualified_column) @@ -678,9 +678,9 @@ def _column_expr(name: str, table: str) -> exp.Expression: def _fetch_sample( self, sample_table: exp.Table, - s_selects: t.Dict[str, exp.Alias], + s_selects: t.Dict[str, exp.Expr], s_index: 
t.List[exp.Column], - t_selects: t.Dict[str, exp.Alias], + t_selects: t.Dict[str, exp.Expr], t_index: t.List[exp.Column], limit: int, ) -> pd.DataFrame: @@ -742,5 +742,5 @@ def _fetch_sample( return self.adapter.fetchdf(query, quote_identifiers=True) -def name(e: exp.Expression) -> str: +def name(e: exp.Expr) -> str: return e.args["alias"].sql(identify=True) diff --git a/sqlmesh/core/test/definition.py b/sqlmesh/core/test/definition.py index a12dafec19..629e8f8d5b 100644 --- a/sqlmesh/core/test/definition.py +++ b/sqlmesh/core/test/definition.py @@ -355,11 +355,12 @@ def _to_hashable(x: t.Any) -> t.Any: for df in _split_df_by_column_pairs(diff) ) else: - from pandas import MultiIndex + from pandas import DataFrame, MultiIndex levels = t.cast(MultiIndex, diff.columns).levels[0] for col in levels: - col_diff = diff[col] + # diff[col] returns a DataFrame when columns is a MultiIndex + col_diff = t.cast(DataFrame, diff[col]) if not col_diff.empty: table = df_to_table( f"[bold red]Column '{col}' mismatch{failed_subtest}[/bold red]", @@ -454,6 +455,9 @@ def _validate_and_normalize_test(self) -> None: query = outputs.get("query") partial = outputs.pop("partial", None) + if ctes is None and query is None: + _raise_error("Incomplete test, outputs must contain 'query' or 'ctes'", self.path) + def _normalize_rows( values: t.List[Row] | t.Dict, name: str, @@ -644,16 +648,16 @@ def _create_df( return self._execute(query) rows = values["rows"] + columns_str: t.Optional[t.List[str]] = None if columns: + columns_str = [str(c) for c in columns] referenced_columns = list(dict.fromkeys(col for row in rows for col in row)) _raise_if_unexpected_columns(columns, referenced_columns) if partial: - columns = referenced_columns + columns_str = [c for c in columns_str if c in referenced_columns] - return pd.DataFrame.from_records( - rows, columns=[str(c) for c in columns] if columns else None - ) + return pd.DataFrame.from_records(rows, columns=columns_str) def _add_missing_columns( self, 
query: exp.Query, all_columns: t.Optional[t.Collection[str]] = None @@ -670,7 +674,7 @@ def _add_missing_columns( class SqlModelTest(ModelTest): - def test_ctes(self, ctes: t.Dict[str, exp.Expression], recursive: bool = False) -> None: + def test_ctes(self, ctes: t.Dict[str, exp.Expr], recursive: bool = False) -> None: """Run CTE queries and compare output to expected output""" for cte_name, values in self.body["outputs"].get("ctes", {}).items(): with self.subTest(cte=cte_name): @@ -707,7 +711,7 @@ def runTest(self) -> None: query = self._render_model_query() sql = query.sql(self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql) - with_clause = query.args.get("with") + with_clause = query.args.get("with_") if with_clause: self.test_ctes( @@ -804,7 +808,7 @@ def runTest(self) -> None: actual_df.reset_index(drop=True, inplace=True) expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial) - self.assert_equal(expected, actual_df, sort=False, partial=partial) + self.assert_equal(expected, actual_df, sort=True, partial=partial) def _execute_model(self) -> pd.DataFrame: """Executes the python model and returns a DataFrame.""" @@ -815,7 +819,7 @@ def _execute_model(self) -> pd.DataFrame: time_kwargs = {key: variables.pop(key) for key in TIME_KWARG_KEYS if key in variables} df = next(self.model.render(context=self.context, variables=variables, **time_kwargs)) - assert not isinstance(df, exp.Expression) + assert not isinstance(df, exp.Expr) return df if isinstance(df, pd.DataFrame) else df.toPandas() @@ -901,7 +905,7 @@ def generate_test( if isinstance(model, SqlModel): assert isinstance(test, SqlModelTest) model_query = test._render_model_query() - with_clause = model_query.args.get("with") + with_clause = model_query.args.get("with_") if with_clause and include_ctes: ctes = {} @@ -922,8 +926,7 @@ def generate_test( cte_output = test._execute(cte_query) ctes[cte.alias] = ( pandas_timestamp_to_pydatetime( - cte_output.apply(lambda 
col: col.map(_normalize_df_value)), - cte_query.named_selects, + df=cte_output.apply(lambda col: col.map(_normalize_df_value)), ) .replace({np.nan: None}) .to_dict(orient="records") diff --git a/sqlmesh/core/test/discovery.py b/sqlmesh/core/test/discovery.py index 0f60fe6fa9..9afe3dd7fc 100644 --- a/sqlmesh/core/test/discovery.py +++ b/sqlmesh/core/test/discovery.py @@ -20,6 +20,10 @@ class ModelTestMetadata(PydanticModel): def fully_qualified_test_name(self) -> str: return f"{self.path}::{self.test_name}" + @property + def model_name(self) -> str: + return self.body.get("model", "") + def __hash__(self) -> int: return self.fully_qualified_test_name.__hash__() diff --git a/sqlmesh/dbt/adapter.py b/sqlmesh/dbt/adapter.py index 236d4cee6b..7f7c7eb4fb 100644 --- a/sqlmesh/dbt/adapter.py +++ b/sqlmesh/dbt/adapter.py @@ -115,30 +115,39 @@ def quote_as_configured(self, value: str, component_type: str) -> str: """Returns the value quoted according to the quote policy.""" return self.quote(value) if getattr(self.quote_policy, component_type, False) else value - def dispatch(self, name: str, package: t.Optional[str] = None) -> t.Callable: + def dispatch( + self, + macro_name: str, + macro_namespace: t.Optional[str] = None, + ) -> t.Callable: """Returns a dialect-specific version of a macro with the given name.""" target_type = self.jinja_globals["target"]["type"] - macro_suffix = f"__{name}" + macro_suffix = f"__{macro_name}" def _relevance(package_name_pair: t.Tuple[t.Optional[str], str]) -> t.Tuple[int, int]: """Lower scores more relevant.""" - macro_package, macro_name = package_name_pair + macro_package, name = package_name_pair - package_score = 0 if macro_package == package else 1 + package_score = 0 if macro_package == macro_namespace else 1 name_score = 1 - if macro_name.startswith("default"): + if name.startswith("default"): name_score = 2 - elif macro_name.startswith(target_type): + elif name.startswith(target_type): name_score = 0 return name_score, package_score 
jinja_env = self.jinja_macros.build_environment(**self.jinja_globals).globals - packages_to_check: t.List[t.Optional[str]] = [ - package, - *(k for k in jinja_env if k.startswith("dbt")), - ] + + packages_to_check: t.List[t.Optional[str]] = [None] + if macro_namespace is not None: + if macro_namespace in jinja_env: + packages_to_check = [self.jinja_macros.root_package_name, macro_namespace] + + # Add dbt packages as fallback + packages_to_check.extend(k for k in jinja_env if k.startswith("dbt")) + candidates = {} for macro_package in packages_to_check: macros = jinja_env.get(macro_package, {}) if macro_package else jinja_env @@ -156,7 +165,7 @@ def _relevance(package_name_pair: t.Tuple[t.Optional[str], str]) -> t.Tuple[int, sorted_candidates = sorted(candidates, key=_relevance) return candidates[sorted_candidates[0]] - raise ConfigError(f"Macro '{name}', package '{package}' was not found.") + raise ConfigError(f"Macro '{macro_name}', package '{macro_namespace}' was not found.") def type(self) -> str: return self.project_dialect or "" diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index 3534b95bc3..32a76aba13 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -13,6 +13,8 @@ from sqlmesh.core.config.base import UpdateStrategy from sqlmesh.core.config.common import VirtualEnvironmentMode from sqlmesh.core.model import Model +from sqlmesh.core.model.common import ParsableSql +from sqlmesh.core.node import DbtNodeInfo from sqlmesh.dbt.column import ( ColumnConfig, column_descriptions_to_sqlmesh, @@ -56,6 +58,12 @@ class Materialization(str, Enum): # Snowflake, https://docs.getdbt.com/reference/resource-configs/snowflake-configs#dynamic-tables DYNAMIC_TABLE = "dynamic_table" + CUSTOM = "custom" + + @classmethod + def _missing_(cls, value): # type: ignore + return cls.CUSTOM + class SnapshotStrategy(str, Enum): """DBT snapshot strategies""" @@ -80,7 +88,7 @@ class Hook(DbtConfig): """ sql: SqlStr - transaction: bool = True # TODO not 
yet supported + transaction: bool = True _sql_validator = sql_str_validator @@ -120,8 +128,10 @@ class BaseModelConfig(GeneralConfig): grain: t.Union[str, t.List[str]] = [] # DBT configuration fields + unique_id: str = "" name: str = "" package_name: str = "" + fqn_: t.List[str] = Field(default_factory=list, alias="fqn") schema_: str = Field("", alias="schema") database: t.Optional[str] = None alias: t.Optional[str] = None @@ -156,7 +166,11 @@ def _validate_hooks(cls, v: t.Union[str, t.List[t.Union[SqlStr, str]]]) -> t.Lis @field_validator("grants", mode="before") @classmethod - def _validate_grants(cls, v: t.Dict[str, str]) -> t.Dict[str, t.List[str]]: + def _validate_grants( + cls, v: t.Optional[t.Dict[str, str]] + ) -> t.Optional[t.Dict[str, t.List[str]]]: + if v is None: + return None return {key: ensure_list(value) for key, value in v.items()} _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = { @@ -268,17 +282,17 @@ def remove_tests_with_invalid_refs(self, context: DbtContext) -> None: and all(source in context.sources for source in test.dependencies.sources) ] + @property + def fqn(self) -> str: + return ".".join(self.fqn_) + @property def sqlmesh_config_fields(self) -> t.Set[str]: return {"description", "owner", "stamp", "storage_format"} @property - def node_name(self) -> str: - resource_type = getattr(self, "resource_type", "model") - node_name = f"{resource_type}.{self.package_name}.{self.name}" - if self.version: - node_name += f".v{self.version}" - return node_name + def node_info(self) -> DbtNodeInfo: + return DbtNodeInfo(unique_id=self.unique_id, name=self.name, fqn=self.fqn, alias=self.alias) def sqlmesh_model_kwargs( self, @@ -294,7 +308,19 @@ def sqlmesh_model_kwargs( # precisely which variables are referenced in the model dependencies.variables |= set(context.variables) + if ( + getattr(self, "model_materialization", None) == Materialization.CUSTOM + and hasattr(self, "_get_custom_materialization") + and (custom_mat := 
self._get_custom_materialization(context)) + ): + # include custom materialization dependencies as they might use macros + dependencies = dependencies.union(custom_mat.dependencies) + model_dialect = self.dialect(context) + + # Only keep refs and sources that exist in the context to match dbt behavior + dependencies.refs.intersection_update(context.refs) + dependencies.sources.intersection_update(context.sources) model_context = context.context_for_dependencies( dependencies.union(self.tests_ref_source_dependencies) ) @@ -304,15 +330,28 @@ def sqlmesh_model_kwargs( jinja_macros.add_globals(self._model_jinja_context(model_context, dependencies)) model_kwargs = { - "audits": [(test.name, {}) for test in self.tests], + "audits": [(test.canonical_name, {}) for test in self.tests], "column_descriptions": column_descriptions_to_sqlmesh(self.columns) or None, "depends_on": { model.canonical_name(context) for model in model_context.refs.values() - }.union({source.canonical_name(context) for source in model_context.sources.values()}), + }.union( + { + source.canonical_name(context) + for source in model_context.sources.values() + if source.fqn not in context.model_fqns + # Allow dbt projects to reference a model as a source without causing a cycle + }, + ), "jinja_macros": jinja_macros, "path": self.path, - "pre_statements": [d.jinja_statement(hook.sql) for hook in self.pre_hook], - "post_statements": [d.jinja_statement(hook.sql) for hook in self.post_hook], + "pre_statements": [ + ParsableSql(sql=d.jinja_statement(hook.sql).sql(), transaction=hook.transaction) + for hook in self.pre_hook + ], + "post_statements": [ + ParsableSql(sql=d.jinja_statement(hook.sql).sql(), transaction=hook.transaction) + for hook in self.post_hook + ], "tags": self.tags, "physical_schema_mapping": context.sqlmesh_config.physical_schema_mapping, "default_catalog": context.target.database, @@ -349,8 +388,8 @@ def to_sqlmesh( def _model_jinja_context( self, context: DbtContext, dependencies: 
Dependencies ) -> t.Dict[str, t.Any]: - if context._manifest and self.node_name in context._manifest._manifest.nodes: - attributes = context._manifest._manifest.nodes[self.node_name].to_dict() + if context._manifest and self.unique_id in context._manifest._manifest.nodes: + attributes = context._manifest._manifest.nodes[self.unique_id].to_dict() if dependencies.model_attrs.all_attrs: model_node: AttributeDict[str, t.Any] = AttributeDict(attributes) else: diff --git a/sqlmesh/dbt/builtin.py b/sqlmesh/dbt/builtin.py index e284c11797..fa05e3d7f9 100644 --- a/sqlmesh/dbt/builtin.py +++ b/sqlmesh/dbt/builtin.py @@ -25,7 +25,7 @@ from sqlmesh.dbt.util import DBT_VERSION from sqlmesh.utils import AttributeDict, debug_mode_enabled, yaml from sqlmesh.utils.date import now -from sqlmesh.utils.errors import ConfigError, MacroEvalError +from sqlmesh.utils.errors import ConfigError from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroReference, MacroReturnVal logger = logging.getLogger(__name__) @@ -50,6 +50,22 @@ def warn(self, msg: str) -> str: return "" +def try_or_compiler_error( + message_if_exception: str, func: t.Callable, *args: t.Any, **kwargs: t.Any +) -> t.Any: + try: + return func(*args, **kwargs) + except Exception: + if DBT_VERSION >= (1, 4, 0): + from dbt.exceptions import CompilationError + + raise CompilationError(message_if_exception) + else: + from dbt.exceptions import CompilationException # type: ignore + + raise CompilationException(message_if_exception) + + class Api: def __init__(self, dialect: t.Optional[str]) -> None: if dialect: @@ -365,18 +381,16 @@ def do_zip(*args: t.Any, default: t.Optional[t.Any] = None) -> t.Optional[t.Any] return default -def as_bool(value: str) -> bool: - result = _try_literal_eval(value) - if isinstance(result, bool): - return result - raise MacroEvalError(f"Failed to convert '{value}' into boolean.") +def as_bool(value: t.Any) -> t.Any: + # dbt's jinja TEXT_FILTERS just return the input value as is + # 
https://github.com/dbt-labs/dbt-common/blob/main/dbt_common/clients/jinja.py#L559 + return value def as_number(value: str) -> t.Any: - result = _try_literal_eval(value) - if isinstance(value, (int, float)) and not isinstance(result, bool): - return result - raise MacroEvalError(f"Failed to convert '{value}' into number.") + # dbt's jinja TEXT_FILTERS just return the input value as is + # https://github.com/dbt-labs/dbt-common/blob/main/dbt_common/clients/jinja.py#L559 + return value def _try_literal_eval(value: str) -> t.Any: @@ -411,6 +425,7 @@ def debug() -> str: "sqlmesh_incremental": True, "tojson": to_json, "toyaml": to_yaml, + "try_or_compiler_error": try_or_compiler_error, "zip": do_zip, "zip_strict": lambda *args: list(zip(*args)), } @@ -465,7 +480,7 @@ def create_builtin_globals( if variables is not None: builtin_globals["var"] = Var(variables) - builtin_globals["config"] = Config(jinja_globals.pop("config", {})) + builtin_globals["config"] = Config(jinja_globals.pop("config", {"tags": []})) deployability_index = ( jinja_globals.get("deployability_index") or DeployabilityIndex.all_deployable() @@ -546,6 +561,7 @@ def create_builtin_globals( "statement": sql_execution.statement, "graph": adapter.graph, "selected_resources": list(jinja_globals.get("selected_models") or []), + "write": lambda input: None, # We don't support writing yet } ) diff --git a/sqlmesh/dbt/column.py b/sqlmesh/dbt/column.py index 327f7cd539..80a6ad9325 100644 --- a/sqlmesh/dbt/column.py +++ b/sqlmesh/dbt/column.py @@ -1,6 +1,7 @@ from __future__ import annotations import typing as t +import logging from sqlglot import exp, parse_one from sqlglot.helper import ensure_list @@ -9,6 +10,8 @@ from sqlmesh.utils.conversions import ensure_bool from sqlmesh.utils.pydantic import field_validator +logger = logging.getLogger(__name__) + def yaml_to_columns( yaml: t.Dict[str, ColumnConfig] | t.List[t.Dict[str, ColumnConfig]], @@ -31,11 +34,20 @@ def column_types_to_sqlmesh( Returns: A dict of 
column name to exp.DataType """ - return { - name: parse_one(column.data_type, into=exp.DataType, dialect=dialect or "") - for name, column in columns.items() - if column.enabled and column.data_type - } + col_types_to_sqlmesh: t.Dict[str, exp.DataType] = {} + for name, column in columns.items(): + if column.enabled and column.data_type: + column_def = parse_one( + f"{name} {column.data_type}", into=exp.ColumnDef, dialect=dialect or "" + ) + if column_def.args.get("constraints"): + logger.warning( + f"Ignoring unsupported constraints for column '{name}' with definition '{column.data_type}'. Please refer to github.com/SQLMesh/sqlmesh/issues/4717 for more information." + ) + kind = column_def.kind + if kind: + col_types_to_sqlmesh[name] = kind + return col_types_to_sqlmesh def column_descriptions_to_sqlmesh(columns: t.Dict[str, ColumnConfig]) -> t.Dict[str, str]: diff --git a/sqlmesh/dbt/common.py b/sqlmesh/dbt/common.py index 240d59084a..67e1a788cf 100644 --- a/sqlmesh/dbt/common.py +++ b/sqlmesh/dbt/common.py @@ -46,7 +46,9 @@ def load_yaml(source: str | Path) -> t.Dict: raise ConfigError(f"{source}: {ex}" if isinstance(source, Path) else f"{ex}") -def parse_meta(v: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]: +def parse_meta(v: t.Optional[t.Dict[str, t.Any]]) -> t.Dict[str, t.Any]: + if v is None: + return {} for key, value in v.items(): if isinstance(value, str): v[key] = try_str_to_bool(value) @@ -115,7 +117,7 @@ def _validate_list(cls, v: t.Union[str, t.List[str]]) -> t.List[str]: @field_validator("meta", mode="before") @classmethod - def _validate_meta(cls, v: t.Dict[str, t.Union[str, t.Any]]) -> t.Dict[str, t.Any]: + def _validate_meta(cls, v: t.Optional[t.Dict[str, t.Union[str, t.Any]]]) -> t.Dict[str, t.Any]: return parse_meta(v) _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = { diff --git a/sqlmesh/dbt/context.py b/sqlmesh/dbt/context.py index a56a6ca4d6..29eb03700d 100644 --- a/sqlmesh/dbt/context.py +++ b/sqlmesh/dbt/context.py @@ -1,5 
+1,6 @@ from __future__ import annotations +import logging import typing as t from dataclasses import dataclass, field, replace from pathlib import Path @@ -28,12 +29,16 @@ from sqlmesh.dbt.seed import SeedConfig from sqlmesh.dbt.source import SourceConfig +logger = logging.getLogger(__name__) + @dataclass class DbtContext: """Context for DBT environment""" project_root: Path = Path() + profiles_dir: t.Optional[Path] = None + """Optional override to specify the directory where profiles.yml is located, if not at the :project_root""" target_name: t.Optional[str] = None profile_name: t.Optional[str] = None project_schema: t.Optional[str] = None @@ -48,6 +53,7 @@ class DbtContext: _project_name: t.Optional[str] = None _variables: t.Dict[str, t.Any] = field(default_factory=dict) _models: t.Dict[str, ModelConfig] = field(default_factory=dict) + _model_fqns: t.Set[str] = field(default_factory=set) _seeds: t.Dict[str, SeedConfig] = field(default_factory=dict) _sources: t.Dict[str, SourceConfig] = field(default_factory=dict) _refs: t.Dict[str, t.Union[ModelConfig, SeedConfig]] = field(default_factory=dict) @@ -125,7 +131,7 @@ def _var(name: str, default: t.Optional[t.Any] = None) -> t.Any: try: rendered_variables[k] = _render_var(v) except Exception as ex: - raise ConfigError(f"Failed to render variable '{k}', value '{v}': {ex}") from ex + logger.warning(f"Failed to render variable '{k}', value '{v}': {ex}") self.variables = rendered_variables @@ -141,6 +147,7 @@ def models(self) -> t.Dict[str, ModelConfig]: def models(self, models: t.Dict[str, ModelConfig]) -> None: self._models = {} self._refs = {} + self._model_fqns = set() self.add_models(models) def add_models(self, models: t.Dict[str, ModelConfig]) -> None: @@ -148,6 +155,12 @@ def add_models(self, models: t.Dict[str, ModelConfig]) -> None: self._models.update(models) self._jinja_environment = None + @property + def model_fqns(self) -> t.Set[str]: + if not self._model_fqns: + self._model_fqns = {model.fqn for model in 
self._models.values()} + return self._model_fqns + @property def seeds(self) -> t.Dict[str, SeedConfig]: return self._seeds diff --git a/sqlmesh/dbt/loader.py b/sqlmesh/dbt/loader.py index 695aff3c45..fb3ecb2c77 100644 --- a/sqlmesh/dbt/loader.py +++ b/sqlmesh/dbt/loader.py @@ -5,11 +5,13 @@ import typing as t import sqlmesh.core.dialect as d from pathlib import Path +from collections import defaultdict from sqlmesh.core.config import ( Config, ConnectionConfig, GatewayConfig, ModelDefaultsConfig, + DbtConfig as RootDbtConfig, ) from sqlmesh.core.environment import EnvironmentStatements from sqlmesh.core.loader import CacheBase, LoadedProject, Loader @@ -48,11 +50,21 @@ def sqlmesh_config( dbt_profile_name: t.Optional[str] = None, dbt_target_name: t.Optional[str] = None, variables: t.Optional[t.Dict[str, t.Any]] = None, + threads: t.Optional[int] = None, register_comments: t.Optional[bool] = None, + infer_state_schema_name: bool = False, + profiles_dir: t.Optional[Path] = None, **kwargs: t.Any, ) -> Config: project_root = project_root or Path() - context = DbtContext(project_root=project_root, profile_name=dbt_profile_name) + context = DbtContext( + project_root=project_root, profiles_dir=profiles_dir, profile_name=dbt_profile_name + ) + + # note: Profile.load() is called twice with different DbtContext's: + # - once here with the above DbtContext (to determine connection / gateway config which has to be set up before everything else) + # - again on the SQLMesh side via GenericContext.load() -> DbtLoader._load_projects() -> Project.load() which constructs a fresh DbtContext and ignores the above one + # it's important to ensure that the DbtContext created within the DbtLoader uses the same project root / profiles dir that we use here profile = Profile.load(context, target_name=dbt_target_name) model_defaults = kwargs.pop("model_defaults", ModelDefaultsConfig()) if model_defaults.dialect is None: @@ -66,16 +78,45 @@ def sqlmesh_config( if not issubclass(loader, 
DbtLoader): raise ConfigError("The loader must be a DbtLoader.") + if threads is not None: + # the to_sqlmesh() function on TargetConfig maps self.threads -> concurrent_tasks + profile.target.threads = threads + + gateway_kwargs = {} + if infer_state_schema_name: + profile_name = context.profile_name + + # Note: we deliberately isolate state based on the target *schema* and not the target name. + # It is assumed that the project will define a target, e.g. 'dev', and then in each user's own ~/.dbt/profiles.yml the schema + # for the 'dev' target is overridden to something user-specific, rather than making the target name itself user-specific. + # This means that the schema name is the indicator of isolated state, not the target name which may be re-used across multiple schemas. + target_schema = profile.target.schema_ + + # dbt-core doesn't allow schema to be undefined, but it does allow an empty string, and then just + # fails at runtime when `CREATE SCHEMA ""` doesn't work + if not target_schema: + raise ConfigError( + f"Target '{profile.target_name}' does not specify a schema.\n" + "A schema is required in order to infer where to store SQLMesh state" + ) + + inferred_state_schema_name = f"sqlmesh_state_{profile_name}_{target_schema}" + logger.info("Inferring state schema: %s", inferred_state_schema_name) + gateway_kwargs["state_schema"] = inferred_state_schema_name + return Config( loader=loader, + loader_kwargs=dict(profiles_dir=profiles_dir), model_defaults=model_defaults, variables=variables or {}, + dbt=RootDbtConfig(infer_state_schema_name=infer_state_schema_name), **{ "default_gateway": profile.target_name if "gateways" not in kwargs else "", "gateways": { profile.target_name: GatewayConfig( connection=profile.target.to_sqlmesh(**target_to_sqlmesh_args), state_connection=state_connection, + **gateway_kwargs, ) }, # type: ignore **kwargs, @@ -84,9 +125,12 @@ class DbtLoader(Loader): - def __init__(self, context: GenericContext, path: Path) -> 
None: + def __init__( + self, context: GenericContext, path: Path, profiles_dir: t.Optional[Path] = None + ) -> None: self._projects: t.List[Project] = [] self._macros_max_mtime: t.Optional[float] = None + self._profiles_dir = profiles_dir super().__init__(context, path) def load(self) -> LoadedProject: @@ -137,16 +181,22 @@ def _to_sqlmesh(config: BMC, context: DbtContext) -> Model: package_context.set_and_render_variables(package.variables, package.name) package_models: t.Dict[str, BaseModelConfig] = {**package.models, **package.seeds} + package_models_by_path: t.Dict[Path, t.List[BaseModelConfig]] = defaultdict(list) for model in package_models.values(): if isinstance(model, ModelConfig) and not model.sql.strip(): logger.info(f"Skipping empty model '{model.name}' at path '{model.path}'.") continue + package_models_by_path[model.path].append(model) - sqlmesh_model = cache.get_or_load_models( - model.path, loader=lambda: [_to_sqlmesh(model, package_context)] - )[0] - - models[sqlmesh_model.fqn] = sqlmesh_model + for path, path_models in package_models_by_path.items(): + sqlmesh_models = cache.get_or_load_models( + path, + loader=lambda: [ + _to_sqlmesh(model, package_context) for model in path_models + ], + ) + for sqlmesh_model in sqlmesh_models: + models[sqlmesh_model.fqn] = sqlmesh_model models.update(self._load_external_models(audits, cache)) @@ -165,7 +215,8 @@ def _load_audits( for test in package.tests.values(): logger.debug("Converting '%s' to sqlmesh format", test.name) try: - audits[test.name] = test.to_sqlmesh(package_context) + audits[test.canonical_name] = test.to_sqlmesh(package_context) + except BaseMissingReferenceError as e: ref_type = "model" if isinstance(e, MissingModelError) else "source" logger.warning( @@ -186,6 +237,7 @@ def _load_projects(self) -> t.List[Project]: project = Project.load( DbtContext( project_root=self.config_path, + profiles_dir=self._profiles_dir, target_name=target_name, sqlmesh_config=self.config, ), diff --git 
a/sqlmesh/dbt/manifest.py b/sqlmesh/dbt/manifest.py index 15377e59dc..fce561a24d 100644 --- a/sqlmesh/dbt/manifest.py +++ b/sqlmesh/dbt/manifest.py @@ -11,7 +11,7 @@ from functools import cached_property from pathlib import Path -from dbt import constants as dbt_constants, flags +from dbt import flags from sqlmesh.dbt.util import DBT_VERSION from sqlmesh.utils.conversions import make_serializable @@ -19,6 +19,8 @@ # Override the file name to prevent dbt commands from invalidating the cache. if DBT_VERSION >= (1, 6, 0): + from dbt import constants as dbt_constants + dbt_constants.PARTIAL_PARSE_FILE_NAME = "sqlmesh_partial_parse.msgpack" # type: ignore else: from dbt.parser import manifest as dbt_manifest # type: ignore @@ -47,7 +49,7 @@ from sqlmesh.dbt.builtin import BUILTIN_FILTERS, BUILTIN_GLOBALS, OVERRIDDEN_MACROS from sqlmesh.dbt.common import Dependencies from sqlmesh.dbt.model import ModelConfig -from sqlmesh.dbt.package import HookConfig, MacroConfig +from sqlmesh.dbt.package import HookConfig, MacroConfig, MaterializationConfig from sqlmesh.dbt.seed import SeedConfig from sqlmesh.dbt.source import SourceConfig from sqlmesh.dbt.target import TargetConfig @@ -61,6 +63,7 @@ extract_call_names, jinja_call_arg_name, ) +from sqlglot.helper import ensure_list if t.TYPE_CHECKING: from dbt.contracts.graph.manifest import Macro, Manifest @@ -75,6 +78,7 @@ SourceConfigs = t.Dict[str, SourceConfig] MacroConfigs = t.Dict[str, MacroConfig] HookConfigs = t.Dict[str, HookConfig] +MaterializationConfigs = t.Dict[str, MaterializationConfig] IGNORED_PACKAGES = {"elementary"} @@ -135,6 +139,7 @@ def __init__( self._on_run_start_per_package: t.Dict[str, HookConfigs] = defaultdict(dict) self._on_run_end_per_package: t.Dict[str, HookConfigs] = defaultdict(dict) + self._materializations: MaterializationConfigs = {} def tests(self, package_name: t.Optional[str] = None) -> TestConfigs: self._load_all() @@ -164,6 +169,10 @@ def on_run_end(self, package_name: t.Optional[str] = None) 
-> HookConfigs: self._load_all() return self._on_run_end_per_package[package_name or self._project_name] + def materializations(self) -> MaterializationConfigs: + self._load_all() + return self._materializations + @property def all_macros(self) -> t.Dict[str, t.Dict[str, MacroInfo]]: self._load_all() @@ -213,6 +222,7 @@ def _load_all(self) -> None: self._calls = {k: (v, False) for k, v in (self._call_cache.get("") or {}).items()} self._load_macros() + self._load_materializations() self._load_sources() self._load_tests() self._load_models_and_seeds() @@ -250,11 +260,14 @@ def _load_sources(self) -> None: def _load_macros(self) -> None: for macro in self._manifest.macros.values(): + if macro.name.startswith("materialization_"): + continue + if macro.name.startswith("test_"): macro.macro_sql = _convert_jinja_test_to_macro(macro.macro_sql) dependencies = Dependencies(macros=_macro_references(self._manifest, macro)) - if not macro.name.startswith("materialization_") and not macro.name.startswith("test_"): + if not macro.name.startswith("test_"): dependencies = dependencies.union( self._extra_dependencies(macro.macro_sql, macro.package_name) ) @@ -281,6 +294,32 @@ def _load_macros(self) -> None: if pos > 0 and name[pos + 2 :] in adapter_macro_names: macro_config.info.is_top_level = True + def _load_materializations(self) -> None: + for macro in self._manifest.macros.values(): + if macro.name.startswith("materialization_"): + # Extract name and adapter ( "materialization_{name}_{adapter}" or "materialization_{name}_default") + name_parts = macro.name.split("_") + if len(name_parts) >= 3: + mat_name = "_".join(name_parts[1:-1]) + adapter = name_parts[-1] + + dependencies = Dependencies(macros=_macro_references(self._manifest, macro)) + macro.macro_sql = _strip_jinja_materialization_tags(macro.macro_sql) + dependencies = dependencies.union( + self._extra_dependencies(macro.macro_sql, macro.package_name) + ) + + materialization_config = MaterializationConfig( + 
name=mat_name, + adapter=adapter, + definition=macro.macro_sql, + dependencies=dependencies, + path=Path(macro.original_file_path), + ) + + key = f"{mat_name}_{adapter}" + self._materializations[key] = materialization_config + def _load_tests(self) -> None: for node in self._manifest.nodes.values(): if node.resource_type != "test": @@ -317,15 +356,17 @@ def _load_tests(self) -> None: ) test_model = _test_model(node) + node_config = _node_base_config(node) + node_config["name"] = _build_test_name(node, dependencies) test = TestConfig( sql=sql, model_name=test_model, test_kwargs=node.test_metadata.kwargs if hasattr(node, "test_metadata") else {}, dependencies=dependencies, - **_node_base_config(node), + **node_config, ) - self._tests_per_package[node.package_name][node.name.lower()] = test + self._tests_per_package[node.package_name][node.unique_id] = test if test_model: self._tests_by_owner[test_model].append(test) @@ -359,6 +400,12 @@ def _load_models_and_seeds(self) -> None: dependencies = dependencies.union( self._extra_dependencies(sql, node.package_name, track_all_model_attrs=True) ) + for hook in [*node_config.get("pre-hook", []), *node_config.get("post-hook", [])]: + dependencies = dependencies.union( + self._extra_dependencies( + hook["sql"], node.package_name, track_all_model_attrs=True + ) + ) dependencies = dependencies.union( self._flatten_dependencies_from_macros(dependencies.macros, node.package_name) ) @@ -661,7 +708,7 @@ def _macro_references( return result for macro_node_id in node.depends_on.macros: - if not macro_node_id: + if not macro_node_id or macro_node_id == "None": continue macro_node = manifest.macros[macro_node_id] @@ -699,7 +746,12 @@ def _test_model(node: ManifestNode) -> t.Optional[str]: attached_node = getattr(node, "attached_node", None) if attached_node: pieces = attached_node.split(".") - return pieces[-1] if pieces[0] in ["model", "seed"] else None + if pieces[0] in ["model", "seed"]: + # versioned models have format 
"model.package.model_name.v1" (4 parts) + if len(pieces) == 4: + return f"{pieces[2]}_{pieces[3]}" + return pieces[-1] + return None key_name = getattr(node, "file_key_name", None) if key_name: @@ -732,3 +784,77 @@ def _convert_jinja_test_to_macro(test_jinja: str) -> str: macro = macro_tag + test_jinja[match.span()[-1] :] return re.sub(ENDTEST_REGEX, lambda m: m.group(0).replace("endtest", "endmacro"), macro) + + +def _strip_jinja_materialization_tags(materialization_jinja: str) -> str: + MATERIALIZATION_TAG_REGEX = r"\s*{%-?\s*materialization\s+[^%]*%}\s*\n?" + ENDMATERIALIZATION_REGEX = r"{%-?\s*endmaterialization\s*-?%}\s*\n?" + + if not re.match(MATERIALIZATION_TAG_REGEX, materialization_jinja): + return materialization_jinja + + materialization_jinja = re.sub( + MATERIALIZATION_TAG_REGEX, + "", + materialization_jinja, + flags=re.IGNORECASE, + ) + + materialization_jinja = re.sub( + ENDMATERIALIZATION_REGEX, + "", + materialization_jinja, + flags=re.IGNORECASE, + ) + + return materialization_jinja.strip() + + +def _build_test_name(node: ManifestNode, dependencies: Dependencies) -> str: + """ + Build a user-friendly test name that includes the test's model/source, column, + and args for tests with custom user names. Needed because dbt only generates these + names for tests that do not specify the "name" field in their YAML definition. 
+ + Name structure + - Model test: [namespace]_[test name]_[model name]_[column name]__[arg values] + - Source test: [namespace]_source_[test name]_[source name]_[table name]_[column name]__[arg values] + """ + # standalone test + if not hasattr(node, "test_metadata"): + return node.name + + model_name = _test_model(node) + source_name = None + if not model_name and dependencies.sources: + # extract source and table names + source_parts = list(dependencies.sources)[0].split(".") + source_name = "_".join(source_parts) if len(source_parts) == 2 else source_parts[-1] + entity_name = model_name or source_name or "" + entity_name = f"_{entity_name}" if entity_name else "" + + name_prefix = "" + if namespace := getattr(node.test_metadata, "namespace", None): + name_prefix += f"{namespace}_" + if source_name and not model_name: + name_prefix += "source_" + + metadata_kwargs = node.test_metadata.kwargs + arg_val_parts = [] + for arg, val in sorted(metadata_kwargs.items()): + if arg == "model": + continue + if isinstance(val, dict): + val = list(val.values()) + val = [re.sub("[^0-9a-zA-Z_]+", "_", str(v)) for v in ensure_list(val)] + arg_val_parts.extend(val) + unique_args = "__".join(arg_val_parts) if arg_val_parts else "" + unique_args = f"_{unique_args}" if unique_args else "" + + auto_name = f"{name_prefix}{node.test_metadata.name}{entity_name}{unique_args}" + + if node.name == auto_name: + return node.name + + custom_prefix = name_prefix if source_name and not model_name else "" + return f"{custom_prefix}{node.name}{entity_name}{unique_args}" diff --git a/sqlmesh/dbt/model.py b/sqlmesh/dbt/model.py index 124d900c4b..55994abf85 100644 --- a/sqlmesh/dbt/model.py +++ b/sqlmesh/dbt/model.py @@ -31,6 +31,7 @@ OnAdditiveChange, on_destructive_change_validator, on_additive_change_validator, + DbtCustomKind, ) from sqlmesh.dbt.basemodel import BaseModelConfig, Materialization, SnapshotStrategy from sqlmesh.dbt.common import SqlStr, sql_str_validator @@ -40,6 +41,7 @@ if 
t.TYPE_CHECKING: from sqlmesh.core.audit.definition import ModelAudit from sqlmesh.dbt.context import DbtContext + from sqlmesh.dbt.package import MaterializationConfig logger = logging.getLogger(__name__) @@ -170,6 +172,22 @@ def _validate_check_cols(cls, v: t.Union[str, t.List[str]]) -> t.Union[str, t.Li return "*" return ensure_list(v) + @field_validator("updated_at", mode="before") + @classmethod + def _validate_updated_at(cls, v: t.Optional[str]) -> t.Optional[str]: + """ + Extract column name if updated_at contains a cast. + + SCDType2ByTimeKind and SCDType2ByColumnKind expect a column, and the casting is done later. + """ + if v is None: + return None + parsed = d.parse_one(v) + if isinstance(parsed, exp.Cast) and isinstance(parsed.this, exp.Column): + return parsed.this.name + + return v + @field_validator("sql", mode="before") @classmethod def _validate_sql(cls, v: t.Union[str, SqlStr]) -> SqlStr: @@ -197,6 +215,14 @@ def _validate_partition_by( ): granularity = v["granularity"] raise ConfigError(f"Unexpected granularity '{granularity}' in partition_by '{v}'.") + if "data_type" in v and v["data_type"].lower() not in ( + "timestamp", + "date", + "datetime", + "int64", + ): + data_type = v["data_type"] + raise ConfigError(f"Unexpected data_type '{data_type}' in partition_by '{v}'.") return {"data_type": "date", "granularity": "day", **v} raise ConfigError(f"Invalid format for partition_by '{v}'") @@ -444,9 +470,22 @@ def model_kind(self, context: DbtContext) -> ModelKind: if materialization == Materialization.DYNAMIC_TABLE: return ManagedKind() + if materialization == Materialization.CUSTOM: + if custom_materialization := self._get_custom_materialization(context): + return DbtCustomKind( + materialization=self.materialized, + adapter=custom_materialization.adapter, + dialect=self.dialect(context), + definition=custom_materialization.definition, + ) + + raise ConfigError( + f"Unknown materialization '{self.materialized}'. 
Custom materializations must be defined in your dbt project." + ) + raise ConfigError(f"{materialization.value} materialization not supported.") - def _big_query_partition_by_expr(self, context: DbtContext) -> exp.Expression: + def _big_query_partition_by_expr(self, context: DbtContext) -> exp.Expr: assert isinstance(self.partition_by, dict) data_type = self.partition_by["data_type"].lower() raw_field = self.partition_by["field"] @@ -483,6 +522,18 @@ def _big_query_partition_by_expr(self, context: DbtContext) -> exp.Expression: dialect="bigquery", ) + def _get_custom_materialization(self, context: DbtContext) -> t.Optional[MaterializationConfig]: + materializations = context.manifest.materializations() + name, target_adapter = self.materialized, context.target.dialect + + adapter_specific_key = f"{name}_{target_adapter}" + default_key = f"{name}_default" + if adapter_specific_key in materializations: + return materializations[adapter_specific_key] + if default_key in materializations: + return materializations[default_key] + return None + @property def sqlmesh_config_fields(self) -> t.Set[str]: return super().sqlmesh_config_fields | { @@ -510,10 +561,17 @@ def to_sqlmesh( physical_properties: t.Dict[str, t.Any] = {} if self.partition_by: - if isinstance(kind, ViewKind): + if isinstance(kind, (ViewKind, EmbeddedKind)): + logger.warning( + "Ignoring partition_by config for model '%s'; partition_by is not supported for %s.", + self.name, + "views" if isinstance(kind, ViewKind) else "ephemeral models", + ) + elif context.target.dialect == "snowflake": logger.warning( - "Ignoring partition_by config for model '%s'; partition_by is not supported for views.", + "Ignoring partition_by config for model '%s' targeting %s. 
The partition_by config is not supported for Snowflake.", self.name, + context.target.dialect, ) else: partitioned_by = [] @@ -539,16 +597,23 @@ def to_sqlmesh( optional_kwargs["partitioned_by"] = partitioned_by if self.cluster_by: - if isinstance(kind, ViewKind): + if isinstance(kind, (ViewKind, EmbeddedKind)): logger.warning( - "Ignoring cluster_by config for model '%s'; cluster_by is not supported for views.", + "Ignoring cluster_by config for model '%s'; cluster_by is not supported for %s.", self.name, + "views" if isinstance(kind, ViewKind) else "ephemeral models", ) else: clustered_by = [] for c in self.cluster_by: try: - clustered_by.append(d.parse_one(c, dialect=model_dialect)) + cluster_expr = exp.maybe_parse( + c, into=exp.Cluster, prefix="CLUSTER BY", dialect=model_dialect + ) + for expr in cluster_expr.expressions: + clustered_by.append( + expr.this if isinstance(expr, exp.Ordered) else expr + ) except SqlglotError as e: raise ConfigError( f"Failed to parse model '{self.canonical_name(context)}' cluster_by field '{c}' in '{self.path}': {e}" @@ -650,11 +715,20 @@ def to_sqlmesh( if physical_properties: model_kwargs["physical_properties"] = physical_properties + kind = self.model_kind(context) + + # A falsy grants config (None or {}) is considered as unmanaged per dbt semantics + if self.grants and kind.supports_grants: + model_kwargs["grants"] = self.grants + allow_partials = model_kwargs.pop("allow_partials", None) if allow_partials is None: # Set allow_partials to True for dbt models to preserve the original semantics. 
allow_partials = True + # pop begin for all models so we don't pass it through for non-incremental materializations + # (happens if model config is microbatch but project config overrides) + begin = model_kwargs.pop("begin", None) if kind.is_incremental: if self.batch_size and isinstance(self.batch_size, str): if "interval_unit" in model_kwargs: @@ -664,7 +738,7 @@ def to_sqlmesh( else: model_kwargs["interval_unit"] = self.batch_size self.batch_size = None - if begin := model_kwargs.pop("begin", None): + if begin: if "start" in model_kwargs: get_console().log_warning( f"Both 'begin' and 'start' are set for model '{self.canonical_name(context)}'. 'start' will be used." @@ -693,7 +767,7 @@ def to_sqlmesh( extract_dependencies_from_query=False, allow_partials=allow_partials, virtual_environment_mode=virtual_environment_mode, - dbt_name=self.node_name, + dbt_node_info=self.node_info, **optional_kwargs, **model_kwargs, ) diff --git a/sqlmesh/dbt/package.py b/sqlmesh/dbt/package.py index 420cf3cb73..dbaa832c22 100644 --- a/sqlmesh/dbt/package.py +++ b/sqlmesh/dbt/package.py @@ -37,6 +37,16 @@ class HookConfig(PydanticModel): dependencies: Dependencies +class MaterializationConfig(PydanticModel): + """Class to contain custom materialization configuration.""" + + name: str + adapter: str + definition: str + dependencies: Dependencies + path: Path + + class Package(PydanticModel): """Class to contain package configuration""" @@ -47,6 +57,7 @@ class Package(PydanticModel): models: t.Dict[str, ModelConfig] variables: t.Dict[str, t.Any] macros: t.Dict[str, MacroConfig] + materializations: t.Dict[str, MaterializationConfig] on_run_start: t.Dict[str, HookConfig] on_run_end: t.Dict[str, HookConfig] files: t.Set[Path] @@ -94,6 +105,7 @@ def load(self, package_root: Path) -> Package: models = _fix_paths(self._context.manifest.models(package_name), package_root) seeds = _fix_paths(self._context.manifest.seeds(package_name), package_root) macros = 
_fix_paths(self._context.manifest.macros(package_name), package_root) + materializations = _fix_paths(self._context.manifest.materializations(), package_root) on_run_start = _fix_paths(self._context.manifest.on_run_start(package_name), package_root) on_run_end = _fix_paths(self._context.manifest.on_run_end(package_name), package_root) sources = self._context.manifest.sources(package_name) @@ -114,13 +126,16 @@ def load(self, package_root: Path) -> Package: seeds=seeds, variables=package_variables, macros=macros, + materializations=materializations, files=config_paths, on_run_start=on_run_start, on_run_end=on_run_end, ) -T = t.TypeVar("T", TestConfig, ModelConfig, MacroConfig, SeedConfig, HookConfig) +T = t.TypeVar( + "T", TestConfig, ModelConfig, MacroConfig, MaterializationConfig, SeedConfig, HookConfig +) def _fix_paths(configs: t.Dict[str, T], package_root: Path) -> t.Dict[str, T]: diff --git a/sqlmesh/dbt/profile.py b/sqlmesh/dbt/profile.py index ea0384c786..a95c81501c 100644 --- a/sqlmesh/dbt/profile.py +++ b/sqlmesh/dbt/profile.py @@ -60,7 +60,7 @@ def load(cls, context: DbtContext, target_name: t.Optional[str] = None) -> Profi if not context.profile_name: raise ConfigError(f"{project_file.stem} must include project name.") - profile_filepath = cls._find_profile(context.project_root) + profile_filepath = cls._find_profile(context.project_root, context.profiles_dir) if not profile_filepath: raise ConfigError(f"{cls.PROFILE_FILE} not found.") @@ -68,8 +68,8 @@ def load(cls, context: DbtContext, target_name: t.Optional[str] = None) -> Profi return Profile(profile_filepath, target_name, target) @classmethod - def _find_profile(cls, project_root: Path) -> t.Optional[Path]: - dir = os.environ.get("DBT_PROFILES_DIR", "") + def _find_profile(cls, project_root: Path, profiles_dir: t.Optional[Path]) -> t.Optional[Path]: + dir = os.environ.get("DBT_PROFILES_DIR", profiles_dir or "") path = Path(project_root, dir, cls.PROFILE_FILE) if path.exists(): return path diff 
--git a/sqlmesh/dbt/project.py b/sqlmesh/dbt/project.py index 4af30958f5..2b0a2e0c3f 100644 --- a/sqlmesh/dbt/project.py +++ b/sqlmesh/dbt/project.py @@ -99,16 +99,21 @@ def load(cls, context: DbtContext, variables: t.Optional[t.Dict[str, t.Any]] = N package = package_loader.load(path.parent) packages[package.name] = package + # Variable resolution precedence: + # 1. Variable overrides + # 2. Package-scoped variables in the root project's dbt_project.yml + # 3. Global project variables in the root project's dbt_project.yml + # 4. Variables in the package's dbt_project.yml all_project_variables = {**(project_yaml.get("vars") or {}), **(variable_overrides or {})} for name, package in packages.items(): - package_vars = all_project_variables.get(name) - - if isinstance(package_vars, dict): - package.variables.update(package_vars) - - if name == context.project_name: - package.variables.update(all_project_variables) + if isinstance(all_project_variables.get(name), dict): + project_vars_copy = all_project_variables.copy() + package_scoped_vars = project_vars_copy.pop(name) + package.variables.update(project_vars_copy) + package.variables.update(package_scoped_vars) else: + package.variables.update(all_project_variables) + if variable_overrides: package.variables.update(variable_overrides) return Project(context, profile, packages) diff --git a/sqlmesh/dbt/seed.py b/sqlmesh/dbt/seed.py index d6ecc768f9..c0c8186f29 100644 --- a/sqlmesh/dbt/seed.py +++ b/sqlmesh/dbt/seed.py @@ -92,7 +92,7 @@ def to_sqlmesh( audit_definitions=audit_definitions, virtual_environment_mode=virtual_environment_mode, start=self.start or context.sqlmesh_config.model_defaults.start, - dbt_name=self.node_name, + dbt_node_info=self.node_info, **kwargs, ) diff --git a/sqlmesh/dbt/source.py b/sqlmesh/dbt/source.py index 76ee682e77..832ed0e156 100644 --- a/sqlmesh/dbt/source.py +++ b/sqlmesh/dbt/source.py @@ -36,6 +36,7 @@ class SourceConfig(GeneralConfig): # DBT configuration fields name: str = "" 
source_name_: str = Field("", alias="source_name") + fqn_: t.List[str] = Field(default_factory=list, alias="fqn") database: t.Optional[str] = None schema_: t.Optional[str] = Field(None, alias="schema") identifier: t.Optional[str] = None @@ -64,6 +65,10 @@ def table_name(self) -> t.Optional[str]: def config_name(self) -> str: return f"{self.source_name_}.{self.name}" + @property + def fqn(self) -> str: + return ".".join(self.fqn_) + def canonical_name(self, context: DbtContext) -> str: if self._canonical_name is None: source = context.get_callable_macro("source") @@ -74,7 +79,7 @@ def canonical_name(self, context: DbtContext) -> str: relation = source(self.source_name_, self.name) except Exception as e: raise ConfigError( - f"'source' macro failed for '{self.config_name}' with exeception '{e}'." + f"'source' macro failed for '{self.config_name}' with exception '{e}'." ) relation = relation.quote( diff --git a/sqlmesh/dbt/target.py b/sqlmesh/dbt/target.py index f5fd119027..62683ecfac 100644 --- a/sqlmesh/dbt/target.py +++ b/sqlmesh/dbt/target.py @@ -45,11 +45,24 @@ # We only serialize a subset of fields in order to avoid persisting sensitive information SERIALIZABLE_FIELDS = { - "type", + # core "name", - "database", "schema_", + "type", + "threads", + # snowflake + "database", "warehouse", + "user", + "role", + "account", + # postgres/redshift + "dbname", + "host", + "port", + # bigquery + "project", + "dataset", } SCHEMA_DIFFER_OVERRIDES = { @@ -588,12 +601,17 @@ def validate_fields(cls, data: t.Any) -> t.Any: if not isinstance(data, dict): return data - data["schema"] = data.get("schema") or data.get("dataset") - if not data["schema"]: + # dbt treats schema and dataset interchangeably + schema = data.get("schema") or data.get("dataset") + if not schema: raise ConfigError("Either schema or dataset must be set") - data["database"] = data.get("database") or data.get("project") - if not data["database"]: + data["dataset"] = data["schema"] = schema + + # dbt treats 
database and project interchangeably + database = data.get("database") or data.get("project") + if not database: raise ConfigError("Either database or project must be set") + data["database"] = data["project"] = database return data diff --git a/sqlmesh/dbt/test.py b/sqlmesh/dbt/test.py index 5c18ff4d81..c4a32b2189 100644 --- a/sqlmesh/dbt/test.py +++ b/sqlmesh/dbt/test.py @@ -8,6 +8,7 @@ from pydantic import Field import sqlmesh.core.dialect as d from sqlmesh.core.audit import Audit, ModelAudit, StandaloneAudit +from sqlmesh.core.node import DbtNodeInfo from sqlmesh.dbt.common import ( Dependencies, GeneralConfig, @@ -79,8 +80,10 @@ class TestConfig(GeneralConfig): dialect_: t.Optional[str] = Field(None, alias="dialect") # dbt fields + unique_id: str = "" package_name: str = "" alias: t.Optional[str] = None + fqn: t.List[str] = [] schema_: t.Optional[str] = Field("", alias="schema") database: t.Optional[str] = None severity: Severity = Severity.ERROR @@ -106,6 +109,10 @@ def _validate_severity(cls, v: t.Union[Severity, str]) -> Severity: def _lowercase_name(cls, v: str) -> str: return v.lower() + @property + def canonical_name(self) -> str: + return f"{self.package_name}.{self.name}".lower() if self.package_name else self.name + @property def is_standalone(self) -> bool: # A test is standalone if: @@ -115,7 +122,14 @@ def is_standalone(self) -> bool: return True # Check if test has references to other models - other_refs = {ref for ref in self.dependencies.refs if ref != self.model_name} + # For versioned models, refs include version (e.g., "model_name_v1") but model_name may not + self_refs = {self.model_name} + for ref in self.dependencies.refs: + # versioned models end in _vX + if ref.startswith(f"{self.model_name}_v"): + self_refs.add(ref) + + other_refs = {ref for ref in self.dependencies.refs if ref not in self_refs} return bool(other_refs) @property @@ -155,6 +169,7 @@ def to_sqlmesh(self, context: DbtContext) -> Audit: jinja_macros.add_globals({"this": 
self.relation_info}) audit = StandaloneAudit( name=self.name, + dbt_node_info=self.node_info, dialect=self.dialect(context), skip=skip, query=query, @@ -171,6 +186,7 @@ def to_sqlmesh(self, context: DbtContext) -> Audit: else: audit = ModelAudit( name=self.name, + dbt_node_info=self.node_info, dialect=self.dialect(context), skip=skip, blocking=blocking, @@ -214,6 +230,12 @@ def relation_info(self) -> AttributeDict: } ) + @property + def node_info(self) -> DbtNodeInfo: + return DbtNodeInfo( + unique_id=self.unique_id, name=self.name, fqn=".".join(self.fqn), alias=self.alias + ) + def _remove_jinja_braces(jinja_str: str) -> str: no_braces = jinja_str diff --git a/sqlmesh/integrations/github/cicd/command.py b/sqlmesh/integrations/github/cicd/command.py index f1b611150a..5506d4917b 100644 --- a/sqlmesh/integrations/github/cicd/command.py +++ b/sqlmesh/integrations/github/cicd/command.py @@ -25,12 +25,21 @@ envvar="GITHUB_TOKEN", help="The Github Token to be used. Pass in `${{ secrets.GITHUB_TOKEN }}` if you want to use the one created by Github actions", ) +@click.option( + "--full-logs", + is_flag=True, + help="Whether to print all logs in the Github Actions output or only in their relevant GA check", +) @click.pass_context -def github(ctx: click.Context, token: str) -> None: +def github(ctx: click.Context, token: str, full_logs: bool = False) -> None: """Github Action CI/CD Bot. 
See https://sqlmesh.readthedocs.io/en/stable/integrations/github/ for details""" # set a larger width because if none is specified, it auto-detects 80 characters when running in GitHub Actions # which can result in surprise newlines when outputting dates to backfill - set_console(MarkdownConsole(width=1000, warning_capture_only=True, error_capture_only=True)) + set_console( + MarkdownConsole( + width=1000, warning_capture_only=not full_logs, error_capture_only=not full_logs + ) + ) ctx.obj["github"] = GithubController( paths=ctx.obj["paths"], token=token, diff --git a/sqlmesh/integrations/github/cicd/config.py b/sqlmesh/integrations/github/cicd/config.py index a287bf1af5..7fb3a0f5b6 100644 --- a/sqlmesh/integrations/github/cicd/config.py +++ b/sqlmesh/integrations/github/cicd/config.py @@ -36,6 +36,7 @@ class GithubCICDBotConfig(BaseConfig): forward_only_branch_suffix_: t.Optional[str] = Field( default=None, alias="forward_only_branch_suffix" ) + check_if_blocked_on_deploy_to_prod: bool = True @model_validator(mode="before") @classmethod diff --git a/sqlmesh/integrations/github/cicd/controller.py b/sqlmesh/integrations/github/cicd/controller.py index dd5ee70e76..40102b97e8 100644 --- a/sqlmesh/integrations/github/cicd/controller.py +++ b/sqlmesh/integrations/github/cicd/controller.py @@ -448,10 +448,9 @@ def prod_plan_with_gaps(self) -> Plan: c.PROD, # this is required to highlight any data gaps between this PR environment and prod (since PR environments may only contain a subset of data) no_gaps=False, - # this works because the snapshots were already categorized when applying self.pr_plan so there are no uncategorized local snapshots to trigger a plan error - no_auto_categorization=True, skip_tests=True, skip_linter=True, + categorizer_config=self.bot_config.auto_categorize_changes, run=self.bot_config.run_on_deploy_to_prod, forward_only=self.forward_only_plan, ) @@ -773,6 +772,11 @@ def deploy_to_prod(self) -> None: "PR is already merged and this event was 
triggered prior to the merge." ) merge_status = self._get_merge_state_status() + if self.bot_config.check_if_blocked_on_deploy_to_prod and merge_status.is_blocked: + raise CICDBotError( + "Branch protection or ruleset requirement is likely not satisfied, e.g. missing CODEOWNERS approval. " + "Please check PR and resolve any issues. To disable this check, set `check_if_blocked_on_deploy_to_prod` to false in the bot configuration." + ) if merge_status.is_dirty: raise CICDBotError( "Merge commit cannot be cleanly created. Likely from a merge conflict. " diff --git a/sqlmesh/lsp/context.py b/sqlmesh/lsp/context.py index 50265ec306..a94db7c421 100644 --- a/sqlmesh/lsp/context.py +++ b/sqlmesh/lsp/context.py @@ -72,7 +72,7 @@ def __init__(self, context: Context) -> None: def list_workspace_tests(self) -> t.List[TestEntry]: """List all tests in the workspace.""" - tests = self.context.load_model_tests() + tests = self.context.select_tests() # Use a set to ensure unique URIs unique_test_uris = {URI.from_path(test.path).value for test in tests} @@ -81,7 +81,9 @@ def list_workspace_tests(self) -> t.List[TestEntry]: test_ranges = get_test_ranges(URI(uri).to_path()) if uri not in test_uris: test_uris[uri] = {} + test_uris[uri].update(test_ranges) + return [ TestEntry( name=test.test_name, @@ -100,7 +102,7 @@ def get_document_tests(self, uri: URI) -> t.List[TestEntry]: Returns: List of TestEntry objects for the specified document. 
""" - tests = self.context.load_model_tests(tests=[str(uri.to_path())]) + tests = self.context.select_tests(tests=[str(uri.to_path())]) test_ranges = get_test_ranges(uri.to_path()) return [ TestEntry( diff --git a/sqlmesh/lsp/hints.py b/sqlmesh/lsp/hints.py index a8d56e2f31..611ce8608d 100644 --- a/sqlmesh/lsp/hints.py +++ b/sqlmesh/lsp/hints.py @@ -5,7 +5,6 @@ from lsprotocol import types from sqlglot import exp -from sqlglot.expressions import Expression from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlmesh.core.model.definition import SqlModel from sqlmesh.lsp.context import LSPContext, ModelTarget @@ -60,7 +59,7 @@ def get_hints( def _get_type_hints_for_select( - expression: exp.Expression, + expression: exp.Expr, dialect: str, columns_to_types: t.Dict[str, exp.DataType], start_line: int, @@ -113,7 +112,7 @@ def _get_type_hints_for_select( def _get_type_hints_for_model_from_query( - query: Expression, + query: exp.Expr, dialect: str, columns_to_types: t.Dict[str, exp.DataType], start_line: int, diff --git a/sqlmesh/lsp/reference.py b/sqlmesh/lsp/reference.py index 80d401f79c..73c4e5681b 100644 --- a/sqlmesh/lsp/reference.py +++ b/sqlmesh/lsp/reference.py @@ -209,7 +209,7 @@ def get_macro_reference( target: t.Union[Model, StandaloneAudit], read_file: t.List[str], config_path: t.Optional[Path], - node: exp.Expression, + node: exp.Expr, macro_name: str, ) -> t.Optional[Reference]: # Get the file path where the macro is defined diff --git a/sqlmesh/magics.py b/sqlmesh/magics.py index 2b5f185aa9..0a433360df 100644 --- a/sqlmesh/magics.py +++ b/sqlmesh/magics.py @@ -337,7 +337,7 @@ def test(self, context: Context, line: str, test_def_raw: t.Optional[str] = None if not args.test_name and not args.ls: raise MagicError("Must provide either test name or `--ls` to list tests") - test_meta = context.load_model_tests() + test_meta = context.select_tests() tests: t.Dict[str, t.Dict[str, ModelTestMetadata]] = defaultdict(dict) for 
model_test_metadata in test_meta: diff --git a/sqlmesh/migrations/v0000_baseline.py b/sqlmesh/migrations/v0000_baseline.py index 4891900a76..abd316fcfe 100644 --- a/sqlmesh/migrations/v0000_baseline.py +++ b/sqlmesh/migrations/v0000_baseline.py @@ -4,15 +4,12 @@ from sqlmesh.utils.migration import blob_text_type, index_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore - schema = state_sync.schema - engine_adapter = state_sync.engine_adapter - +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore intervals_table = "_intervals" snapshots_table = "_snapshots" environments_table = "_environments" versions_table = "_versions" - if state_sync.schema: + if schema: engine_adapter.create_schema(schema) intervals_table = f"{schema}.{intervals_table}" snapshots_table = f"{schema}.{snapshots_table}" @@ -94,5 +91,5 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.create_index(intervals_table, "_intervals_name_version_idx", ("name", "version")) -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py b/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py index 34b765b3ad..897974f09a 100644 --- a/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py +++ b/sqlmesh/migrations/v0061_mysql_fix_blob_text_type.py @@ -9,12 +9,9 @@ from sqlmesh.utils.migration import blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore if engine_adapter.dialect != "mysql": return - - schema = state_sync.schema environments_table = "_environments" snapshots_table = "_snapshots" @@ -46,5 +43,5 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.execute(alter_table_exp) -def migrate_rows(state_sync, **kwargs): # type: ignore +def 
migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0062_add_model_gateway.py b/sqlmesh/migrations/v0062_add_model_gateway.py index 524a94044a..f65d8224ec 100644 --- a/sqlmesh/migrations/v0062_add_model_gateway.py +++ b/sqlmesh/migrations/v0062_add_model_gateway.py @@ -1,9 +1,9 @@ """Add the gateway model attribute.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0063_change_signals.py b/sqlmesh/migrations/v0063_change_signals.py index 8806c9ea60..bbced547fd 100644 --- a/sqlmesh/migrations/v0063_change_signals.py +++ b/sqlmesh/migrations/v0063_change_signals.py @@ -7,15 +7,13 @@ from sqlmesh.utils.migration import index_text_type, blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" index_type = index_text_type(engine_adapter.dialect) if schema: diff --git a/sqlmesh/migrations/v0064_join_when_matched_strings.py b/sqlmesh/migrations/v0064_join_when_matched_strings.py index 6da3164a38..ffd4c94913 100644 --- a/sqlmesh/migrations/v0064_join_when_matched_strings.py +++ b/sqlmesh/migrations/v0064_join_when_matched_strings.py @@ -7,15 +7,13 @@ from sqlmesh.utils.migration import index_text_type, blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, 
schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" index_type = index_text_type(engine_adapter.dialect) if schema: diff --git a/sqlmesh/migrations/v0065_add_model_optimize.py b/sqlmesh/migrations/v0065_add_model_optimize.py index 09240aa61e..e9bc646666 100644 --- a/sqlmesh/migrations/v0065_add_model_optimize.py +++ b/sqlmesh/migrations/v0065_add_model_optimize.py @@ -1,9 +1,9 @@ """Add the optimize_query model attribute.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0066_add_auto_restatements.py b/sqlmesh/migrations/v0066_add_auto_restatements.py index 96d2cd45e8..9eea773573 100644 --- a/sqlmesh/migrations/v0066_add_auto_restatements.py +++ b/sqlmesh/migrations/v0066_add_auto_restatements.py @@ -5,9 +5,7 @@ from sqlmesh.utils.migration import index_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore auto_restatements_table = "_auto_restatements" intervals_table = "_intervals" @@ -40,9 +38,7 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.execute(alter_table_exp) -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore intervals_table = "_intervals" if schema: diff --git a/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py b/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py index d4fd93eda4..1243118df0 100644 --- a/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py 
+++ b/sqlmesh/migrations/v0067_add_tsql_date_full_precision.py @@ -1,9 +1,9 @@ """Add full precision for tsql to support nanoseconds.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py b/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py index 6f7ddbdc1c..35142e9aeb 100644 --- a/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py +++ b/sqlmesh/migrations/v0068_include_unrendered_query_in_metadata_hash.py @@ -1,9 +1,9 @@ """Include the unrendered query in the metadata hash.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0069_update_dev_table_suffix.py b/sqlmesh/migrations/v0069_update_dev_table_suffix.py index 57b41a816c..f69aac434e 100644 --- a/sqlmesh/migrations/v0069_update_dev_table_suffix.py +++ b/sqlmesh/migrations/v0069_update_dev_table_suffix.py @@ -7,15 +7,13 @@ from sqlmesh.utils.migration import index_text_type, blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" environments_table = "_environments" if schema: diff --git a/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py 
b/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py index 4b339d8e97..d0dbdd5563 100644 --- a/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py +++ b/sqlmesh/migrations/v0070_include_grains_in_metadata_hash.py @@ -1,9 +1,9 @@ """Include grains in the metadata hash.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py b/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py index 4e6cbab4f0..61a49dc0b9 100644 --- a/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py +++ b/sqlmesh/migrations/v0071_add_dev_version_to_intervals.py @@ -8,9 +8,7 @@ from sqlmesh.utils.migration import index_text_type, blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore intervals_table = "_intervals" if schema: intervals_table = f"{schema}.{intervals_table}" @@ -29,9 +27,7 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.execute(alter_table_exp) -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore intervals_table = "_intervals" snapshots_table = "_snapshots" if schema: diff --git a/sqlmesh/migrations/v0072_add_environment_statements.py b/sqlmesh/migrations/v0072_add_environment_statements.py index e73faf2b9a..4ed52b5c47 100644 --- a/sqlmesh/migrations/v0072_add_environment_statements.py +++ b/sqlmesh/migrations/v0072_add_environment_statements.py @@ -5,9 +5,7 @@ from sqlmesh.utils.migration import blob_text_type, index_text_type -def 
migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore environment_statements_table = "_environment_statements" if schema: @@ -27,5 +25,5 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore ) -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py b/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py index 40e74d6426..708693ed61 100644 --- a/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py +++ b/sqlmesh/migrations/v0073_remove_symbolic_disable_restatement.py @@ -6,15 +6,13 @@ from sqlmesh.utils.migration import index_text_type, blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py b/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py index 04f1a27254..acd349c888 100644 --- a/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py +++ b/sqlmesh/migrations/v0074_add_partition_by_time_column_property.py @@ -2,9 +2,9 @@ (default: True to keep the original behaviour)""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git 
a/sqlmesh/migrations/v0075_remove_validate_query.py b/sqlmesh/migrations/v0075_remove_validate_query.py index f6d4e255d9..9fdcca7ea6 100644 --- a/sqlmesh/migrations/v0075_remove_validate_query.py +++ b/sqlmesh/migrations/v0075_remove_validate_query.py @@ -8,15 +8,13 @@ from sqlmesh.utils.migration import blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" index_type = index_text_type(engine_adapter.dialect) if schema: diff --git a/sqlmesh/migrations/v0076_add_cron_tz.py b/sqlmesh/migrations/v0076_add_cron_tz.py index 300474aa18..909017c8cd 100644 --- a/sqlmesh/migrations/v0076_add_cron_tz.py +++ b/sqlmesh/migrations/v0076_add_cron_tz.py @@ -1,9 +1,9 @@ """Add 'cron_tz' property to node definition.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py b/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py index 2aec1140f1..68953836bd 100644 --- a/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py +++ b/sqlmesh/migrations/v0077_fix_column_type_hash_calculation.py @@ -1,9 +1,9 @@ """Use the model's dialect when calculating the hash for the column types.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git 
a/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py b/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py index c24b6a5168..adf1e96dd0 100644 --- a/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py +++ b/sqlmesh/migrations/v0078_warn_if_non_migratable_python_env.py @@ -24,13 +24,11 @@ from sqlmesh.core.console import get_console -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0079_add_gateway_managed_property.py b/sqlmesh/migrations/v0079_add_gateway_managed_property.py index 8d24601102..7650d6d765 100644 --- a/sqlmesh/migrations/v0079_add_gateway_managed_property.py +++ b/sqlmesh/migrations/v0079_add_gateway_managed_property.py @@ -3,11 +3,10 @@ from sqlglot import exp -def migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore environments_table = "_environments" - if state_sync.schema: - environments_table = f"{state_sync.schema}.{environments_table}" + if schema: + environments_table = f"{schema}.{environments_table}" alter_table_exp = exp.Alter( this=exp.to_table(environments_table), @@ -22,13 +21,12 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.execute(alter_table_exp) -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore environments_table = "_environments" - if state_sync.schema: - environments_table = f"{state_sync.schema}.{environments_table}" + if schema: + 
environments_table = f"{schema}.{environments_table}" - state_sync.engine_adapter.update_table( + engine_adapter.update_table( environments_table, {"gateway_managed": False}, where=exp.true(), diff --git a/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py b/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py index 582bdd3da9..35cb3977cc 100644 --- a/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py +++ b/sqlmesh/migrations/v0080_add_batch_size_to_scd_type_2_models.py @@ -1,9 +1,9 @@ """Add batch_size to SCD Type 2 models and add updated_at_name to by time which changes their data hash.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0081_update_partitioned_by.py b/sqlmesh/migrations/v0081_update_partitioned_by.py index 611d8f6973..8740285bf0 100644 --- a/sqlmesh/migrations/v0081_update_partitioned_by.py +++ b/sqlmesh/migrations/v0081_update_partitioned_by.py @@ -8,15 +8,13 @@ from sqlmesh.utils.migration import blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" index_type = index_text_type(engine_adapter.dialect) if schema: diff --git a/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py b/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py index 6eadbfc2c3..5565b099cd 100644 --- a/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py +++ 
b/sqlmesh/migrations/v0082_warn_if_incorrectly_duplicated_statements.py @@ -34,13 +34,11 @@ from sqlmesh.core.console import get_console -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py b/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py index 38c84afafd..5dbe0847f9 100644 --- a/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py +++ b/sqlmesh/migrations/v0083_use_sql_for_scd_time_data_type_data_hash.py @@ -1,9 +1,9 @@ """Use sql(...) instead of gen when computing the data hash of the time data type.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py b/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py index 5401c97d77..9edb0051ba 100644 --- a/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py +++ b/sqlmesh/migrations/v0084_normalize_quote_when_matched_and_merge_filter.py @@ -5,9 +5,9 @@ """ -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0085_deterministic_repr.py 
b/sqlmesh/migrations/v0085_deterministic_repr.py index 1a90277bbe..81cb0f194e 100644 --- a/sqlmesh/migrations/v0085_deterministic_repr.py +++ b/sqlmesh/migrations/v0085_deterministic_repr.py @@ -36,15 +36,13 @@ def _dict_sort(obj: t.Any) -> str: return repr(obj) -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0086_check_deterministic_bug.py b/sqlmesh/migrations/v0086_check_deterministic_bug.py index 0679414881..f44e5b8e33 100644 --- a/sqlmesh/migrations/v0086_check_deterministic_bug.py +++ b/sqlmesh/migrations/v0086_check_deterministic_bug.py @@ -10,13 +10,11 @@ KEYS_TO_MAKE_DETERMINISTIC = ["__sqlmesh__vars__", "__sqlmesh__blueprint__vars__"] -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" versions_table = "_versions" if schema: diff --git a/sqlmesh/migrations/v0087_normalize_blueprint_variables.py b/sqlmesh/migrations/v0087_normalize_blueprint_variables.py index 2f23a0653e..fe737861c2 100644 --- a/sqlmesh/migrations/v0087_normalize_blueprint_variables.py +++ b/sqlmesh/migrations/v0087_normalize_blueprint_variables.py @@ -35,15 +35,13 @@ class SqlValue: sql: str -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): 
# type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py b/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py index 405aad725f..0aa7171821 100644 --- a/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py +++ b/sqlmesh/migrations/v0088_warn_about_variable_python_env_diffs.py @@ -35,13 +35,11 @@ METADATA_HASH_EXPRESSIONS = {"on_virtual_update", "audits", "signals", "audit_definitions"} -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0089_add_virtual_environment_mode.py b/sqlmesh/migrations/v0089_add_virtual_environment_mode.py index 63d491418f..88126c76d7 100644 --- a/sqlmesh/migrations/v0089_add_virtual_environment_mode.py +++ b/sqlmesh/migrations/v0089_add_virtual_environment_mode.py @@ -1,9 +1,9 @@ """Add virtual_environment_mode to the model definition.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0090_add_forward_only_column.py b/sqlmesh/migrations/v0090_add_forward_only_column.py index b68c0f65ea..48253691ec 100644 --- a/sqlmesh/migrations/v0090_add_forward_only_column.py +++ 
b/sqlmesh/migrations/v0090_add_forward_only_column.py @@ -7,9 +7,7 @@ from sqlmesh.utils.migration import index_text_type, blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" @@ -27,11 +25,9 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.execute(alter_table_exp) -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0091_on_additive_change.py b/sqlmesh/migrations/v0091_on_additive_change.py index c0170bd438..e24b9b4122 100644 --- a/sqlmesh/migrations/v0091_on_additive_change.py +++ b/sqlmesh/migrations/v0091_on_additive_change.py @@ -1,9 +1,9 @@ """Add on_additive_change to incremental model metadata hash.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py index 1ff069bc82..5407e5a99a 100644 --- a/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py +++ b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py @@ -5,7 +5,7 @@ doesn't match dbt's behavior. dbt only uses data_type for contracts/validation, not DDL. This fix may cause diffs if tables were created with incorrect types. 
-More context: https://github.com/TobikoData/sqlmesh/pull/5231 +More context: https://github.com/SQLMesh/sqlmesh/pull/5231 """ import json @@ -17,13 +17,11 @@ SQLMESH_DBT_PACKAGE = "sqlmesh.dbt" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" @@ -35,7 +33,7 @@ def migrate_rows(state_sync, **kwargs): # type: ignore "tables may have been created with incorrect column types. After this migration, run " "'sqlmesh diff prod' to check for column type differences, and if any are found, " "apply a plan to correct the table schemas. For more details, see: " - "https://github.com/TobikoData/sqlmesh/pull/5231." + "https://github.com/SQLMesh/sqlmesh/pull/5231." 
) for (snapshot,) in engine_adapter.fetchall( diff --git a/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py b/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py index f629c1d27d..aaaacf3a91 100644 --- a/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py +++ b/sqlmesh/migrations/v0093_use_raw_sql_in_fingerprint.py @@ -1,9 +1,9 @@ """Use the raw SQL when computing the model fingerprint.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py b/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py index 1abc4fa4af..9d7adf21a3 100644 --- a/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py +++ b/sqlmesh/migrations/v0094_add_dev_version_and_fingerprint_columns.py @@ -7,9 +7,7 @@ from sqlmesh.utils.migration import index_text_type, blob_text_type -def migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" @@ -42,11 +40,9 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.execute(add_fingerprint_exp) -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore import pandas as pd - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py b/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py index 802d996df5..0fa9fd51b8 100644 --- 
a/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py +++ b/sqlmesh/migrations/v0095_warn_about_dbt_raw_sql_diff.py @@ -17,13 +17,11 @@ SQLMESH_DBT_PACKAGE = "sqlmesh.dbt" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore snapshots_table = "_snapshots" if schema: snapshots_table = f"{schema}.{snapshots_table}" diff --git a/sqlmesh/migrations/v0096_remove_plan_dags_table.py b/sqlmesh/migrations/v0096_remove_plan_dags_table.py index e342d6b1a8..8eb674ead0 100644 --- a/sqlmesh/migrations/v0096_remove_plan_dags_table.py +++ b/sqlmesh/migrations/v0096_remove_plan_dags_table.py @@ -1,9 +1,7 @@ """Remove the obsolete _plan_dags table.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore - engine_adapter = state_sync.engine_adapter - schema = state_sync.schema +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore plan_dags_table = "_plan_dags" if schema: plan_dags_table = f"{schema}.{plan_dags_table}" @@ -11,5 +9,5 @@ def migrate_schemas(state_sync, **kwargs): # type: ignore engine_adapter.drop_table(plan_dags_table) -def migrate_rows(state_sync, **kwargs): # type: ignore +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0097_add_dbt_name_in_node.py b/sqlmesh/migrations/v0097_add_dbt_name_in_node.py index f8909e4430..cd548977ef 100644 --- a/sqlmesh/migrations/v0097_add_dbt_name_in_node.py +++ b/sqlmesh/migrations/v0097_add_dbt_name_in_node.py @@ -1,9 +1,9 @@ """Add 'dbt_name' property to node definition.""" -def migrate_schemas(state_sync, **kwargs): # type: ignore +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore pass -def migrate_rows(state_sync, **kwargs): # type: ignore +def 
migrate_rows(engine_adapter, schema, **kwargs): # type: ignore pass diff --git a/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py b/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py new file mode 100644 index 0000000000..b69ba8fa6f --- /dev/null +++ b/sqlmesh/migrations/v0098_add_dbt_node_info_in_node.py @@ -0,0 +1,103 @@ +"""Replace 'dbt_name' with 'dbt_node_info' in the snapshot definition""" + +import json +from sqlglot import exp +from sqlmesh.utils.migration import index_text_type, blob_text_type + + +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore + pass + + +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore + import pandas as pd + + snapshots_table = "_snapshots" + if schema: + snapshots_table = f"{schema}.{snapshots_table}" + + index_type = index_text_type(engine_adapter.dialect) + blob_type = blob_text_type(engine_adapter.dialect) + + new_snapshots = [] + migration_needed = False + + for ( + name, + identifier, + version, + snapshot, + kind_name, + updated_ts, + unpaused_ts, + ttl_ms, + unrestorable, + forward_only, + dev_version, + fingerprint, + ) in engine_adapter.fetchall( + exp.select( + "name", + "identifier", + "version", + "snapshot", + "kind_name", + "updated_ts", + "unpaused_ts", + "ttl_ms", + "unrestorable", + "forward_only", + "dev_version", + "fingerprint", + ).from_(snapshots_table), + quote_identifiers=True, + ): + parsed_snapshot = json.loads(snapshot) + if dbt_name := parsed_snapshot["node"].get("dbt_name"): + parsed_snapshot["node"].pop("dbt_name") + parsed_snapshot["node"]["dbt_node_info"] = { + "unique_id": dbt_name, + # these will get populated as metadata-only changes on the next plan + "name": "", + "fqn": "", + } + migration_needed = True + + new_snapshots.append( + { + "name": name, + "identifier": identifier, + "version": version, + "snapshot": json.dumps(parsed_snapshot), + "kind_name": kind_name, + "updated_ts": updated_ts, + "unpaused_ts": unpaused_ts, + "ttl_ms": ttl_ms, + 
"unrestorable": unrestorable, + "forward_only": forward_only, + "dev_version": dev_version, + "fingerprint": fingerprint, + } + ) + + if migration_needed and new_snapshots: + engine_adapter.delete_from(snapshots_table, "TRUE") + + engine_adapter.insert_append( + snapshots_table, + pd.DataFrame(new_snapshots), + target_columns_to_types={ + "name": exp.DataType.build(index_type), + "identifier": exp.DataType.build(index_type), + "version": exp.DataType.build(index_type), + "snapshot": exp.DataType.build(blob_type), + "kind_name": exp.DataType.build(index_type), + "updated_ts": exp.DataType.build("bigint"), + "unpaused_ts": exp.DataType.build("bigint"), + "ttl_ms": exp.DataType.build("bigint"), + "unrestorable": exp.DataType.build("boolean"), + "forward_only": exp.DataType.build("boolean"), + "dev_version": exp.DataType.build(index_type), + "fingerprint": exp.DataType.build(blob_type), + }, + ) diff --git a/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py b/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py new file mode 100644 index 0000000000..b80ed35a35 --- /dev/null +++ b/sqlmesh/migrations/v0099_add_last_altered_to_intervals.py @@ -0,0 +1,25 @@ +"""Add dev version to the intervals table.""" + +from sqlglot import exp + + +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore + intervals_table = "_intervals" + if schema: + intervals_table = f"{schema}.{intervals_table}" + + alter_table_exp = exp.Alter( + this=exp.to_table(intervals_table), + kind="TABLE", + actions=[ + exp.ColumnDef( + this=exp.to_column("last_altered_ts"), + kind=exp.DataType.build("BIGINT", dialect=engine_adapter.dialect), + ) + ], + ) + engine_adapter.execute(alter_table_exp) + + +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore + pass diff --git a/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py b/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py new file mode 100644 index 0000000000..9ff64c5e57 --- /dev/null +++ 
b/sqlmesh/migrations/v0100_add_grants_and_grants_target_layer.py @@ -0,0 +1,9 @@ +"""Add grants and grants_target_layer to incremental model metadata hash.""" + + +def migrate_schemas(engine_adapter, schema, **kwargs): # type: ignore + pass + + +def migrate_rows(engine_adapter, schema, **kwargs): # type: ignore + pass diff --git a/sqlmesh/utils/__init__.py b/sqlmesh/utils/__init__.py index c220de4847..5b1b077216 100644 --- a/sqlmesh/utils/__init__.py +++ b/sqlmesh/utils/__init__.py @@ -21,6 +21,7 @@ from functools import lru_cache, reduce, wraps from pathlib import Path +import unicodedata from sqlglot import exp from sqlglot.dialects.dialect import Dialects @@ -291,8 +292,14 @@ def sqlglot_dialects() -> str: NON_ALNUM = re.compile(r"[^a-zA-Z0-9_]") +NON_ALUM_INCLUDE_UNICODE = re.compile(r"\W", flags=re.UNICODE) -def sanitize_name(name: str) -> str: + +def sanitize_name(name: str, *, include_unicode: bool = False) -> str: + if include_unicode: + s = unicodedata.normalize("NFC", name) + s = NON_ALUM_INCLUDE_UNICODE.sub("_", s) + return s return NON_ALNUM.sub("_", name) diff --git a/sqlmesh/utils/cache.py b/sqlmesh/utils/cache.py index 002248f511..e72c34f632 100644 --- a/sqlmesh/utils/cache.py +++ b/sqlmesh/utils/cache.py @@ -59,6 +59,10 @@ def __init__(self, path: Path, prefix: t.Optional[str] = None): threshold = to_datetime("1 week ago").timestamp() # delete all old cache files for file in self._path.glob("*"): + if IS_WINDOWS: + # the file.stat() call below will fail on windows if the :file name is longer than 260 chars + file = fix_windows_path(file) + if not file.stem.startswith(self._cache_version) or file.stat().st_atime < threshold: file.unlink(missing_ok=True) @@ -133,7 +137,7 @@ def clear(self) -> None: def _cache_entry_path(self, name: str, entry_id: str = "") -> Path: entry_file_name = "__".join(p for p in (self._cache_version, name, entry_id) if p) - full_path = self._path / sanitize_name(entry_file_name) + full_path = self._path / 
sanitize_name(entry_file_name, include_unicode=True) if IS_WINDOWS: # handle paths longer than 260 chars full_path = fix_windows_path(full_path) diff --git a/sqlmesh/utils/date.py b/sqlmesh/utils/date.py index 931cebf535..bdc15125d4 100644 --- a/sqlmesh/utils/date.py +++ b/sqlmesh/utils/date.py @@ -168,7 +168,7 @@ def to_datetime( dt: t.Optional[datetime] = value elif isinstance(value, date): dt = datetime(value.year, value.month, value.day) - elif isinstance(value, exp.Expression): + elif isinstance(value, exp.Expr): return to_datetime(value.name) else: try: @@ -401,7 +401,7 @@ def to_time_column( dialect: str, time_column_format: t.Optional[str] = None, nullable: bool = False, -) -> exp.Expression: +) -> exp.Expr: """Convert a TimeLike object to the same time format and type as the model's time column.""" if dialect == "clickhouse" and time_column_type.is_type( *(exp.DataType.TEMPORAL_TYPES - {exp.DataType.Type.DATE, exp.DataType.Type.DATE32}) @@ -444,7 +444,7 @@ def to_time_column( def pandas_timestamp_to_pydatetime( - df: pd.DataFrame, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] + df: pd.DataFrame, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None ) -> pd.DataFrame: import pandas as pd from pandas.api.types import is_datetime64_any_dtype # type: ignore diff --git a/sqlmesh/utils/errors.py b/sqlmesh/utils/errors.py index d90965c25c..ca3e1bfb05 100644 --- a/sqlmesh/utils/errors.py +++ b/sqlmesh/utils/errors.py @@ -151,6 +151,10 @@ class AdditiveChangeError(SQLMeshError): pass +class MigrationNotSupportedError(SQLMeshError): + pass + + class NotificationTargetError(SQLMeshError): pass diff --git a/sqlmesh/utils/git.py b/sqlmesh/utils/git.py index 00410e776c..cdb9d4e2d5 100644 --- a/sqlmesh/utils/git.py +++ b/sqlmesh/utils/git.py @@ -16,7 +16,9 @@ def list_untracked_files(self) -> t.List[Path]: ) def list_uncommitted_changed_files(self) -> t.List[Path]: - return self._execute_list_output(["diff", "--name-only", "--diff-filter=d"], 
self._git_root) + return self._execute_list_output( + ["diff", "--name-only", "--diff-filter=d", "HEAD"], self._git_root + ) def list_committed_changed_files(self, target_branch: str = "main") -> t.List[Path]: return self._execute_list_output( diff --git a/sqlmesh/utils/jinja.py b/sqlmesh/utils/jinja.py index c9339cf404..bd82cf225c 100644 --- a/sqlmesh/utils/jinja.py +++ b/sqlmesh/utils/jinja.py @@ -12,7 +12,8 @@ from jinja2 import Environment, Template, nodes, UndefinedError from jinja2.runtime import Macro -from sqlglot import Dialect, Expression, Parser, TokenType +from sqlglot import Dialect, Parser, TokenType +from sqlglot.expressions import Expression from sqlmesh.core import constants as c from sqlmesh.core import dialect as d @@ -78,6 +79,11 @@ def extract(self, jinja: str, dialect: str = "") -> t.Dict[str, MacroInfo]: self.reset() self.sql = jinja self._tokens = Dialect.get_or_raise(dialect).tokenize(jinja) + + # guard for older sqlglot versions (before 30.0.3) + if hasattr(self, "_tokens_size"): + # keep the cached length in sync + self._tokens_size = len(self._tokens) self._index = -1 self._advance() @@ -133,6 +139,12 @@ def find_call_names(node: nodes.Node, vars_in_scope: t.Set[str]) -> t.Iterator[C vars_in_scope = vars_in_scope.copy() for child_node in node.iter_child_nodes(): if "target" in child_node.fields: + # For nodes with assignment targets (Assign, AssignBlock, For, Import), + # the target name could shadow a reference in the right hand side. + # So we need to process the RHS before adding the target to scope. + # For example: {% set model = model.path %} should track model.path. 
+ yield from find_call_names(child_node, vars_in_scope) + target = getattr(child_node, "target") if isinstance(target, nodes.Name): vars_in_scope.add(target.name) @@ -149,7 +161,9 @@ def find_call_names(node: nodes.Node, vars_in_scope: t.Set[str]) -> t.Iterator[C name = call_name(child_node) if name[0][0] != "'" and name[0] not in vars_in_scope: yield (name, child_node) - yield from find_call_names(child_node, vars_in_scope) + + if "target" not in child_node.fields: + yield from find_call_names(child_node, vars_in_scope) def extract_call_names( @@ -206,6 +220,20 @@ def extract_macro_references_and_variables( return macro_references, variables +def sort_dict_recursive( + item: t.Dict[str, t.Any], +) -> t.Dict[str, t.Any]: + sorted_dict: t.Dict[str, t.Any] = {} + for k, v in sorted(item.items()): + if isinstance(v, list): + sorted_dict[k] = sorted(v) + elif isinstance(v, dict): + sorted_dict[k] = sort_dict_recursive(v) + else: + sorted_dict[k] = v + return sorted_dict + + JinjaGlobalAttribute = t.Union[str, int, float, bool, AttributeDict] @@ -355,6 +383,7 @@ def build_environment(self, **kwargs: t.Any) -> Environment: context.update(builtin_globals) context.update(root_macros) context.update(package_macros) + context["render"] = lambda input: env.from_string(input).render() env.globals.update(context) env.filters.update(self._environment.filters) @@ -440,7 +469,7 @@ def to_expressions(self) -> t.List[Expression]: d.PythonCode( expressions=[ f"{k} = '{v}'" if isinstance(v, str) else f"{k} = {v}" - for k, v in sorted(filtered_objs.items()) + for k, v in sort_dict_recursive(filtered_objs).items() ] ) ) diff --git a/sqlmesh/utils/lineage.py b/sqlmesh/utils/lineage.py index f5b4506c68..f63395708d 100644 --- a/sqlmesh/utils/lineage.py +++ b/sqlmesh/utils/lineage.py @@ -70,7 +70,7 @@ class MacroReference(PydanticModel): def extract_references_from_query( - query: exp.Expression, + query: exp.Expr, context: t.Union["Context", "GenericContext[t.Any]"], document_path: Path, 
read_file: t.List[str], @@ -95,7 +95,11 @@ def extract_references_from_query( # Check if this table reference is a CTE in the current scope if cte_scope := scope.cte_sources.get(table_name): + if cte_scope.expression is None: + continue cte = cte_scope.expression.parent + if cte is None: + continue alias = cte.args["alias"] if isinstance(alias, exp.TableAlias): identifier = alias.this diff --git a/sqlmesh/utils/metaprogramming.py b/sqlmesh/utils/metaprogramming.py index 858e8a50da..cd77c36353 100644 --- a/sqlmesh/utils/metaprogramming.py +++ b/sqlmesh/utils/metaprogramming.py @@ -352,7 +352,8 @@ def walk(obj: t.Any, name: str, is_metadata: bool = False) -> None: walk(base, base.__qualname__, is_metadata) for k, v in obj.__dict__.items(): - if k.startswith("__"): + # skip dunder methods bar __init__ as it might contain user defined logic with cross class references + if k.startswith("__") and k != "__init__": continue # Traverse methods in a class to find global references @@ -362,10 +363,14 @@ def walk(obj: t.Any, name: str, is_metadata: bool = False) -> None: if callable(v): # Walk the method if it's part of the object, else it's a global function and we just store it if v.__qualname__.startswith(obj.__qualname__): - for k, v in func_globals(v).items(): - walk(v, k, is_metadata) - else: - walk(v, v.__name__, is_metadata) + try: + for k, v in func_globals(v).items(): + walk(v, k, is_metadata) + except (OSError, TypeError): + # __init__ may come from built-ins or wrapped callables + pass + else: + walk(v, k, is_metadata) elif callable(obj): for k, v in func_globals(obj).items(): walk(v, k, is_metadata) @@ -439,6 +444,41 @@ def value( ) +def _resolve_import_module(obj: t.Any, name: str) -> str: + """Resolve the most appropriate module path for importing an object. + + When a callable's ``__module__`` points to a submodule of a known public + module (e.g. 
``sqlglot.expressions.builders`` is a submodule of + ``sqlglot.expressions``), and the object is re-exported from that public + parent module, prefer the public parent so that generated import statements + remain stable across internal restructurings of third-party packages. + + Args: + obj: The callable to resolve. + name: The name under which the object will be imported. + + Returns: + The module path to use in the ``from import `` statement. + """ + module_name = getattr(obj, "__module__", None) or "" + parts = module_name.split(".") + + # Walk from the shallowest ancestor (excluding the top-level package) up to + # the immediate parent, returning the shallowest one that re-exports the object. + # We skip the top-level package to avoid over-normalizing (e.g. ``sqlglot`` + # re-exports everything, but callers expect ``sqlglot.expressions``). + for i in range(2, len(parts)): + parent = ".".join(parts[:i]) + try: + parent_module = sys.modules.get(parent) or importlib.import_module(parent) + if getattr(parent_module, name, None) is obj: + return parent + except Exception: + continue + + return module_name + + def serialize_env(env: t.Dict[str, t.Any], path: Path) -> t.Dict[str, Executable]: """Serializes a python function into a self contained dictionary. 
@@ -507,7 +547,7 @@ def serialize_env(env: t.Dict[str, t.Any], path: Path) -> t.Dict[str, Executable ) else: serialized[k] = Executable( - payload=f"from {v.__module__} import {name}", + payload=f"from {_resolve_import_module(v, name)} import {name}", kind=ExecutableKind.IMPORT, is_metadata=is_metadata, ) diff --git a/sqlmesh/utils/pydantic.py b/sqlmesh/utils/pydantic.py index 317e873aeb..8bc81e2774 100644 --- a/sqlmesh/utils/pydantic.py +++ b/sqlmesh/utils/pydantic.py @@ -56,7 +56,7 @@ def get_dialect(values: t.Any) -> str: return model._dialect if dialect is None else dialect # type: ignore -def _expression_encoder(e: exp.Expression) -> str: +def _expression_encoder(e: exp.Expr) -> str: return e.meta.get("sql") or e.sql(dialect=e.meta.get("dialect")) @@ -70,7 +70,7 @@ class PydanticModel(pydantic.BaseModel): # crippled badly. Here we need to enumerate all different ways of how sqlglot expressions # show up in pydantic models. json_encoders={ - exp.Expression: _expression_encoder, + exp.Expr: _expression_encoder, exp.DataType: _expression_encoder, exp.Tuple: _expression_encoder, AuditQueryTypes: _expression_encoder, # type: ignore @@ -190,7 +190,7 @@ def validate_list_of_strings(v: t.Any) -> t.List[str]: def validate_string(v: t.Any) -> str: - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return v.name return str(v) @@ -204,13 +204,13 @@ def validate_expression(expression: E, dialect: str) -> E: def bool_validator(v: t.Any) -> bool: if isinstance(v, exp.Boolean): return v.this - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): return str_to_bool(v.name) return str_to_bool(str(v or "")) def positive_int_validator(v: t.Any) -> int: - if isinstance(v, exp.Expression) and v.is_int: + if isinstance(v, exp.Expr) and v.is_int: v = int(v.name) if not isinstance(v, int): raise ValueError(f"Invalid num {v}. 
Value must be an integer value") @@ -237,10 +237,10 @@ def _formatted_validation_errors(error: pydantic.ValidationError) -> t.List[str] def _get_field( v: t.Any, values: t.Any, -) -> exp.Expression: +) -> exp.Expr: dialect = get_dialect(values) - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): expression = v else: expression = parse_one(v, dialect=dialect) @@ -257,16 +257,16 @@ def _get_field( def _get_fields( v: t.Any, values: t.Any, -) -> t.List[exp.Expression]: +) -> t.List[exp.Expr]: dialect = get_dialect(values) if isinstance(v, (exp.Tuple, exp.Array)): - expressions: t.List[exp.Expression] = v.expressions - elif isinstance(v, exp.Expression): + expressions: t.List[exp.Expr] = v.expressions + elif isinstance(v, exp.Expr): expressions = [v] else: expressions = [ - parse_one(entry, dialect=dialect) if isinstance(entry, str) else entry + parse_one(entry, dialect=dialect) if isinstance(entry, str) else entry # type: ignore[misc] for entry in ensure_list(v) ] @@ -278,7 +278,7 @@ def _get_fields( return results -def list_of_fields_validator(v: t.Any, values: t.Any) -> t.List[exp.Expression]: +def list_of_fields_validator(v: t.Any, values: t.Any) -> t.List[exp.Expr]: return _get_fields(v, values) @@ -289,17 +289,17 @@ def column_validator(v: t.Any, values: t.Any) -> exp.Column: return expression -def list_of_columns_or_star_validator( +def list_of_fields_or_star_validator( v: t.Any, values: t.Any -) -> t.Union[exp.Star, t.List[exp.Column]]: +) -> t.Union[exp.Star, t.List[exp.Expr]]: expressions = _get_fields(v, values) if len(expressions) == 1 and isinstance(expressions[0], exp.Star): return t.cast(exp.Star, expressions[0]) - return t.cast(t.List[exp.Column], expressions) + return t.cast(t.List[exp.Expr], expressions) def cron_validator(v: t.Any) -> str: - if isinstance(v, exp.Expression): + if isinstance(v, exp.Expr): v = v.name from croniter import CroniterBadCronError, croniter @@ -338,8 +338,8 @@ def get_concrete_types_from_typehint(typehint: 
type[t.Any]) -> set[type[t.Any]]: SQLGlotBool = bool SQLGlotPositiveInt = int SQLGlotColumn = exp.Column - SQLGlotListOfFields = t.List[exp.Expression] - SQLGlotListOfColumnsOrStar = t.Union[t.List[exp.Column], exp.Star] + SQLGlotListOfFields = t.List[exp.Expr] + SQLGlotListOfFieldsOrStar = t.Union[SQLGlotListOfFields, exp.Star] SQLGlotCron = str else: from pydantic.functional_validators import BeforeValidator @@ -348,11 +348,9 @@ def get_concrete_types_from_typehint(typehint: type[t.Any]) -> set[type[t.Any]]: SQLGlotString = t.Annotated[str, BeforeValidator(validate_string)] SQLGlotBool = t.Annotated[bool, BeforeValidator(bool_validator)] SQLGlotPositiveInt = t.Annotated[int, BeforeValidator(positive_int_validator)] - SQLGlotColumn = t.Annotated[exp.Expression, BeforeValidator(column_validator)] - SQLGlotListOfFields = t.Annotated[ - t.List[exp.Expression], BeforeValidator(list_of_fields_validator) - ] - SQLGlotListOfColumnsOrStar = t.Annotated[ - t.Union[t.List[exp.Column], exp.Star], BeforeValidator(list_of_columns_or_star_validator) + SQLGlotColumn = t.Annotated[exp.Expr, BeforeValidator(column_validator)] + SQLGlotListOfFields = t.Annotated[t.List[exp.Expr], BeforeValidator(list_of_fields_validator)] + SQLGlotListOfFieldsOrStar = t.Annotated[ + t.Union[SQLGlotListOfFields, exp.Star], BeforeValidator(list_of_fields_or_star_validator) ] SQLGlotCron = t.Annotated[str, BeforeValidator(cron_validator)] diff --git a/sqlmesh/utils/windows.py b/sqlmesh/utils/windows.py index 238ed353de..b2de5b8af9 100644 --- a/sqlmesh/utils/windows.py +++ b/sqlmesh/utils/windows.py @@ -3,12 +3,22 @@ IS_WINDOWS = platform.system() == "Windows" +WINDOWS_LONGPATH_PREFIX = "\\\\?\\" + def fix_windows_path(path: Path) -> Path: """ Windows paths are limited to 260 characters: https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation Users can change this by updating a registry entry but we cant rely on that. 
- We can quite commonly generate a cache file path that exceeds 260 characters which causes a FileNotFound error. - If we prefix the path with "\\?\" then we can have paths up to 32,767 characters + + SQLMesh quite commonly generates cache file paths that exceed 260 characters and thus cause a FileNotFound error. + If we prefix paths with "\\?\" then we can have paths up to 32,767 characters. + + Note that this prefix also means that relative paths no longer work. From the above docs: + > Because you cannot use the "\\?\" prefix with a relative path, relative paths are always limited to a total of MAX_PATH characters. + + So we also call path.resolve() to resolve the relative sections so that operations like `path.read_text()` continue to work """ - return Path("\\\\?\\" + str(path.absolute())) + if path.parts and not path.parts[0].startswith(WINDOWS_LONGPATH_PREFIX): + path = Path(WINDOWS_LONGPATH_PREFIX + str(path.absolute())) + return path.resolve() diff --git a/sqlmesh_dbt/cli.py b/sqlmesh_dbt/cli.py index 370f115d61..278daa5370 100644 --- a/sqlmesh_dbt/cli.py +++ b/sqlmesh_dbt/cli.py @@ -8,11 +8,13 @@ import functools -def _get_dbt_operations(ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]]) -> DbtOperations: +def _get_dbt_operations( + ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]], threads: t.Optional[int] = None +) -> DbtOperations: if not isinstance(ctx.obj, functools.partial): raise ValueError(f"Unexpected click context object: {type(ctx.obj)}") - dbt_operations = ctx.obj(vars=vars) + dbt_operations = ctx.obj(vars=vars, threads=threads) if not isinstance(dbt_operations, DbtOperations): raise ValueError(f"Unexpected dbt operations type: {type(dbt_operations)}") @@ -33,15 +35,39 @@ def _cleanup() -> None: select_option = click.option( "-s", - "-m", "--select", + multiple=True, + help="Specify the nodes to include.", +) +model_option = click.option( + "-m", "--models", "--model", multiple=True, - help="Specify the nodes to include.", + 
help="Specify the model nodes to include; other nodes are excluded.", ) exclude_option = click.option("--exclude", multiple=True, help="Specify the nodes to exclude.") +# TODO: expand this out into --resource-type/--resource-types and --exclude-resource-type/--exclude-resource-types +resource_types = [ + "metric", + "semantic_model", + "saved_query", + "source", + "analysis", + "model", + "test", + "unit_test", + "exposure", + "snapshot", + "seed", + "default", + "all", +] +resource_type_option = click.option( + "--resource-type", type=click.Choice(resource_types, case_sensitive=False) +) + @click.group(cls=ErrorHandlingGroup, invoke_without_command=True) @click.option("--profile", help="Which existing profile to load. Overrides output.profile") @@ -52,6 +78,22 @@ def _cleanup() -> None: default=False, help="Display debug logging during dbt execution. Useful for debugging and making bug reports events to help when debugging.", ) +@click.option( + "--log-level", + default="info", + type=click.Choice(["debug", "info", "warn", "error", "none"]), + help="Specify the minimum severity of events that are logged to the console and the log file.", +) +@click.option( + "--profiles-dir", + type=click.Path(exists=True, file_okay=False, path_type=Path), + help="Which directory to look in for the profiles.yml file. If not set, dbt will look in the current working directory first, then HOME/.dbt/", +) +@click.option( + "--project-dir", + type=click.Path(exists=True, file_okay=False, path_type=Path), + help="Which directory to look in for the dbt_project.yml file. 
Default is the current working directory and its parents.", +) @click.pass_context @cli_global_error_handler def dbt( @@ -59,6 +101,9 @@ def dbt( profile: t.Optional[str] = None, target: t.Optional[str] = None, debug: bool = False, + log_level: t.Optional[str] = None, + profiles_dir: t.Optional[Path] = None, + project_dir: t.Optional[Path] = None, ) -> None: """ An ELT tool for managing your SQL transformations and data models, powered by the SQLMesh engine. @@ -71,7 +116,13 @@ def dbt( # we have a partially applied function here because subcommands might set extra options like --vars # that need to be known before we attempt to load the project ctx.obj = functools.partial( - create, project_dir=Path.cwd(), profile=profile, target=target, debug=debug + create, + project_dir=project_dir, + profiles_dir=profiles_dir, + profile=profile, + target=target, + debug=debug, + log_level=log_level, ) if not ctx.invoked_subcommand: @@ -86,11 +137,15 @@ def dbt( @dbt.command() @select_option +@model_option @exclude_option +@resource_type_option @click.option( "-f", "--full-refresh", - help="If specified, dbt will drop incremental models and fully-recalculate the incremental table from the model definition.", + is_flag=True, + default=False, + help="If specified, sqlmesh will drop incremental models and fully-recalculate the incremental table from the model definition.", ) @click.option( "--env", @@ -100,21 +155,29 @@ def dbt( @click.option( "--empty/--no-empty", default=False, help="If specified, limit input refs and sources" ) +@click.option( + "--threads", + type=int, + help="Specify number of threads to use while executing models. 
Overrides settings in profiles.yml.", +) @vars_option @click.pass_context def run( ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]], + threads: t.Optional[int], env: t.Optional[str] = None, **kwargs: t.Any, ) -> None: """Compile SQL and execute against the current target database.""" - _get_dbt_operations(ctx, vars).run(environment=env, **kwargs) + _get_dbt_operations(ctx, vars, threads).run(environment=env, **kwargs) @dbt.command(name="list") @select_option +@model_option @exclude_option +@resource_type_option @vars_option @click.pass_context def list_(ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]], **kwargs: t.Any) -> None: diff --git a/sqlmesh_dbt/console.py b/sqlmesh_dbt/console.py index 3c62adfe68..6bf7a1618f 100644 --- a/sqlmesh_dbt/console.py +++ b/sqlmesh_dbt/console.py @@ -1,6 +1,7 @@ import typing as t from sqlmesh.core.console import TerminalConsole from sqlmesh.core.model import Model +from sqlmesh.core.snapshot.definition import Node from rich.tree import Tree @@ -9,19 +10,26 @@ def print(self, msg: str) -> None: return self._print(msg) def list_models( - self, models: t.List[Model], list_parents: bool = True, list_audits: bool = True + self, + models: t.List[Model], + all_nodes: t.Dict[str, Node], + list_parents: bool = True, + list_audits: bool = True, ) -> None: model_list = Tree("[bold]Models in project:[/bold]") for model in models: - model_tree = model_list.add(model.name) + model_tree = model_list.add(model.dbt_fqn or model.name) if list_parents: - for parent in model.depends_on: - model_tree.add(f"depends_on: {parent}") + for parent_name in model.depends_on: + if parent := all_nodes.get(parent_name): + parent_name = parent.dbt_fqn or parent_name + + model_tree.add(f"depends_on: {parent_name}") if list_audits: - for audit_name in model.audit_definitions: - model_tree.add(f"audit: {audit_name}") + for audit_name, audit in model.audit_definitions.items(): + model_tree.add(f"audit: {audit.dbt_fqn or audit_name}") 
self._print(model_list) diff --git a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py index ac7ad031f3..576d8e090b 100644 --- a/sqlmesh_dbt/operations.py +++ b/sqlmesh_dbt/operations.py @@ -11,7 +11,7 @@ from sqlmesh.dbt.project import Project from sqlmesh_dbt.console import DbtCliConsole from sqlmesh.core.model import Model - from sqlmesh.core.plan import Plan + from sqlmesh.core.plan import Plan, PlanBuilder logger = logging.getLogger(__name__) @@ -26,24 +26,67 @@ def list_( self, select: t.Optional[t.List[str]] = None, exclude: t.Optional[t.List[str]] = None, + models: t.Optional[t.List[str]] = None, + resource_type: t.Optional[str] = None, ) -> None: # dbt list prints: # - models # - "data tests" (audits) for those models # it also applies selectors which is useful for testing selectors - selected_models = list(self._selected_models(select, exclude).values()) - self.console.list_models(selected_models) + selected_models = list( + self._selected_models(select, exclude, models, resource_type).values() + ) + self.console.list_models( + selected_models, {k: v.node for k, v in self.context.snapshots.items()} + ) def run( self, environment: t.Optional[str] = None, select: t.Optional[t.List[str]] = None, exclude: t.Optional[t.List[str]] = None, + models: t.Optional[t.List[str]] = None, + resource_type: t.Optional[str] = None, full_refresh: bool = False, empty: bool = False, ) -> Plan: - return self.context.plan( - **self._plan_options( + consolidated_select, consolidated_exclude = selectors.consolidate( + select or [], exclude or [], models or [], resource_type + ) + + plan_builder = self._plan_builder( + environment=environment, + select=consolidated_select, + exclude=consolidated_exclude, + full_refresh=full_refresh, + empty=empty, + ) + + plan = plan_builder.build() + + self.console.plan( + plan_builder, + default_catalog=self.context.default_catalog, + # start doing work immediately (since no_diff is set, there isnt really anything for the user to say yes/no 
to) + auto_apply=True, + # dont output a diff of model changes + no_diff=True, + # don't throw up any prompts like "set the effective date" - use defaults + no_prompts=True, + ) + + return plan + + def _plan_builder( + self, + environment: t.Optional[str] = None, + select: t.Optional[t.List[str]] = None, + exclude: t.Optional[t.List[str]] = None, + full_refresh: bool = False, + empty: bool = False, + ) -> PlanBuilder: + return self.context.plan_builder( + **self._plan_builder_options( environment=environment, select=select, exclude=exclude, @@ -53,9 +96,15 @@ def run( ) def _selected_models( - self, select: t.Optional[t.List[str]] = None, exclude: t.Optional[t.List[str]] = None + self, + select: t.Optional[t.List[str]] = None, + exclude: t.Optional[t.List[str]] = None, + models: t.Optional[t.List[str]] = None, + resource_type: t.Optional[str] = None, ) -> t.Dict[str, Model]: - if sqlmesh_selector := selectors.to_sqlmesh(select or [], exclude or []): + if sqlmesh_selector := selectors.to_sqlmesh( + *selectors.consolidate(select or [], exclude or [], models or [], resource_type) + ): if self.debug: self.console.print(f"dbt --select: {select}") self.console.print(f"dbt --exclude: {exclude}") @@ -71,13 +120,15 @@ def _selected_models( return selected_models - def _plan_options( + def _plan_builder_options( self, - environment: t.Optional[str] = None, + # upstream dbt options select: t.Optional[t.List[str]] = None, exclude: t.Optional[t.List[str]] = None, empty: bool = False, full_refresh: bool = False, + # sqlmesh extra options + environment: t.Optional[str] = None, ) -> t.Dict[str, t.Any]: import sqlmesh.core.constants as c @@ -130,24 +181,38 @@ def _plan_options( # `dbt --empty` adds LIMIT 0 to the queries, resulting in empty tables. In addition, it happily clobbers existing tables regardless of if they are populated. 
# This *partially* lines up with --skip-backfill in SQLMesh, which indicates to not populate tables if they happened to be created/updated as part of this plan. # However, if a table already exists and has data in it, there is no change so SQLMesh will not recreate the table and thus it will not be cleared. - # So in order to fully replicate dbt's --empty, we also need --full-refresh semantics in order to replace existing tables + # Currently, SQLMesh has no way to say "restate with empty data", because --restate-model coupled with --skip-backfill ends up being a no-op options["skip_backfill"] = True - full_refresh = True + + self.console.log_warning( + "dbt's `--empty` drops the tables for all selected models and replaces them with empty ones.\n" + "This can easily result in accidental data loss, so SQLMesh limits this to only new or modified models and leaves the tables for existing unmodified models alone.\n\n" + "If you were creating empty tables to preview model changes, please consider using `--environment` to preview these changes in an isolated Virtual Data Environment instead.\n\n" + "Otherwise, if you really do want dbt's `--empty` behaviour of clearing every selected table, please file an issue on GitHub so we can better understand the use-case.\n" + ) + + if full_refresh: + # --full-refresh is implemented in terms of "add every model as a restatement" + # however, `--empty` sets skip_backfill=True, which causes the BackfillStage of the plan to be skipped. 
+ # the re-processing of data intervals happens in the BackfillStage, so if it gets skipped, restatements become a no-op + raise ValueError("`--full-refresh` alongside `--empty` is not currently supported.") if full_refresh: - # TODO: handling this requires some updates in the engine to enable restatements+changes in the same plan without affecting prod - # if the plan targets dev - pass + options.update( + dict( + # Add every selected model as a restatement to force them to get repopulated from scratch + restate_models=[m.dbt_fqn for m in self.context.models.values() if m.dbt_fqn] + if not select_models + else select_models, + # by default in SQLMesh, restatements only operate on what has been committed to state. + # in order to emulate dbt, we need to use the local filesystem instead, so we override this default + always_include_local_changes=True, + ) + ) return dict( environment=environment, select_models=select_models, - # dont output a diff of model changes - no_diff=True, - # don't throw up any prompts like "set the effective date" - use defaults - no_prompts=True, - # start doing work immediately (since no_diff is set, there isnt really anything for the user to say yes/no to) - auto_apply=True, **options, ) @@ -167,10 +232,13 @@ def close(self) -> None: def create( project_dir: t.Optional[Path] = None, + profiles_dir: t.Optional[Path] = None, profile: t.Optional[str] = None, target: t.Optional[str] = None, vars: t.Optional[t.Dict[str, t.Any]] = None, + threads: t.Optional[int] = None, debug: bool = False, + log_level: t.Optional[str] = None, ) -> DbtOperations: with Progress(transient=True) as progress: # Indeterminate progress bar before SQLMesh import to provide feedback to the user that something is indeed happening @@ -182,6 +250,7 @@ def create( from sqlmesh.core.console import set_console from sqlmesh_dbt.console import DbtCliConsole from sqlmesh.utils.errors import SQLMeshError + from sqlmesh.core.selector import DbtSelector # clear any existing 
handlers set up by click/rich as defaults so that once SQLMesh logging config is applied, # we dont get duplicate messages logged from things like console.log_warning() @@ -189,7 +258,7 @@ def create( while root_logger.hasHandlers(): root_logger.removeHandler(root_logger.handlers[0]) - configure_logging(force_debug=debug) + configure_logging(force_debug=debug, log_level=log_level) set_console(DbtCliConsole()) progress.update(load_task_id, description="Loading project", total=None) @@ -199,8 +268,16 @@ def create( sqlmesh_context = Context( paths=[project_dir], - config_loader_kwargs=dict(profile=profile, target=target, variables=vars), + config_loader_kwargs=dict( + profile=profile, + target=target, + variables=vars, + threads=threads, + profiles_dir=profiles_dir, + ), load=True, + # DbtSelector selects based on dbt model fqn's rather than SQLMesh model names + selector=DbtSelector, ) dbt_loader = sqlmesh_context._loaders[0] @@ -213,7 +290,7 @@ def create( return DbtOperations(sqlmesh_context, dbt_project, debug=debug) -def init_project_if_required(project_dir: Path) -> None: +def init_project_if_required(project_dir: Path, start: t.Optional[str] = None) -> None: """ SQLMesh needs a start date to as the starting point for calculating intervals on incremental models, amongst other things @@ -229,4 +306,6 @@ def init_project_if_required(project_dir: Path) -> None: if not any(f.exists() for f in [project_dir / file for file in ALL_CONFIG_FILENAMES]): get_console().log_warning("No existing SQLMesh config detected; creating one") - init_example_project(path=project_dir, engine_type=None, template=ProjectTemplate.DBT) + init_example_project( + path=project_dir, engine_type=None, template=ProjectTemplate.DBT, start=start + ) diff --git a/sqlmesh_dbt/selectors.py b/sqlmesh_dbt/selectors.py index 120d5dcb36..5821586ad3 100644 --- a/sqlmesh_dbt/selectors.py +++ b/sqlmesh_dbt/selectors.py @@ -4,7 +4,45 @@ logger = logging.getLogger(__name__) -def to_sqlmesh(dbt_select: 
t.Collection[str], dbt_exclude: t.Collection[str]) -> t.Optional[str]: +def consolidate( + select: t.List[str], + exclude: t.List[str], + models: t.List[str], + resource_type: t.Optional[str], +) -> t.Tuple[t.List[str], t.List[str]]: + """ + Given a bunch of dbt CLI arguments that may or may not be defined: + --select, --exclude, --models, --resource-type + + Combine them into a single set of --select/--exclude node selectors, throwing an error if mutually exclusive combinations are provided + Note that the returned value is still in dbt format, pass it to to_sqlmesh() to create a selector for the sqlmesh selector engine + """ + if models and select: + raise ValueError('"models" and "select" are mutually exclusive arguments') + + if models and resource_type: + raise ValueError('"models" and "resource_type" are mutually exclusive arguments') + + if models: + # --models implies resource_type:model + resource_type = "model" + + if resource_type: + resource_type_selector = f"resource_type:{resource_type}" + all_selectors = [*select, *models] + select = ( + [ + f"resource_type:{resource_type},{original_selector}" + for original_selector in all_selectors + ] + if all_selectors + else [resource_type_selector] + ) + + return select, exclude + + +def to_sqlmesh(dbt_select: t.List[str], dbt_exclude: t.List[str]) -> t.Optional[str]: """ Given selectors defined in the format of the dbt cli --select and --exclude arguments, convert them into a selector expression that the SQLMesh selector engine can understand. 
diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index e460387bbc..5e0737e1b6 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -1,10 +1,8 @@ import json -import logging import os import pytest import string import time_machine -from contextlib import contextmanager from os import getcwd, path, remove from pathlib import Path from shutil import rmtree @@ -32,16 +30,7 @@ def mock_runtime_env(monkeypatch): @pytest.fixture(scope="session") def runner() -> CliRunner: - return CliRunner() - - -@contextmanager -def disable_logging(): - logging.disable(logging.CRITICAL) - try: - yield - finally: - logging.disable(logging.NOTSET) + return CliRunner(env={"COLUMNS": "80"}) def create_example_project(temp_dir, template=ProjectTemplate.DEFAULT) -> None: @@ -247,7 +236,7 @@ def test_plan_restate_model(runner, tmp_path): ) assert result.exit_code == 0 assert_duckdb_test(result) - assert "Restating models" in result.output + assert "Models selected for restatement" in result.output assert "sqlmesh_example.full_model [full refresh" in result.output assert_model_batches_executed(result) assert "Virtual layer updated" not in result.output @@ -795,8 +784,7 @@ def test_run_cron_not_elapsed(runner, tmp_path, caplog): init_prod_and_backfill(runner, tmp_path) # No error if `prod` environment exists and cron has not elapsed - with disable_logging(): - result = runner.invoke(cli, ["--log-file-dir", tmp_path, "--paths", tmp_path, "run"]) + result = runner.invoke(cli, ["--log-file-dir", tmp_path, "--paths", tmp_path, "run"]) assert result.exit_code == 0 assert ( @@ -843,18 +831,17 @@ def test_table_name(runner, tmp_path): # Create and backfill `prod` environment create_example_project(tmp_path) init_prod_and_backfill(runner, tmp_path) - with disable_logging(): - result = runner.invoke( - cli, - [ - "--log-file-dir", - tmp_path, - "--paths", - tmp_path, - "table_name", - "sqlmesh_example.full_model", - ], - ) + result = runner.invoke( + cli, + [ + "--log-file-dir", + 
tmp_path, + "--paths", + tmp_path, + "table_name", + "sqlmesh_example.full_model", + ], + ) assert result.exit_code == 0 assert result.output.startswith("db.sqlmesh__sqlmesh_example.sqlmesh_example__full_model__") @@ -891,7 +878,6 @@ def test_dlt_pipeline_errors(runner, tmp_path): assert "Error: Could not attach to pipeline" in result.output -@time_machine.travel(FREEZE_TIME) def test_dlt_filesystem_pipeline(tmp_path): import dlt @@ -957,6 +943,7 @@ def test_dlt_filesystem_pipeline(tmp_path): " # pre_ping: False\n" " # pretty_sql: False\n" " # schema_differ_overrides: \n" + " # catalog_type_overrides: \n" " # aws_access_key_id: \n" " # aws_secret_access_key: \n" " # role_arn: \n" @@ -994,7 +981,6 @@ def test_dlt_filesystem_pipeline(tmp_path): rmtree(storage_path) -@time_machine.travel(FREEZE_TIME) def test_dlt_pipeline(runner, tmp_path): from dlt.common.pipeline import get_dlt_pipelines_dir @@ -1886,7 +1872,9 @@ def test_init_interactive_cli_mode_simple(runner: CliRunner, tmp_path: Path): assert "no_diff: true" in config_path.read_text() -def test_init_interactive_engine_install_msg(runner: CliRunner, tmp_path: Path): +def test_init_interactive_engine_install_msg(runner: CliRunner, tmp_path: Path, monkeypatch): + monkeypatch.setattr("sqlmesh.utils.rich.console.width", 80) + # Engine install text should not appear for built-in engines like DuckDB # Input: 1 (DEFAULT template), 1 (duckdb engine), 1 (DEFAULT CLI mode) result = runner.invoke( @@ -1960,11 +1948,11 @@ def test_init_dbt_template(runner: CliRunner, tmp_path: Path): @time_machine.travel(FREEZE_TIME) def test_init_project_engine_configs(tmp_path): engine_type_to_config = { - "redshift": "# concurrent_tasks: 4\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # user: \n # password: \n # database: \n # host: \n # port: \n # source_address: \n # unix_sock: \n # ssl: \n # sslmode: \n # timeout: \n # tcp_keepalive: \n # application_name: \n # preferred_role: \n 
# principal_arn: \n # credentials_provider: \n # region: \n # cluster_identifier: \n # iam: \n # is_serverless: \n # serverless_acct_id: \n # serverless_work_group: \n # enable_merge: ", - "bigquery": "# concurrent_tasks: 1\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # method: oauth\n # project: \n # execution_project: \n # quota_project: \n # location: \n # keyfile: \n # keyfile_json: \n # token: \n # refresh_token: \n # client_id: \n # client_secret: \n # token_uri: \n # scopes: \n # impersonated_service_account: \n # job_creation_timeout_seconds: \n # job_execution_timeout_seconds: \n # job_retries: 1\n # job_retry_deadline_seconds: \n # priority: \n # maximum_bytes_billed: ", - "snowflake": "account: \n # concurrent_tasks: 4\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # user: \n # password: \n # warehouse: \n # database: \n # role: \n # authenticator: \n # token: \n # host: \n # port: \n # application: Tobiko_SQLMesh\n # private_key: \n # private_key_path: \n # private_key_passphrase: \n # session_parameters: ", - "databricks": "# concurrent_tasks: 1\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # server_hostname: \n # http_path: \n # access_token: \n # auth_type: \n # oauth_client_id: \n # oauth_client_secret: \n # catalog: \n # http_headers: \n # session_configuration: \n # databricks_connect_server_hostname: \n # databricks_connect_access_token: \n # databricks_connect_cluster_id: \n # databricks_connect_use_serverless: False\n # force_databricks_connect: False\n # disable_databricks_connect: False\n # disable_spark_session: False", - "postgres": "host: \n user: \n password: \n port: \n database: \n # concurrent_tasks: 4\n # register_comments: True\n # pre_ping: True\n # pretty_sql: False\n # schema_differ_overrides: \n # keepalives_idle: \n # connect_timeout: 10\n # role: \n # sslmode: \n 
# application_name: ", + "redshift": "# concurrent_tasks: 4\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # catalog_type_overrides: \n # user: \n # password: \n # database: \n # host: \n # port: \n # source_address: \n # unix_sock: \n # ssl: \n # sslmode: \n # timeout: \n # tcp_keepalive: \n # application_name: \n # preferred_role: \n # principal_arn: \n # credentials_provider: \n # region: \n # cluster_identifier: \n # iam: \n # is_serverless: \n # serverless_acct_id: \n # serverless_work_group: \n # enable_merge: ", + "bigquery": "# concurrent_tasks: 1\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # catalog_type_overrides: \n # method: oauth\n # project: \n # execution_project: \n # quota_project: \n # location: \n # keyfile: \n # keyfile_json: \n # token: \n # refresh_token: \n # client_id: \n # client_secret: \n # token_uri: \n # scopes: \n # impersonated_service_account: \n # job_creation_timeout_seconds: \n # job_execution_timeout_seconds: \n # job_retries: 1\n # job_retry_deadline_seconds: \n # priority: \n # maximum_bytes_billed: \n # reservation: ", + "snowflake": "account: \n # concurrent_tasks: 4\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # catalog_type_overrides: \n # user: \n # password: \n # warehouse: \n # database: \n # role: \n # authenticator: \n # token: \n # host: \n # port: \n # application: Tobiko_SQLMesh\n # private_key: \n # private_key_path: \n # private_key_passphrase: \n # session_parameters: ", + "databricks": "# concurrent_tasks: 1\n # register_comments: True\n # pre_ping: False\n # pretty_sql: False\n # schema_differ_overrides: \n # catalog_type_overrides: \n # server_hostname: \n # http_path: \n # access_token: \n # auth_type: \n # oauth_client_id: \n # oauth_client_secret: \n # catalog: \n # http_headers: \n # session_configuration: \n # 
databricks_connect_server_hostname: \n # databricks_connect_access_token: \n # databricks_connect_cluster_id: \n # databricks_connect_use_serverless: False\n # force_databricks_connect: False\n # disable_databricks_connect: False\n # disable_spark_session: False", + "postgres": "host: \n user: \n password: \n port: \n database: \n # concurrent_tasks: 4\n # register_comments: True\n # pre_ping: True\n # pretty_sql: False\n # schema_differ_overrides: \n # catalog_type_overrides: \n # keepalives_idle: \n # connect_timeout: 10\n # role: \n # sslmode: \n # application_name: ", } for engine_type, expected_config in engine_type_to_config.items(): diff --git a/tests/conftest.py b/tests/conftest.py index e5bbc4f425..46086444bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -202,7 +202,15 @@ def validate( def pytest_collection_modifyitems(items, *args, **kwargs): - test_type_markers = {"fast", "slow", "docker", "remote", "isolated", "registry_isolation"} + test_type_markers = { + "fast", + "slow", + "docker", + "remote", + "isolated", + "registry_isolation", + "dialect_isolated", + } for item in items: for marker in item.iter_markers(): if marker.name in test_type_markers: @@ -239,7 +247,7 @@ def rescope_duckdb_classvar(request): yield -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(scope="function", autouse=True) def rescope_log_handlers(): logging.getLogger().handlers.clear() yield @@ -253,9 +261,12 @@ def rescope_lineage_cache(request): @pytest.fixture(autouse=True) def reset_console(): - from sqlmesh.core.console import set_console, NoopConsole + from sqlmesh.core.console import set_console, NoopConsole, get_console + orig_console = get_console() set_console(NoopConsole()) + yield + set_console(orig_console) @pytest.fixture @@ -370,7 +381,7 @@ def _make_function( @pytest.fixture def assert_exp_eq() -> t.Callable: def _assert_exp_eq( - source: exp.Expression | str, expected: exp.Expression | str, dialect: DialectType = None + source: exp.Expr | 
str, expected: exp.Expr | str, dialect: DialectType = None ) -> None: source_exp = maybe_parse(source, dialect=dialect) expected_exp = maybe_parse(expected, dialect=dialect) @@ -572,7 +583,19 @@ def _make_function( # shutil.copytree just doesnt work properly with the symlinks on Windows, regardless of the `symlinks` setting src = str(path.absolute()) dst = str(temp_dir.absolute()) - os.system(f"robocopy {src} {dst} /E /COPYALL") + + # Robocopy flag reference: https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/robocopy#copy-options + # /E: Copy subdirectories, including empty directories + # /COPY:D Copy "data" only. In particular, this avoids copying auditing information, which can throw + # an error like "ERROR : You do not have the Manage Auditing user right" + robocopy_cmd = f"robocopy {src} {dst} /E /COPY:D" + exit_code = os.system(robocopy_cmd) + + # exit code reference: https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/robocopy#exit-return-codes + if exit_code > 8: + raise Exception( + f"robocopy command: '{robocopy_cmd}' failed with exit code: {exit_code}" + ) # after copying, delete the files that would have been ignored for root, dirs, _ in os.walk(temp_dir): diff --git a/tests/core/engine_adapter/__init__.py b/tests/core/engine_adapter/__init__.py index 4761c4100b..a9370b8cc3 100644 --- a/tests/core/engine_adapter/__init__.py +++ b/tests/core/engine_adapter/__init__.py @@ -11,7 +11,7 @@ def to_sql_calls(adapter: EngineAdapter, identify: bool = True) -> t.List[str]: value = call[0][0] sql = ( value.sql(dialect=adapter.dialect, identify=identify) - if isinstance(value, exp.Expression) + if isinstance(value, exp.Expr) else str(value) ) output.append(sql) diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index c5377e309a..47ccdc876a 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ 
b/tests/core/engine_adapter/integration/__init__.py @@ -5,10 +5,12 @@ import sys import typing as t import time +from contextlib import contextmanager import pandas as pd # noqa: TID253 import pytest from sqlglot import exp, parse_one +from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlmesh import Config, Context, EngineAdapter from sqlmesh.core.config import load_config_from_paths @@ -274,7 +276,7 @@ def time_formatter(self) -> t.Callable: return lambda x, _: exp.Literal.string(to_ds(x)) @property - def partitioned_by(self) -> t.List[exp.Expression]: + def partitioned_by(self) -> t.List[exp.Expr]: return [parse_one(self.time_column)] @property @@ -386,8 +388,8 @@ def table(self, table_name: TableName, schema: str = TEST_SCHEMA) -> exp.Table: ) def physical_properties( - self, properties_for_dialect: t.Dict[str, t.Dict[str, str | exp.Expression]] - ) -> t.Dict[str, exp.Expression]: + self, properties_for_dialect: t.Dict[str, t.Dict[str, str | exp.Expr]] + ) -> t.Dict[str, exp.Expr]: if props := properties_for_dialect.get(self.dialect): return {k: exp.Literal.string(v) if isinstance(v, str) else v for k, v in props.items()} return {} @@ -744,6 +746,109 @@ def upsert_sql_model(self, model_definition: str) -> t.Tuple[Context, SqlModel]: self._context.upsert_model(model) return self._context, model + def _get_create_user_or_role( + self, username: str, password: t.Optional[str] = None + ) -> t.Tuple[str, t.Optional[str]]: + password = password or random_id() + if self.dialect in ["postgres", "redshift"]: + return username, f"CREATE USER \"{username}\" WITH PASSWORD '{password}'" + if self.dialect == "snowflake": + return username, f"CREATE ROLE {username}" + if self.dialect == "databricks": + # Creating an account-level group in Databricks requires making REST API calls so we are going to + # use a pre-created group instead. We assume the suffix on the name is the unique id. 
+ # In the Databricks UI, Workspace Settings -> Identity and Access, create the following groups: + # - test_user, test_analyst, test_etl_user, test_reader, test_writer, test_admin + # (there do not need to be any users assigned to these groups) + return "_".join(username.split("_")[:-1]), None + if self.dialect == "bigquery": + # BigQuery uses IAM service accounts that need to be pre-created + # Pre-created GCP service accounts: + # - sqlmesh-test-admin@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-analyst@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-etl-user@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-reader@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-user@{project-id}.iam.gserviceaccount.com + # - sqlmesh-test-writer@{project-id}.iam.gserviceaccount.com + role_name = ( + username.replace(f"_{self.test_id}", "").replace("test_", "").replace("_", "-") + ) + project_id = self.engine_adapter.get_current_catalog() + service_account = f"sqlmesh-test-{role_name}@{project_id}.iam.gserviceaccount.com" + return f"serviceAccount:{service_account}", None + raise ValueError(f"User creation not supported for dialect: {self.dialect}") + + def _create_user_or_role(self, username: str, password: t.Optional[str] = None) -> str: + username, create_user_sql = self._get_create_user_or_role(username, password) + if create_user_sql: + self.engine_adapter.execute(create_user_sql) + return username + + @contextmanager + def create_users_or_roles(self, *role_names: str) -> t.Iterator[t.Dict[str, str]]: + created_users = [] + roles = {} + + try: + for role_name in role_names: + user_name = normalize_identifiers( + self.add_test_suffix(f"test_{role_name}"), dialect=self.dialect + ).sql(dialect=self.dialect) + password = random_id() + if self.dialect == "redshift": + password += ( + "A" # redshift requires passwords to have at least one uppercase letter + ) + user_name = self._create_user_or_role(user_name, password) + 
created_users.append(user_name) + roles[role_name] = user_name + + yield roles + + finally: + for user_name in created_users: + self._cleanup_user_or_role(user_name) + + def get_select_privilege(self) -> str: + if self.dialect == "bigquery": + return "roles/bigquery.dataViewer" + return "SELECT" + + def get_insert_privilege(self) -> str: + if self.dialect == "databricks": + # This would really be "MODIFY" but for the purposes of having this be unique from UPDATE + # we return "MANAGE" instead + return "MANAGE" + if self.dialect == "bigquery": + return "roles/bigquery.dataEditor" + return "INSERT" + + def get_update_privilege(self) -> str: + if self.dialect == "databricks": + return "MODIFY" + if self.dialect == "bigquery": + return "roles/bigquery.dataOwner" + return "UPDATE" + + def _cleanup_user_or_role(self, user_name: str) -> None: + """Helper function to clean up a user/role and all their dependencies.""" + try: + if self.dialect in ["postgres", "redshift"]: + self.engine_adapter.execute(f""" + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE usename = '{user_name}' AND pid <> pg_backend_pid() + """) + self.engine_adapter.execute(f'DROP OWNED BY "{user_name}"') + self.engine_adapter.execute(f'DROP USER IF EXISTS "{user_name}"') + elif self.dialect == "snowflake": + self.engine_adapter.execute(f"DROP ROLE IF EXISTS {user_name}") + elif self.dialect in ["databricks", "bigquery"]: + # For Databricks and BigQuery, we use pre-created accounts that should not be deleted + pass + except Exception: + pass + def wait_until(fn: t.Callable[..., bool], attempts=3, wait=5) -> None: current_attempt = 0 diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 8e87b2c3c8..5635f4e1ba 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -128,7 +128,8 @@ gateways: warehouse: {{ env_var('SNOWFLAKE_WAREHOUSE') }} database: {{ 
env_var('SNOWFLAKE_DATABASE') }} user: {{ env_var('SNOWFLAKE_USER') }} - password: {{ env_var('SNOWFLAKE_PASSWORD') }} + authenticator: SNOWFLAKE_JWT + private_key_path: {{ env_var('SNOWFLAKE_PRIVATE_KEY_FILE', 'tests/fixtures/snowflake/rsa_key_no_pass.p8') }} check_import: false state_connection: type: duckdb @@ -139,7 +140,10 @@ gateways: catalog: {{ env_var('DATABRICKS_CATALOG') }} server_hostname: {{ env_var('DATABRICKS_SERVER_HOSTNAME') }} http_path: {{ env_var('DATABRICKS_HTTP_PATH') }} - access_token: {{ env_var('DATABRICKS_ACCESS_TOKEN') }} + auth_type: {{ env_var('DATABRICKS_AUTH_TYPE', 'databricks-oauth') }} + oauth_client_id: {{ env_var('DATABRICKS_CLIENT_ID') }} + oauth_client_secret: {{ env_var('DATABRICKS_CLIENT_SECRET') }} + databricks_connect_use_serverless: true check_import: false inttest_redshift: diff --git a/tests/core/engine_adapter/integration/conftest.py b/tests/core/engine_adapter/integration/conftest.py index eafdf2fe1d..3fb4bc15f1 100644 --- a/tests/core/engine_adapter/integration/conftest.py +++ b/tests/core/engine_adapter/integration/conftest.py @@ -7,8 +7,8 @@ import logging from pytest import FixtureRequest - from sqlmesh import Config, EngineAdapter +from sqlmesh.core.constants import SQLMESH_PATH from sqlmesh.core.config.connection import ( ConnectionConfig, AthenaConnectionConfig, @@ -34,7 +34,7 @@ def config(tmp_path: pathlib.Path) -> Config: project_paths=[ pathlib.Path(os.path.join(os.path.dirname(__file__), "config.yaml")), ], - personal_paths=[pathlib.Path("~/.sqlmesh/config.yaml").expanduser()], + personal_paths=[(SQLMESH_PATH / "config.yaml").expanduser()], variables={"tmp_path": str(tmp_path)}, ) @@ -148,7 +148,7 @@ def ctx_df( yield from create_test_context(*request.param) -@pytest.fixture(params=list(generate_pytest_params(ENGINES, query=True, df=False))) +@pytest.fixture(params=list(generate_pytest_params(ENGINES, query=True, df=True))) def ctx_query_and_df( request: FixtureRequest, create_test_context: 
t.Callable[[IntegrationTestEngine, str], t.Iterable[TestContext]], diff --git a/tests/core/engine_adapter/integration/docker/spark/Dockerfile b/tests/core/engine_adapter/integration/docker/spark/Dockerfile index 7fb39b840c..cfbe7d1e88 100644 --- a/tests/core/engine_adapter/integration/docker/spark/Dockerfile +++ b/tests/core/engine_adapter/integration/docker/spark/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/bitnami/spark:3.5 +FROM bitnamilegacy/spark:3.5.2 USER root RUN install_packages curl USER 1001 diff --git a/tests/core/engine_adapter/integration/test_freshness.py b/tests/core/engine_adapter/integration/test_freshness.py new file mode 100644 index 0000000000..e5ee574e7e --- /dev/null +++ b/tests/core/engine_adapter/integration/test_freshness.py @@ -0,0 +1,488 @@ +# type: ignore +from __future__ import annotations + +import pathlib +import typing as t +from datetime import datetime, timedelta +from IPython.utils.capture import capture_output + +import time_machine +from pytest_mock.plugin import MockerFixture + +import pytest +import time_machine + +import sqlmesh +from sqlmesh import Config, Context +from sqlmesh.utils.date import now, to_datetime +from sqlmesh.utils.errors import SignalEvalError +from tests.core.engine_adapter.integration import ( + TestContext, + TEST_SCHEMA, +) +from tests.utils.test_helpers import use_terminal_console + +EVALUATION_SPY = None + + +@pytest.fixture(autouse=True) +def _skip_snowflake(ctx: TestContext): + if ctx.dialect == "snowflake": + # these tests use callbacks that need to run db queries within a time_travel context that changes the system time to be in the future + # this causes invalid JWT's to be generated when the callbacks try to run a db query + pytest.skip( + "snowflake.connector generates an invalid JWT when time_travel changes the system time" + ) + + +# Mock the snapshot evaluator's evaluate function to count the number of times it is called +@pytest.fixture(autouse=True, scope="function") +def 
_install_evaluation_spy(mocker: MockerFixture): + global EVALUATION_SPY + EVALUATION_SPY = mocker.spy(sqlmesh.core.snapshot.evaluator.SnapshotEvaluator, "evaluate") + yield + EVALUATION_SPY = None + + +def assert_snapshot_last_altered_ts( + context: Context, + snapshot_id: str, + last_altered_ts: datetime, + dev_last_altered_ts: t.Optional[datetime] = None, +): + """ + Ensure that prod and dev last altered timestamps of a snapshot are as expected. + """ + snapshot = context.state_sync.get_snapshots([snapshot_id])[snapshot_id] + + if snapshot.is_external: + return + + assert to_datetime(snapshot.last_altered_ts).replace(microsecond=0) == last_altered_ts.replace( + microsecond=0 + ) + + if dev_last_altered_ts: + assert to_datetime(snapshot.dev_last_altered_ts).replace( + microsecond=0 + ) == dev_last_altered_ts.replace(microsecond=0) + + +def assert_model_evaluation( + lambda_func, was_evaluated: bool = True, day_delta: int = 0, model_evaluations: int = 1 +): + """ + Ensure that a model was evaluated by checking the freshness signal and that + the evaluation function was called the expected number of times. + """ + EVALUATION_SPY.reset_mock() + timestamp = now(minute_floor=False) + timedelta(days=day_delta) + with time_machine.travel(timestamp, tick=False): + with capture_output() as output: + plan_or_run_result = lambda_func() + + evaluate_function_called = EVALUATION_SPY.call_count == model_evaluations + signal_was_checked = "Checking signals for" in output.stdout + + assert signal_was_checked + if was_evaluated: + assert "All ready" in output.stdout + assert evaluate_function_called + else: + assert "None ready" in output.stdout + assert not evaluate_function_called + + return timestamp, plan_or_run_result + + +def create_model( + name: str, schema: str, query: str, path: pathlib.Path, signals: str = "freshness()" +): + """ + Create a freshness model with the given name, path, and query. 
+ """ + model_name = f"{schema}.{name}" + model_path = path / "models" / f"{name}.sql" + (path / "models").mkdir(parents=True, exist_ok=True) + model_path.write_text( + f""" + MODEL ( + name {model_name}, + start '2024-01-01', + kind FULL, + signals ( + {signals}, + ) + ); + + {query} + """ + ) + + return model_name, model_path + + +def initialize_context( + ctx: TestContext, tmp_path: pathlib.Path, num_external_models: int = 1 +) -> t.Tuple[Context, str, t.List[str]]: + """ + Initialize a context by creating a schema and external models. + """ + adapter = ctx.engine_adapter + if not adapter.SUPPORTS_METADATA_TABLE_LAST_MODIFIED_TS: + pytest.skip("This test only runs for engines that support metadata-based freshness") + + # Create & initialize schema + schema = ctx.add_test_suffix(TEST_SCHEMA) + ctx._schemas.append(schema) + adapter.create_schema(schema) + + # Create & initialize external models + external_tables = [] + + yaml_content = "" + for i in range(1, num_external_models + 1): + external_table = f"{schema}.external_table{i}" + external_tables.append(f"{schema}.external_table{i}") + adapter.execute( + f"CREATE TABLE {external_table} AS (SELECT {i} AS col{i})", + quote_identifiers=False, + ) + + yaml_content = ( + yaml_content + + f""" +- name: {external_table} + columns: + col{i}: int + +""" + ) + + external_models_yaml = tmp_path / "external_models.yaml" + external_models_yaml.write_text(yaml_content) + + # Initialize context + def _set_config(gateway: str, config: Config) -> None: + config.model_defaults.dialect = ctx.dialect + + context = ctx.create_context(path=tmp_path, config_mutator=_set_config) + + return context, schema, external_tables + + +@use_terminal_console +def test_external_model_freshness(ctx: TestContext, tmp_path: pathlib.Path, mocker: MockerFixture): + adapter = ctx.engine_adapter + context, schema, (external_table1, external_table2) = initialize_context( + ctx, tmp_path, num_external_models=2 + ) + + # Create model that depends on 
external models + model_name, model_path = create_model( + "new_model", + schema, + f"SELECT col1 * col2 AS col FROM {external_table1}, {external_table2}", + tmp_path, + ) + + context.load() + + # Case 1: Model is evaluated for the first plan + prod_plan_ts_1, prod_plan_1 = assert_model_evaluation( + lambda: context.plan(auto_apply=True, no_prompts=True) + ) + + prod_snapshot_id = next(iter(prod_plan_1.context_diff.new_snapshots)) + assert_snapshot_last_altered_ts(context, prod_snapshot_id, last_altered_ts=prod_plan_ts_1) + + # Case 2: Model is NOT evaluated on run if external models are not fresh + assert_model_evaluation(lambda: context.run(), was_evaluated=False, day_delta=1) + + # Case 3: Differentiate last_altered_ts between snapshots with shared version + # For instance, creating a FORWARD_ONLY change in dev (reusing the version but creating a dev preview) should not cause + # any side effects to the prod snapshot's last_altered_ts hydration + model_path.write_text(model_path.read_text().replace("col1 * col2", "col1 + col2")) + context.load() + dev_plan_ts = now(minute_floor=False) + timedelta(days=2) + with time_machine.travel(dev_plan_ts, tick=False): + dev_plan = context.plan( + environment="dev", forward_only=True, auto_apply=True, no_prompts=True + ) + + context.state_sync.clear_cache() + dev_snapshot_id = next(iter(dev_plan.context_diff.new_snapshots)) + assert_snapshot_last_altered_ts( + context, + dev_snapshot_id, + last_altered_ts=prod_plan_ts_1, + dev_last_altered_ts=dev_plan_ts, + ) + assert_snapshot_last_altered_ts(context, prod_snapshot_id, last_altered_ts=prod_plan_ts_1) + + # Case 4: Model is evaluated on run if any external model is fresh + adapter.execute(f"INSERT INTO {external_table2} (col2) VALUES (3)", quote_identifiers=False) + assert_model_evaluation(lambda: context.run(), day_delta=2) + + # Case 5: Model is evaluated if changed (case 3) even if the external model is not fresh + model_path.write_text(model_path.read_text().replace("col1 
+ col2", "col1 * col2 * 5")) + context.load() + assert_model_evaluation( + lambda: context.plan(auto_apply=True, no_prompts=True), + day_delta=3, + ) + + # Case 6: Model is evaluated on a restatement plan even if the external model is not fresh + assert_model_evaluation( + lambda: context.plan(restate_models=[model_name], auto_apply=True, no_prompts=True), + day_delta=4, + ) + + +@use_terminal_console +def test_mixed_model_freshness(ctx: TestContext, tmp_path: pathlib.Path): + """ + Scenario: Freshness for a model that depends on both external and SQLMesh models + """ + + adapter = ctx.engine_adapter + context, schema, (external_table,) = initialize_context(ctx, tmp_path, num_external_models=1) + + # Create parent model that depends on the external model + parent_model_name, _ = create_model( + "parent_model", + schema, + f"SELECT col1 AS new_col FROM {external_table}", + tmp_path, + ) + + # First child model depends only on the parent model + create_model( + "child_model1", + schema, + f"SELECT new_col FROM {parent_model_name}", + tmp_path, + ) + + # Second child model depends on the parent model and the external table + create_model( + "child_model2", + schema, + f"SELECT col1 + new_col FROM {parent_model_name}, {external_table}", + tmp_path, + ) + + # Third model does not depend on any models, so it should only be evaluated once + create_model( + "child_model3", + schema, + f"SELECT 1 AS col", + tmp_path, + ) + + context.load() + + # Case 1: New models are evaluated when introduced in a plan + prod_plan_ts_1, prod_plan_1 = assert_model_evaluation( + lambda: context.plan(auto_apply=True, no_prompts=True), + model_evaluations=4, + ) + + for new_snapshot in prod_plan_1.context_diff.new_snapshots: + assert_snapshot_last_altered_ts(context, new_snapshot, last_altered_ts=prod_plan_ts_1) + + # Case 2: Mixed models are evaluated if the upstream models (sqlmesh or external) become fresh + adapter.execute(f"INSERT INTO {external_table} (col1) VALUES (2)", 
quote_identifiers=False) + + assert_model_evaluation( + lambda: context.run(), was_evaluated=True, day_delta=1, model_evaluations=3 + ) + + # Case 3: Mixed models are still evaluated if breaking changes are introduced + create_model( + "child_model2", + schema, + f"SELECT col1 * new_col FROM {parent_model_name}, {external_table}", + tmp_path, + ) + + context.load() + + prod_plan_ts_2, prod_plan_2 = assert_model_evaluation( + lambda: context.plan(auto_apply=True, no_prompts=True), + day_delta=1, + model_evaluations=1, + ) + + assert prod_plan_2.context_diff.modified_snapshots + + assert_snapshot_last_altered_ts( + context, next(iter(prod_plan_2.context_diff.new_snapshots)), last_altered_ts=prod_plan_ts_2 + ) + + +def test_missing_external_model_freshness(ctx: TestContext, tmp_path: pathlib.Path): + """ + Scenario: Freshness for a model that depends on an external model that is missing + """ + adapter = ctx.engine_adapter + context, schema, (external_table,) = initialize_context(ctx, tmp_path) + + # Create model that depends on the external model + create_model( + "new_model", + schema, + f"SELECT * FROM {external_table}", + tmp_path, + ) + + context.load() + context.plan(auto_apply=True, no_prompts=True) + + # Case: By dropping the external table, the freshness signal should raise an error + # instead of silently succeeding/failing + adapter.execute(f"DROP TABLE {external_table}", quote_identifiers=False) + + with time_machine.travel(now() + timedelta(days=1)): + with pytest.raises(SignalEvalError): + context.run() + + +@use_terminal_console +def test_check_ready_intervals(ctx: TestContext, tmp_path: pathlib.Path): + """ + Scenario: Ensure that freshness evaluates the "ready" intervals of the parent snapshots i.e their + missing intervals plus their signals applied. 
+ + """ + + def _write_user_signal(signal: str, tmp_path: pathlib.Path): + signal_code = f""" +import typing as t +from sqlmesh import signal + +@signal() +{signal} + """ + + test_signals = tmp_path / "signals/test_signals.py" + test_signals.parent.mkdir(parents=True, exist_ok=True) + test_signals.write_text(signal_code) + + context, schema, _ = initialize_context(ctx, tmp_path, num_external_models=0) + + _write_user_signal( + """ +def my_signal(batch): + return True + """, + tmp_path, + ) + + # Parent model depends on a custom signal + parent_model, _ = create_model( + "parent_model", + schema, + f"SELECT 1 AS col", + tmp_path, + signals="my_signal()", + ) + + # Create a new model that depends on the parent model + create_model( + "child_model", + schema, + f"SELECT * FROM {parent_model}", + tmp_path, + ) + + # Case 1: Both models are evaluated when introduced in a plan and subsequent runs, + # given that `my_signal()` always returns True. + context.load() + context.plan(auto_apply=True, no_prompts=True) + + assert_model_evaluation( + lambda: context.run(), + day_delta=2, + model_evaluations=2, + ) + + # Case 2: By changing the signal to return False, both models should not be evaluated. 
+ _write_user_signal( + """ +def my_signal(batch): + return False + """, + tmp_path, + ) + + context.load() + context.plan(auto_apply=True, no_prompts=True) + + assert_model_evaluation( + lambda: context.run(), + day_delta=3, + was_evaluated=False, + ) + + +@use_terminal_console +def test_registered_and_unregistered_external_models( + ctx: TestContext, tmp_path: pathlib.Path, mocker: MockerFixture +): + """ + Scenario: Ensure that external models are queried for their last modified timestamp + regardless of whether they are present in the "external_models.yaml" file (registered) or not (unregistered) + """ + + adapter = ctx.engine_adapter + context, schema, (registered_external_table,) = initialize_context( + ctx, tmp_path, num_external_models=1 + ) + + current_catalog = ctx.engine_adapter.get_current_catalog() + + def normalize_external_table_name(external_table_name) -> str: + from sqlglot import exp + + normalized = exp.normalize_table_name( + f"{current_catalog}.{external_table_name}", dialect=ctx.dialect + ) + return exp.table_name(normalized, dialect=ctx.dialect, identify=True) + + unregistered_external_table = f"{schema}.unregistered_external_table" + + adapter.execute( + f"CREATE TABLE {unregistered_external_table} AS (SELECT 1 AS col)", + quote_identifiers=False, + ) + + create_model( + "new_model", + schema, + f"SELECT * FROM {unregistered_external_table}, {registered_external_table}", + tmp_path, + ) + + context.load() + context.plan(auto_apply=True, no_prompts=True) + + spy = mocker.spy( + sqlmesh.core.engine_adapter.SnowflakeEngineAdapter, "get_table_last_modified_ts" + ) + assert_model_evaluation( + lambda: context.run(), + day_delta=1, + was_evaluated=False, + ) + + assert spy.call_args_list + + # The first argument of "get_table_last_modified_ts" is a list of external table names in normalized form + # Ensure that this contains both external tables (registered and unregistered) + assert sorted(spy.call_args[0][1]) == sorted( + [ + 
normalize_external_table_name(registered_external_table), + normalize_external_table_name(unregistered_external_table), + ] + ) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index e02877e0c6..1fba346db3 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -11,6 +11,9 @@ from unittest.mock import patch import logging + +import time_machine + import numpy as np # noqa: TID253 import pandas as pd # noqa: TID253 import pytest @@ -22,6 +25,7 @@ from sqlmesh import Config, Context from sqlmesh.cli.project_init import init_example_project +from sqlmesh.core.config.common import VirtualEnvironmentMode from sqlmesh.core.config.connection import ConnectionConfig import sqlmesh.core.dialect as d from sqlmesh.core.environment import EnvironmentSuffixTarget @@ -1938,7 +1942,12 @@ def test_transaction(ctx: TestContext): ctx.compare_with_current(table, input_data) -def test_sushi(ctx: TestContext, tmp_path: pathlib.Path): +@pytest.mark.parametrize( + "virtual_environment_mode", [VirtualEnvironmentMode.FULL, VirtualEnvironmentMode.DEV_ONLY] +) +def test_sushi( + ctx: TestContext, tmp_path: pathlib.Path, virtual_environment_mode: VirtualEnvironmentMode +): if ctx.mark == "athena_hive": pytest.skip( "Sushi end-to-end tests only need to run once for Athena because sushi needs a hybrid of both Hive and Iceberg" @@ -1984,6 +1993,7 @@ def _mutate_config(gateway: str, config: Config) -> None: ).sql(dialect=config.model_defaults.dialect) for e in before_all ] + config.virtual_environment_mode = virtual_environment_mode context = ctx.create_context(_mutate_config, path=tmp_path, ephemeral_state_connection=False) @@ -3767,7 +3777,7 @@ def _set_config(gateway: str, config: Config) -> None: ] -def test_materialized_view_evaluation(ctx: TestContext, mocker: MockerFixture): +def test_materialized_view_evaluation(ctx: 
TestContext): adapter = ctx.engine_adapter dialect = ctx.dialect @@ -3827,3 +3837,246 @@ def _assert_mview_value(value: int): assert any("Replacing view" in call[0][0] for call in mock_logger.call_args_list) _assert_mview_value(value=2) + + +def test_unicode_characters(ctx: TestContext, tmp_path: Path): + # Engines that don't quote identifiers in views are incompatible with unicode characters in model names + # at the time of writing this is Spark/Trino and they do this for compatibility reasons. + # I also think Spark may not support unicode in general but that would need to be verified. + if not ctx.engine_adapter.QUOTE_IDENTIFIERS_IN_VIEWS: + pytest.skip("Skipping as these engines have issues with unicode characters in model names") + + model_name = "客户数据" + table = ctx.table(model_name).sql(dialect=ctx.dialect) + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = f""" + MODEL ( + name {table}, + kind FULL, + dialect '{ctx.dialect}' + ); + SELECT 1 as id + """ + + (tmp_path / "models" / "客户数据.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + context.plan(auto_apply=True, no_prompts=True) + + results = ctx.get_metadata_results() + assert len(results.views) == 1 + assert results.views[0].lower() == model_name + + schema = d.to_schema(ctx.schema(), dialect=ctx.dialect) + schema_name = schema.args["db"].this + schema.args["db"].set("this", "sqlmesh__" + schema_name) + table_results = ctx.get_metadata_results(schema) + assert len(table_results.tables) == 1 + assert table_results.tables[0].lower().startswith(schema_name.lower() + "________") + + +def test_sync_grants_config(ctx: TestContext) -> None: + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + table = ctx.table("sync_grants_integration") + select_privilege = ctx.get_select_privilege() + insert_privilege = ctx.get_insert_privilege() + update_privilege = 
ctx.get_update_privilege() + with ctx.create_users_or_roles("reader", "writer", "admin") as roles: + ctx.engine_adapter.create_table(table, {"id": exp.DataType.build("INT")}) + + initial_grants = { + select_privilege: [roles["reader"]], + insert_privilege: [roles["writer"]], + } + ctx.engine_adapter.sync_grants_config(table, initial_grants) + + current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert set(current_grants.get(select_privilege, [])) == {roles["reader"]} + assert set(current_grants.get(insert_privilege, [])) == {roles["writer"]} + + target_grants = { + select_privilege: [roles["writer"], roles["admin"]], + update_privilege: [roles["admin"]], + } + ctx.engine_adapter.sync_grants_config(table, target_grants) + + synced_grants = ctx.engine_adapter._get_current_grants_config(table) + assert set(synced_grants.get(select_privilege, [])) == { + roles["writer"], + roles["admin"], + } + assert set(synced_grants.get(update_privilege, [])) == {roles["admin"]} + assert synced_grants.get(insert_privilege, []) == [] + + +def test_grants_sync_empty_config(ctx: TestContext): + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + table = ctx.table("grants_empty_test") + select_privilege = ctx.get_select_privilege() + insert_privilege = ctx.get_insert_privilege() + with ctx.create_users_or_roles("user") as roles: + ctx.engine_adapter.create_table(table, {"id": exp.DataType.build("INT")}) + + initial_grants = { + select_privilege: [roles["user"]], + insert_privilege: [roles["user"]], + } + ctx.engine_adapter.sync_grants_config(table, initial_grants) + + initial_current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert roles["user"] in initial_current_grants.get(select_privilege, []) + assert roles["user"] in initial_current_grants.get(insert_privilege, []) + + ctx.engine_adapter.sync_grants_config(table, {}) + + final_grants = 
ctx.engine_adapter._get_current_grants_config(table) + assert final_grants == {} + + +def test_grants_case_insensitive_grantees(ctx: TestContext): + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + with ctx.create_users_or_roles("reader", "writer") as roles: + table = ctx.table("grants_quoted_test") + ctx.engine_adapter.create_table(table, {"id": exp.DataType.build("INT")}) + + reader = roles["reader"] + writer = roles["writer"] + select_privilege = ctx.get_select_privilege() + + if ctx.dialect == "bigquery": + # BigQuery labels are case sensitive, e.g. serviceAccount + lablel, grantee = writer.split(":", 1) + upper_case_writer = f"{lablel}:{grantee.upper()}" + else: + upper_case_writer = writer.upper() + + grants_config = {select_privilege: [reader, upper_case_writer]} + ctx.engine_adapter.sync_grants_config(table, grants_config) + + # Grantees are still in lowercase + current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert reader in current_grants.get(select_privilege, []) + assert writer in current_grants.get(select_privilege, []) + + # Revoke writer + grants_config = {select_privilege: [reader.upper()]} + ctx.engine_adapter.sync_grants_config(table, grants_config) + + current_grants = ctx.engine_adapter._get_current_grants_config(table) + assert reader in current_grants.get(select_privilege, []) + assert writer not in current_grants.get(select_privilege, []) + + +def test_grants_plan(ctx: TestContext, tmp_path: Path): + if not ctx.engine_adapter.SUPPORTS_GRANTS: + pytest.skip( + f"Skipping Test since engine adapter {ctx.engine_adapter.dialect} doesn't support grants" + ) + + table = ctx.table("grant_model").sql(dialect="duckdb") + select_privilege = ctx.get_select_privilege() + insert_privilege = ctx.get_insert_privilege() + with ctx.create_users_or_roles("analyst", "etl_user") as roles: + (tmp_path / "models").mkdir(exist_ok=True) 
+ + model_def = f""" + MODEL ( + name {table}, + kind FULL, + grants ( + '{select_privilege}' = ['{roles["analyst"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, CURRENT_DATE as created_date + """ + + (tmp_path / "models" / "grant_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + + # Physical layer w/ grants + table_name = snapshot.table_name() + view_name = snapshot.qualified_view_name.for_environment( + plan_result.environment_naming_info, dialect=ctx.dialect + ) + current_grants = ctx.engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=ctx.dialect) + ) + assert current_grants == {select_privilege: [roles["analyst"]]} + + # Virtual layer (view) w/ grants + virtual_grants = ctx.engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=ctx.dialect) + ) + assert virtual_grants == {select_privilege: [roles["analyst"]]} + + # Update model with query change and new grants + updated_model = load_sql_based_model( + d.parse( + f""" + MODEL ( + name {table}, + kind FULL, + grants ( + '{select_privilege}' = ['{roles["analyst"]}', '{roles["etl_user"]}'], + '{insert_privilege}' = ['{roles["etl_user"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, CURRENT_DATE as created_date, 'v2' as version + """, + default_dialect=context.default_dialect, + ), + dialect=context.default_dialect, + ) + context.upsert_model(updated_model) + + plan = context.plan(auto_apply=True, no_prompts=True) + plan_result = PlanResults.create(plan, ctx, ctx.add_test_suffix(TEST_SCHEMA)) + assert len(plan_result.plan.directly_modified) == 1 + + new_snapshot = plan_result.snapshot_for(updated_model) + assert new_snapshot is not None + + new_table_name = new_snapshot.table_name() + final_grants = ctx.engine_adapter._get_current_grants_config( + 
exp.to_table(new_table_name, dialect=ctx.dialect) + ) + expected_final_grants = { + select_privilege: [roles["analyst"], roles["etl_user"]], + insert_privilege: [roles["etl_user"]], + } + assert set(final_grants.get(select_privilege, [])) == set( + expected_final_grants[select_privilege] + ) + assert final_grants.get(insert_privilege, []) == expected_final_grants[insert_privilege] + + # Virtual layer should also have the updated grants + updated_virtual_grants = ctx.engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=ctx.dialect) + ) + assert set(updated_virtual_grants.get(select_privilege, [])) == set( + expected_final_grants[select_privilege] + ) + assert ( + updated_virtual_grants.get(insert_privilege, []) + == expected_final_grants[insert_privilege] + ) diff --git a/tests/core/engine_adapter/integration/test_integration_athena.py b/tests/core/engine_adapter/integration/test_integration_athena.py index 1c0ece6d78..9d23af206e 100644 --- a/tests/core/engine_adapter/integration/test_integration_athena.py +++ b/tests/core/engine_adapter/integration/test_integration_athena.py @@ -378,7 +378,7 @@ def test_insert_overwrite_by_time_partition_date_type( ), # note: columns_to_types_from_df() would infer this as TEXT but we need a DATE type } - def time_formatter(time: TimeLike, _: t.Optional[t.Dict[str, exp.DataType]]) -> exp.Expression: + def time_formatter(time: TimeLike, _: t.Optional[t.Dict[str, exp.DataType]]) -> exp.Expr: return exp.cast(exp.Literal.string(to_ds(time)), "date") engine_adapter.create_table( @@ -440,7 +440,7 @@ def test_insert_overwrite_by_time_partition_datetime_type( ), # note: columns_to_types_from_df() would infer this as TEXT but we need a DATETIME type } - def time_formatter(time: TimeLike, _: t.Optional[t.Dict[str, exp.DataType]]) -> exp.Expression: + def time_formatter(time: TimeLike, _: t.Optional[t.Dict[str, exp.DataType]]) -> exp.Expr: return exp.cast(exp.Literal.string(to_ts(time)), "datetime") 
engine_adapter.create_table( diff --git a/tests/core/engine_adapter/integration/test_integration_clickhouse.py b/tests/core/engine_adapter/integration/test_integration_clickhouse.py index f09360c673..4420acec71 100644 --- a/tests/core/engine_adapter/integration/test_integration_clickhouse.py +++ b/tests/core/engine_adapter/integration/test_integration_clickhouse.py @@ -64,9 +64,7 @@ def _create_table_and_insert_existing_data( "ds": exp.DataType.build("Date", "clickhouse"), }, table_name: str = "data_existing", - partitioned_by: t.Optional[t.List[exp.Expression]] = [ - parse_one("toMonth(ds)", dialect="clickhouse") - ], + partitioned_by: t.Optional[t.List[exp.Expr]] = [parse_one("toMonth(ds)", dialect="clickhouse")], ) -> exp.Table: existing_data = existing_data existing_table_name: exp.Table = ctx.table(table_name) diff --git a/tests/core/engine_adapter/integration/test_integration_postgres.py b/tests/core/engine_adapter/integration/test_integration_postgres.py index 26b8cbda42..f236fdebce 100644 --- a/tests/core/engine_adapter/integration/test_integration_postgres.py +++ b/tests/core/engine_adapter/integration/test_integration_postgres.py @@ -1,9 +1,11 @@ import typing as t +from contextlib import contextmanager import pytest from pytest import FixtureRequest from pathlib import Path from sqlmesh.core.engine_adapter import PostgresEngineAdapter from sqlmesh.core.config import Config, DuckDBConnectionConfig +from sqlmesh.core.config.common import VirtualEnvironmentMode from tests.core.engine_adapter.integration import TestContext import time_machine from datetime import timedelta @@ -12,6 +14,7 @@ from sqlmesh.core.context import Context from sqlmesh.core.state_sync import CachingStateSync, EngineAdapterStateSync from sqlmesh.core.snapshot.definition import SnapshotId +from sqlmesh.utils import random_id from tests.core.engine_adapter.integration import ( TestContext, @@ -22,6 +25,87 @@ ) +def _cleanup_user(engine_adapter: PostgresEngineAdapter, user_name: str) -> 
None: + """Helper function to clean up a PostgreSQL user and all their dependencies.""" + try: + engine_adapter.execute(f""" + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE usename = '{user_name}' AND pid <> pg_backend_pid() + """) + engine_adapter.execute(f'DROP OWNED BY "{user_name}"') + engine_adapter.execute(f'DROP USER IF EXISTS "{user_name}"') + except Exception: + pass + + +@contextmanager +def create_users( + engine_adapter: PostgresEngineAdapter, *role_names: str +) -> t.Iterator[t.Dict[str, t.Dict[str, str]]]: + """Create a set of Postgres users and yield their credentials.""" + created_users = [] + roles = {} + + try: + for role_name in role_names: + user_name = f"test_{role_name}" + _cleanup_user(engine_adapter, user_name) + + for role_name in role_names: + user_name = f"test_{role_name}" + password = random_id() + engine_adapter.execute(f"CREATE USER \"{user_name}\" WITH PASSWORD '{password}'") + engine_adapter.execute(f'GRANT USAGE ON SCHEMA public TO "{user_name}"') + created_users.append(user_name) + roles[role_name] = {"username": user_name, "password": password} + + yield roles + + finally: + for user_name in created_users: + _cleanup_user(engine_adapter, user_name) + + +def create_engine_adapter_for_role( + role_credentials: t.Dict[str, str], ctx: TestContext, config: Config +) -> PostgresEngineAdapter: + """Create a PostgreSQL adapter for a specific role to test authentication and permissions.""" + from sqlmesh.core.config import PostgresConnectionConfig + + gateway = ctx.gateway + assert gateway in config.gateways + connection_config = config.gateways[gateway].connection + assert isinstance(connection_config, PostgresConnectionConfig) + + role_connection_config = PostgresConnectionConfig( + host=connection_config.host, + port=connection_config.port, + database=connection_config.database, + user=role_credentials["username"], + password=role_credentials["password"], + keepalives_idle=connection_config.keepalives_idle, + 
connect_timeout=connection_config.connect_timeout, + role=connection_config.role, + sslmode=connection_config.sslmode, + application_name=connection_config.application_name, + ) + + return t.cast(PostgresEngineAdapter, role_connection_config.create_engine_adapter()) + + +@contextmanager +def engine_adapter_for_role( + role_credentials: t.Dict[str, str], ctx: TestContext, config: Config +) -> t.Iterator[PostgresEngineAdapter]: + """Context manager that yields a PostgresEngineAdapter and ensures it is closed.""" + adapter = create_engine_adapter_for_role(role_credentials, ctx, config) + try: + yield adapter + finally: + adapter.close() + + @pytest.fixture(params=list(generate_pytest_params(ENGINES_BY_NAME["postgres"]))) def ctx( request: FixtureRequest, @@ -286,3 +370,857 @@ def _mutate_config(gateway: str, config: Config): assert after_objects.views == [ exp.to_table(model_b_prod_snapshot.table_name()).text("this") ] + + +# Grants Integration Tests + + +def test_grants_plan_target_layer_physical_only( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = """ + MODEL ( + name test_schema.physical_grants_model, + kind FULL, + grants ( + 'select' = ['test_reader'] + ), + grants_target_layer 'physical' + ); + SELECT 1 as id, 'physical_only' as layer + """ + + (tmp_path / "models" / "physical_grants_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + physical_table_name = snapshot.table_name() + + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert physical_grants == {"SELECT": [roles["reader"]["username"]]} + + # Virtual layer should have no grants + 
virtual_view_name = f"test_schema.physical_grants_model" + virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert virtual_grants == {} + + +def test_grants_plan_target_layer_virtual_only( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "viewer") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = """ + MODEL ( + name test_schema.virtual_grants_model, + kind FULL, + grants ( + 'select' = ['test_viewer'] + ), + grants_target_layer 'virtual' + ); + SELECT 1 as id, 'virtual_only' as layer + """ + + (tmp_path / "models" / "virtual_grants_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + physical_table_name = snapshot.table_name() + + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + # Physical table should have no grants + assert physical_grants == {} + + virtual_view_name = f"test_schema.virtual_grants_model" + virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert virtual_grants == {"SELECT": [roles["viewer"]["username"]]} + + +def test_grants_plan_full_refresh_model_via_replace( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + (tmp_path / "models" / "full_refresh_model.sql").write_text( + f""" + MODEL ( + name test_schema.full_refresh_model, + kind FULL, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'test_data' as status + """ + ) + + context = 
ctx.create_context(path=tmp_path) + + plan_result = context.plan( + "dev", # this triggers _replace_query_for_model for FULL models + auto_apply=True, + no_prompts=True, + ) + + assert len(plan_result.new_snapshots) == 1 + snapshot = plan_result.new_snapshots[0] + table_name = snapshot.table_name() + + # Physical table + grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert grants == {"SELECT": [roles["reader"]["username"]]} + + # Virtual view + dev_view_name = "test_schema__dev.full_refresh_model" + dev_grants = engine_adapter._get_current_grants_config( + exp.to_table(dev_view_name, dialect=engine_adapter.dialect) + ) + assert dev_grants == {"SELECT": [roles["reader"]["username"]]} + + +def test_grants_plan_incremental_model( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader", "writer") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_name = "incr_model" + model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ts + ), + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, @start_ds::timestamp as ts, 'data' as value + """ + + (tmp_path / "models" / f"{model_name}.sql").write_text(model_definition) + + context = ctx.create_context(path=tmp_path) + + plan_result = context.plan( + "dev", start="2020-01-01", end="2020-01-01", auto_apply=True, no_prompts=True + ) + assert len(plan_result.new_snapshots) == 1 + + snapshot = plan_result.new_snapshots[0] + table_name = snapshot.table_name() + + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert physical_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert physical_grants.get("INSERT", []) == 
[roles["writer"]["username"]] + + view_name = f"test_schema__dev.{model_name}" + view_grants = engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=engine_adapter.dialect) + ) + assert view_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert view_grants.get("INSERT", []) == [roles["writer"]["username"]] + + +def test_grants_plan_clone_environment( + engine_adapter: PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "reader") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + (tmp_path / "models" / "clone_model.sql").write_text( + f""" + MODEL ( + name test_schema.clone_model, + kind FULL, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'] + ), + grants_target_layer 'all' + ); + + SELECT 1 as id, 'data' as value + """ + ) + + context = ctx.create_context(path=tmp_path) + prod_plan_result = context.plan("prod", auto_apply=True, no_prompts=True) + + assert len(prod_plan_result.new_snapshots) == 1 + prod_snapshot = prod_plan_result.new_snapshots[0] + prod_table_name = prod_snapshot.table_name() + + # Prod physical table grants + prod_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_table_name, dialect=engine_adapter.dialect) + ) + assert prod_grants == {"SELECT": [roles["reader"]["username"]]} + + # Prod virtual view grants + prod_view_name = f"test_schema.clone_model" + prod_view_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_view_name, dialect=engine_adapter.dialect) + ) + assert prod_view_grants == {"SELECT": [roles["reader"]["username"]]} + + # Create dev environment (cloned from prod) + context.plan("dev", auto_apply=True, no_prompts=True, include_unmodified=True) + + # Physical table grants should remain unchanged + prod_grants_after_clone = engine_adapter._get_current_grants_config( + exp.to_table(prod_table_name, dialect=engine_adapter.dialect) + ) + assert prod_grants_after_clone == prod_grants + + # Dev virtual view 
should have the same grants as prod + dev_view_name = f"test_schema__dev.clone_model" + dev_view_grants = engine_adapter._get_current_grants_config( + exp.to_table(dev_view_name, dialect=engine_adapter.dialect) + ) + assert dev_view_grants == prod_grants + + +@pytest.mark.parametrize( + "model_name,kind_config,query,extra_config,needs_seed", + [ + ( + "grants_full", + "FULL", + "SELECT 1 as id, 'unchanged_query' as data", + "", + False, + ), + ( + "grants_view", + "VIEW", + "SELECT 1 as id, 'unchanged_query' as data", + "", + False, + ), + ( + "grants_incr_time", + "INCREMENTAL_BY_TIME_RANGE (time_column event_date)", + "SELECT '2025-09-01'::date as event_date, 1 as id, 'unchanged_query' as data", + "start '2025-09-01',", + False, + ), + ( + "grants_seed", + "SEED (path '../seeds/grants_seed.csv')", + "", + "", + True, + ), + ], +) +def test_grants_metadata_only_changes( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + model_name: str, + kind_config: str, + query: str, + extra_config: str, + needs_seed: bool, +): + with create_users(engine_adapter, "reader", "writer", "admin") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + if needs_seed: + (tmp_path / "seeds").mkdir(exist_ok=True) + csv_content = "id,data\\n1,unchanged_query" + (tmp_path / "seeds" / f"{model_name}.csv").write_text(csv_content) + + initial_model_def = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + {extra_config} + grants ( + 'select' = ['{roles["reader"]["username"]}'] + ), + grants_target_layer 'all' + ); + {query} + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(initial_model_def) + + context = ctx.create_context(path=tmp_path) + initial_plan_result = context.plan(auto_apply=True, no_prompts=True) + + assert len(initial_plan_result.new_snapshots) == 1 + initial_snapshot = initial_plan_result.new_snapshots[0] + + physical_table_name = initial_snapshot.table_name() + virtual_view_name = f"test_schema.{model_name}" + + 
initial_physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert initial_physical_grants == {"SELECT": [roles["reader"]["username"]]} + + initial_virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert initial_virtual_grants == {"SELECT": [roles["reader"]["username"]]} + + # Metadata-only change: update grants only using upsert_model + existing_model = context.get_model(f"test_schema.{model_name}") + context.upsert_model( + existing_model, + grants={ + "select": [roles["writer"]["username"], roles["admin"]["username"]], + "insert": [roles["admin"]["username"]], + }, + ) + second_plan_result = context.plan(auto_apply=True, no_prompts=True) + + expected_grants = { + "SELECT": [roles["writer"]["username"], roles["admin"]["username"]], + "INSERT": [roles["admin"]["username"]], + } + + # For seed models, grant changes rebuild the entire table, so it will create a new physical table + if model_name == "grants_seed" and second_plan_result.new_snapshots: + updated_snapshot = second_plan_result.new_snapshots[0] + physical_table_name = updated_snapshot.table_name() + + updated_physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert set(updated_physical_grants.get("SELECT", [])) == set(expected_grants["SELECT"]) + assert updated_physical_grants.get("INSERT", []) == expected_grants["INSERT"] + + updated_virtual_grants = engine_adapter._get_current_grants_config( + exp.to_table(virtual_view_name, dialect=engine_adapter.dialect) + ) + assert set(updated_virtual_grants.get("SELECT", [])) == set(expected_grants["SELECT"]) + assert updated_virtual_grants.get("INSERT", []) == expected_grants["INSERT"] + + +def _vde_dev_only_config(gateway: str, config: Config) -> None: + config.virtual_environment_mode = 
VirtualEnvironmentMode.DEV_ONLY + + +@pytest.mark.parametrize( + "grants_target_layer,model_kind", + [ + ("virtual", "FULL"), + ("physical", "FULL"), + ("all", "FULL"), + ("virtual", "VIEW"), + ("physical", "VIEW"), + ], +) +def test_grants_target_layer_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + grants_target_layer: str, + model_kind: str, +): + with create_users(engine_adapter, "reader", "writer") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + if model_kind == "VIEW": + grants_config = ( + f"'SELECT' = ['{roles['reader']['username']}', '{roles['writer']['username']}']" + ) + else: + grants_config = f""" + 'SELECT' = ['{roles["reader"]["username"]}', '{roles["writer"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + """.strip() + + model_def = f""" + MODEL ( + name test_schema.vde_model_{grants_target_layer}_{model_kind.lower()}, + kind {model_kind}, + grants ( + {grants_config} + ), + grants_target_layer '{grants_target_layer}' + ); + SELECT 1 as id, '{grants_target_layer}_{model_kind}' as test_type + """ + ( + tmp_path / "models" / f"vde_model_{grants_target_layer}_{model_kind.lower()}.sql" + ).write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + context.plan("prod", auto_apply=True, no_prompts=True) + + table_name = f"test_schema.vde_model_{grants_target_layer}_{model_kind.lower()}" + + # In VDE dev_only mode, VIEWs are created as actual views + assert context.engine_adapter.table_exists(table_name) + + grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["reader"]["username"] in grants.get("SELECT", []) + assert roles["writer"]["username"] in grants.get("SELECT", []) + + if model_kind != "VIEW": + assert roles["writer"]["username"] in grants.get("INSERT", []) + + +def test_grants_incremental_model_with_vde_dev_only( + engine_adapter: 
PostgresEngineAdapter, ctx: TestContext, tmp_path: Path +): + with create_users(engine_adapter, "etl", "analyst") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = f""" + MODEL ( + name test_schema.vde_incremental_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column event_date + ), + grants ( + 'SELECT' = ['{roles["analyst"]["username"]}'], + 'INSERT' = ['{roles["etl"]["username"]}'] + ), + grants_target_layer 'virtual' + ); + SELECT + 1 as id, + @start_date::date as event_date, + 'event' as event_type + """ + (tmp_path / "models" / "vde_incremental_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + context.plan("prod", auto_apply=True, no_prompts=True) + + prod_table = "test_schema.vde_incremental_model" + prod_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_table, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in prod_grants.get("SELECT", []) + assert roles["etl"]["username"] in prod_grants.get("INSERT", []) + + +@pytest.mark.parametrize( + "change_type,initial_query,updated_query,expect_schema_change", + [ + # Metadata-only change (grants only) + ( + "metadata_only", + "SELECT 1 as id, 'same' as status", + "SELECT 1 as id, 'same' as status", + False, + ), + # Breaking change only + ( + "breaking_only", + "SELECT 1 as id, 'initial' as status, 100 as amount", + "SELECT 1 as id, 'updated' as status", # Removed column + True, + ), + # Both metadata and breaking changes + ( + "metadata_and_breaking", + "SELECT 1 as id, 'initial' as status, 100 as amount", + "SELECT 2 as id, 'changed' as new_status", # Different schema + True, + ), + ], +) +def test_grants_changes_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + change_type: str, + initial_query: str, + updated_query: str, + expect_schema_change: bool, +): + with create_users(engine_adapter, "user1", "user2", "user3") as 
roles: + (tmp_path / "models").mkdir(exist_ok=True) + model_path = tmp_path / "models" / f"vde_changes_{change_type}.sql" + + initial_model = f""" + MODEL ( + name test_schema.vde_changes_{change_type}, + kind FULL, + grants ( + 'SELECT' = ['{roles["user1"]["username"]}'] + ), + grants_target_layer 'virtual' + ); + {initial_query} + """ + model_path.write_text(initial_model) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + context.plan("prod", auto_apply=True, no_prompts=True) + + table_name = f"test_schema.vde_changes_{change_type}" + initial_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["user1"]["username"] in initial_grants.get("SELECT", []) + assert roles["user2"]["username"] not in initial_grants.get("SELECT", []) + + # Update model with new grants and potentially new query + updated_model = f""" + MODEL ( + name test_schema.vde_changes_{change_type}, + kind FULL, + grants ( + 'SELECT' = ['{roles["user1"]["username"]}', '{roles["user2"]["username"]}', '{roles["user3"]["username"]}'], + 'INSERT' = ['{roles["user3"]["username"]}'] + ), + grants_target_layer 'virtual' + ); + {updated_query} + """ + model_path.write_text(updated_model) + + # Get initial table columns + initial_columns = set( + col[0] + for col in engine_adapter.fetchall( + f"SELECT column_name FROM information_schema.columns WHERE table_schema = 'test_schema' AND table_name = 'vde_changes_{change_type}'" + ) + ) + + context.load() + plan = context.plan("prod", auto_apply=True, no_prompts=True) + + assert len(plan.new_snapshots) == 1 + + current_columns = set( + col[0] + for col in engine_adapter.fetchall( + f"SELECT column_name FROM information_schema.columns WHERE table_schema = 'test_schema' AND table_name = 'vde_changes_{change_type}'" + ) + ) + + if expect_schema_change: + assert current_columns != initial_columns + else: + # For metadata-only changes, schema should be the 
same + assert current_columns == initial_columns + + # Grants should be updated in all cases + updated_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["user1"]["username"] in updated_grants.get("SELECT", []) + assert roles["user2"]["username"] in updated_grants.get("SELECT", []) + assert roles["user3"]["username"] in updated_grants.get("SELECT", []) + assert roles["user3"]["username"] in updated_grants.get("INSERT", []) + + +@pytest.mark.parametrize( + "grants_target_layer,environment", + [ + ("virtual", "prod"), + ("virtual", "dev"), + ("physical", "prod"), + ("physical", "staging"), + ("all", "prod"), + ("all", "preview"), + ], +) +def test_grants_target_layer_plan_env_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + grants_target_layer: str, + environment: str, +): + with create_users(engine_adapter, "grantee") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + + model_def = f""" + MODEL ( + name test_schema.vde_layer_model, + kind FULL, + grants ( + 'SELECT' = ['{roles["grantee"]["username"]}'] + ), + grants_target_layer '{grants_target_layer}' + ); + SELECT 1 as id, '{environment}' as env, '{grants_target_layer}' as layer + """ + (tmp_path / "models" / "vde_layer_model.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + + if environment == "prod": + context.plan("prod", auto_apply=True, no_prompts=True) + table_name = "test_schema.vde_layer_model" + grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] in grants.get("SELECT", []) + else: + context.plan(environment, auto_apply=True, no_prompts=True, include_unmodified=True) + virtual_view = f"test_schema__{environment}.vde_layer_model" + assert context.engine_adapter.table_exists(virtual_view) + virtual_grants = 
engine_adapter._get_current_grants_config( + exp.to_table(virtual_view, dialect=engine_adapter.dialect) + ) + + data_objects = engine_adapter.get_data_objects("sqlmesh__test_schema") + physical_tables = [ + obj + for obj in data_objects + if "vde_layer_model" in obj.name + and obj.name.endswith("__dev") # Always __dev suffix in VDE dev_only + and "TABLE" in str(obj.type).upper() + ] + + if grants_target_layer == "virtual": + # Virtual layer should have grants, physical should not + assert roles["grantee"]["username"] in virtual_grants.get("SELECT", []) + + assert len(physical_tables) > 0 + for physical_table in physical_tables: + physical_table_name = f"sqlmesh__test_schema.{physical_table.name}" + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] not in physical_grants.get("SELECT", []) + + elif grants_target_layer == "physical": + # Virtual layer should not have grants, physical should + assert roles["grantee"]["username"] not in virtual_grants.get("SELECT", []) + + assert len(physical_tables) > 0 + for physical_table in physical_tables: + physical_table_name = f"sqlmesh__test_schema.{physical_table.name}" + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] in physical_grants.get("SELECT", []) + + else: # grants_target_layer == "all" + # Both layers should have grants + assert roles["grantee"]["username"] in virtual_grants.get("SELECT", []) + assert len(physical_tables) > 0 + for physical_table in physical_tables: + physical_table_name = f"sqlmesh__test_schema.{physical_table.name}" + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["grantee"]["username"] in physical_grants.get("SELECT", []) + + +@pytest.mark.parametrize( + 
"model_kind", + [ + "SCD_TYPE_2", + "SCD_TYPE_2_BY_TIME", + ], +) +def test_grants_plan_scd_type_2_models( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + model_kind: str, +): + with create_users(engine_adapter, "reader", "writer", "analyst") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + model_name = "scd_model" + + kind_config = f"{model_kind} (unique_key [id])" + model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'initial_data' as name, CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(model_definition) + + context = ctx.create_context(path=tmp_path) + plan_result = context.plan( + "dev", start="2023-01-01", end="2023-01-01", auto_apply=True, no_prompts=True + ) + assert len(plan_result.new_snapshots) == 1 + + current_snapshot = plan_result.new_snapshots[0] + fingerprint_version = current_snapshot.fingerprint.to_version() + physical_table_name = ( + f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint_version}__dev" + ) + physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(physical_table_name, dialect=engine_adapter.dialect) + ) + assert physical_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert physical_grants.get("INSERT", []) == [roles["writer"]["username"]] + + view_name = f"test_schema__dev.{model_name}" + view_grants = engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=engine_adapter.dialect) + ) + assert view_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert view_grants.get("INSERT", []) == [roles["writer"]["username"]] + + # Data change + updated_model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + grants ( + 'SELECT' = 
['{roles["reader"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'updated_data' as name, CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(updated_model_definition) + + context.load() + context.plan("dev", start="2023-01-02", end="2023-01-02", auto_apply=True, no_prompts=True) + + snapshot = context.get_snapshot(f"test_schema.{model_name}") + assert snapshot + fingerprint = snapshot.fingerprint.to_version() + table_name = f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint}__dev" + data_change_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + assert data_change_grants.get("SELECT", []) == [roles["reader"]["username"]] + assert data_change_grants.get("INSERT", []) == [roles["writer"]["username"]] + + # Data + grants changes + grant_change_model_definition = f""" + MODEL ( + name test_schema.{model_name}, + kind {kind_config}, + grants ( + 'SELECT' = ['{roles["reader"]["username"]}', '{roles["analyst"]["username"]}'], + 'INSERT' = ['{roles["writer"]["username"]}'], + 'UPDATE' = ['{roles["analyst"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT 1 as id, 'grant_changed_data' as name, CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(grant_change_model_definition) + + context.load() + context.plan("dev", start="2023-01-03", end="2023-01-03", auto_apply=True, no_prompts=True) + + snapshot = context.get_snapshot(f"test_schema.{model_name}") + assert snapshot + fingerprint = snapshot.fingerprint.to_version() + table_name = f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint}__dev" + final_grants = engine_adapter._get_current_grants_config( + exp.to_table(table_name, dialect=engine_adapter.dialect) + ) + expected_select_users = {roles["reader"]["username"], roles["analyst"]["username"]} + assert 
set(final_grants.get("SELECT", [])) == expected_select_users + assert final_grants.get("INSERT", []) == [roles["writer"]["username"]] + assert final_grants.get("UPDATE", []) == [roles["analyst"]["username"]] + + final_view_grants = engine_adapter._get_current_grants_config( + exp.to_table(view_name, dialect=engine_adapter.dialect) + ) + assert set(final_view_grants.get("SELECT", [])) == expected_select_users + assert final_view_grants.get("INSERT", []) == [roles["writer"]["username"]] + assert final_view_grants.get("UPDATE", []) == [roles["analyst"]["username"]] + + +@pytest.mark.parametrize( + "model_kind", + [ + "SCD_TYPE_2", + "SCD_TYPE_2_BY_TIME", + ], +) +def test_grants_plan_scd_type_2_with_vde_dev_only( + engine_adapter: PostgresEngineAdapter, + ctx: TestContext, + tmp_path: Path, + model_kind: str, +): + with create_users(engine_adapter, "etl_user", "analyst") as roles: + (tmp_path / "models").mkdir(exist_ok=True) + model_name = "vde_scd_model" + + model_def = f""" + MODEL ( + name test_schema.{model_name}, + kind {model_kind} (unique_key [customer_id]), + grants ( + 'SELECT' = ['{roles["analyst"]["username"]}'], + 'INSERT' = ['{roles["etl_user"]["username"]}'] + ), + grants_target_layer 'all' + ); + SELECT + 1 as customer_id, + 'active' as status, + CURRENT_TIMESTAMP as updated_at + """ + (tmp_path / "models" / f"{model_name}.sql").write_text(model_def) + + context = ctx.create_context(path=tmp_path, config_mutator=_vde_dev_only_config) + + # Prod + context.plan("prod", auto_apply=True, no_prompts=True) + prod_table = f"test_schema.{model_name}" + prod_grants = engine_adapter._get_current_grants_config( + exp.to_table(prod_table, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in prod_grants.get("SELECT", []) + assert roles["etl_user"]["username"] in prod_grants.get("INSERT", []) + + # Dev + context.plan("dev", auto_apply=True, no_prompts=True, include_unmodified=True) + dev_view = f"test_schema__dev.{model_name}" + dev_grants = 
engine_adapter._get_current_grants_config( + exp.to_table(dev_view, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in dev_grants.get("SELECT", []) + assert roles["etl_user"]["username"] in dev_grants.get("INSERT", []) + + snapshot = context.get_snapshot(f"test_schema.{model_name}") + assert snapshot + fingerprint_version = snapshot.fingerprint.to_version() + dev_physical_table_name = ( + f"sqlmesh__test_schema.test_schema__{model_name}__{fingerprint_version}__dev" + ) + + dev_physical_grants = engine_adapter._get_current_grants_config( + exp.to_table(dev_physical_table_name, dialect=engine_adapter.dialect) + ) + assert roles["analyst"]["username"] in dev_physical_grants.get("SELECT", []) + assert roles["etl_user"]["username"] in dev_physical_grants.get("INSERT", []) diff --git a/tests/core/engine_adapter/integration/test_integration_snowflake.py b/tests/core/engine_adapter/integration/test_integration_snowflake.py index f9862c51cb..7f3c38be46 100644 --- a/tests/core/engine_adapter/integration/test_integration_snowflake.py +++ b/tests/core/engine_adapter/integration/test_integration_snowflake.py @@ -186,6 +186,7 @@ def _get_data_object(table: exp.Table) -> DataObject: assert not metadata.is_clustered +@pytest.mark.skip(reason="External volume LIST privileges not configured for CI test databases") def test_create_iceberg_table(ctx: TestContext) -> None: # Note: this test relies on a default Catalog and External Volume being configured in Snowflake # ref: https://docs.snowflake.com/en/user-guide/tables-iceberg-configure-catalog-integration#set-a-default-catalog-at-the-account-database-or-schema-level diff --git a/tests/core/engine_adapter/test_athena.py b/tests/core/engine_adapter/test_athena.py index 4fe57baf34..19c92f66ac 100644 --- a/tests/core/engine_adapter/test_athena.py +++ b/tests/core/engine_adapter/test_athena.py @@ -81,7 +81,7 @@ def table_diff(adapter: AthenaEngineAdapter) -> TableDiff: def test_table_location( adapter: 
AthenaEngineAdapter, config_s3_warehouse_location: t.Optional[str], - table_properties: t.Optional[t.Dict[str, exp.Expression]], + table_properties: t.Optional[t.Dict[str, exp.Expr]], table: exp.Table, expected_location: t.Optional[str], ) -> None: @@ -312,6 +312,7 @@ def test_replace_query(adapter: AthenaEngineAdapter, mocker: MockerFixture): ) mocker.patch.object(adapter, "_get_data_objects", return_value=[]) adapter.cursor.execute.reset_mock() + adapter._clear_data_object_cache() adapter.s3_warehouse_location = "s3://foo" adapter.replace_query( diff --git a/tests/core/engine_adapter/test_base.py b/tests/core/engine_adapter/test_base.py index b2dfcc7ccc..2b9bcc665f 100644 --- a/tests/core/engine_adapter/test_base.py +++ b/tests/core/engine_adapter/test_base.py @@ -13,7 +13,6 @@ from sqlmesh.core import dialect as d from sqlmesh.core.dialect import normalize_model_name from sqlmesh.core.engine_adapter import EngineAdapter, EngineAdapterWithIndexSupport -from sqlmesh.core.engine_adapter.mixins import InsertOverwriteWithMergeMixin from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, DataObject from sqlmesh.core.schema_diff import SchemaDiffer, TableAlterOperation, NestedSupport from sqlmesh.utils import columns_to_types_to_struct @@ -21,8 +20,6 @@ from sqlmesh.utils.errors import SQLMeshError, UnsupportedCatalogOperationError from tests.core.engine_adapter import to_sql_calls -if t.TYPE_CHECKING: - pass pytestmark = pytest.mark.engine @@ -482,7 +479,8 @@ def test_insert_overwrite_no_where(make_mocked_engine_adapter: t.Callable): def test_insert_overwrite_by_condition_column_contains_unsafe_characters( make_mocked_engine_adapter: t.Callable, mocker: MockerFixture ): - adapter = make_mocked_engine_adapter(InsertOverwriteWithMergeMixin) + adapter = make_mocked_engine_adapter(EngineAdapter) + adapter.INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.MERGE source_queries, columns_to_types = adapter._get_source_queries_and_columns_to_types( 
parse_one("SELECT 1 AS c"), None, target_table="test_table" @@ -3349,7 +3347,7 @@ def test_clone_table(make_mocked_engine_adapter: t.Callable): adapter.clone_table("target_table", "source_table") adapter.cursor.execute.assert_called_once_with( - "CREATE TABLE `target_table` CLONE `source_table`" + "CREATE TABLE IF NOT EXISTS `target_table` CLONE `source_table`" ) @@ -3697,3 +3695,478 @@ def test_casted_columns( assert [ x.sql() for x in EngineAdapter._casted_columns(columns_to_types, source_columns) ] == expected + + +def test_data_object_cache_get_data_objects( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[table1, table2] + ) + + result1 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert len(result1) == 2 + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert len(result2) == 2 + assert mock_get_data_objects.call_count == 1 # Should not increase + + result3 = adapter.get_data_objects("test_schema", {"table1"}) + assert len(result3) == 1 + assert result3[0].name == "table1" + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_get_data_objects_bypasses_cache( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = 
mocker.patch.object( + adapter, "_get_data_objects", return_value=[table1, table2] + ) + + assert adapter.get_data_objects("test_schema") + assert adapter.get_data_objects("test_schema", {"table1", "table2"}) + assert adapter.get_data_objects("test_schema", {"table1", "table2"}) + assert adapter.get_data_objects("test_schema", {"table1"}) + assert adapter.get_data_object("test_schema.table1") is not None + + mock_get_data_objects.return_value = [] + assert not adapter.get_data_objects("test_schema") + assert not adapter.get_data_objects("test_schema", {"missing"}) + assert not adapter.get_data_objects("test_schema", {"missing"}) + assert adapter.get_data_object("test_schema.missing") is None + + # None of the calls should've been cached + assert mock_get_data_objects.call_count == 9 + assert not adapter._data_object_cache + + +def test_data_object_cache_get_data_objects_no_object_names( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[table1, table2] + ) + + result1 = adapter.get_data_objects("test_schema", safe_to_cache=True) + assert len(result1) == 2 + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert len(result2) == 2 + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_get_data_object( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + + 
mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + result1 = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result1 is not None + assert result1.name == "test_table" + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result2 is not None + assert result2.name == "test_table" + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_cleared_on_drop_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + adapter.drop_table("test_schema.test_table") + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_drop_view( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + view = DataObject(catalog=None, schema="test_schema", name="test_view", type="view") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[view]) + + adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + adapter.drop_view("test_schema.test_view") + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert result is None + assert 
mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_drop_data_object( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + adapter.drop_data_object(table) + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_create_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlglot import exp + + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + # Initially cache that table doesn't exist + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + # Create the table + table = DataObject(catalog=None, schema="test_schema", name="test_table", type="table") + mock_get_data_objects.return_value = [table] + adapter.create_table( + "test_schema.test_table", + {"col1": exp.DataType.build("INT")}, + ) + + # Cache should be cleared, so next get_data_object should call _get_data_objects again + result = adapter.get_data_object("test_schema.test_table", safe_to_cache=True) + assert result is not None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_create_view( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlglot import parse_one + + adapter = 
make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + # Initially cache that view doesn't exist + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + # Create the view + view = DataObject(catalog=None, schema="test_schema", name="test_view", type="view") + mock_get_data_objects.return_value = [view] + adapter.create_view( + "test_schema.test_view", + parse_one("SELECT 1 AS col1"), + ) + + # Cache should be cleared, so next get_data_object should call _get_data_objects again + result = adapter.get_data_object("test_schema.test_view", safe_to_cache=True) + assert result is not None + assert mock_get_data_objects.call_count == 2 + + +def test_data_object_cache_cleared_on_clone_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlmesh.core.engine_adapter.snowflake import SnowflakeEngineAdapter + + adapter = make_mocked_engine_adapter( + SnowflakeEngineAdapter, patch_get_data_objects=False, default_catalog="test_catalog" + ) + + # Initially cache that target table doesn't exist + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.test_target", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + # Clone the table + target_table = DataObject( + catalog="test_catalog", schema="test_schema", name="test_target", type="table" + ) + mock_get_data_objects.return_value = [target_table] + adapter.clone_table("test_schema.test_target", "test_schema.test_source") + + # Cache should be cleared, so next get_data_object should call _get_data_objects again + result = adapter.get_data_object("test_schema.test_target", safe_to_cache=True) + assert result is not None + assert mock_get_data_objects.call_count == 2 + + 
+def test_data_object_cache_with_catalog( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlmesh.core.engine_adapter.snowflake import SnowflakeEngineAdapter + + adapter = make_mocked_engine_adapter( + SnowflakeEngineAdapter, patch_get_data_objects=False, default_catalog="test_catalog" + ) + + table = DataObject( + catalog="test_catalog", schema="test_schema", name="test_table", type="table" + ) + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[table]) + + result1 = adapter.get_data_object("test_catalog.test_schema.test_table", safe_to_cache=True) + assert result1 is not None + assert result1.catalog == "test_catalog" + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_object("test_catalog.test_schema.test_table", safe_to_cache=True) + assert result2 is not None + assert result2.catalog == "test_catalog" + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_partial_cache_hit( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + table3 = DataObject(catalog=None, schema="test_schema", name="table3", type="table") + + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[table1, table2] + ) + + adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert mock_get_data_objects.call_count == 1 + + mock_get_data_objects.return_value = [table3] + result = adapter.get_data_objects("test_schema", {"table1", "table3"}, safe_to_cache=True) + + assert len(result) == 2 + assert {obj.name for obj in result} == {"table1", "table3"} + assert mock_get_data_objects.call_count == 2 # Called again for table3 
+ + +def test_data_object_cache_get_data_objects_missing_objects( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + table1 = DataObject(catalog=None, schema="test_schema", name="table1", type="table") + table2 = DataObject(catalog=None, schema="test_schema", name="table2", type="table") + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + + result1 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert not result1 + assert mock_get_data_objects.call_count == 1 + + result2 = adapter.get_data_objects("test_schema", {"table1", "table2"}, safe_to_cache=True) + assert not result2 + assert mock_get_data_objects.call_count == 1 # Should not increase + + result3 = adapter.get_data_objects("test_schema", {"table1"}, safe_to_cache=True) + assert not result3 + assert mock_get_data_objects.call_count == 1 # Should not increase + + +def test_data_object_cache_cleared_on_rename_table( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + old_table = DataObject(catalog=None, schema="test_schema", name="old_table", type="table") + mock_get_data_objects = mocker.patch.object( + adapter, "_get_data_objects", return_value=[old_table] + ) + + result = adapter.get_data_object("test_schema.old_table", safe_to_cache=True) + assert result is not None + assert result.name == "old_table" + assert mock_get_data_objects.call_count == 1 + + new_table = DataObject(catalog=None, schema="test_schema", name="new_table", type="table") + mock_get_data_objects.return_value = [new_table] + adapter.rename_table("test_schema.old_table", "test_schema.new_table") + + mock_get_data_objects.return_value = [] + result = adapter.get_data_object("test_schema.old_table", safe_to_cache=True) + assert result is None + assert 
mock_get_data_objects.call_count == 2 + + mock_get_data_objects.return_value = [new_table] + result = adapter.get_data_object("test_schema.new_table", safe_to_cache=True) + assert result is not None + assert result.name == "new_table" + assert mock_get_data_objects.call_count == 3 + + +def test_data_object_cache_cleared_on_create_table_like( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + from sqlglot import exp + + adapter = make_mocked_engine_adapter(EngineAdapter, patch_get_data_objects=False) + + columns_to_types = { + "col1": exp.DataType.build("INT"), + "col2": exp.DataType.build("TEXT"), + } + mocker.patch.object(adapter, "columns", return_value=columns_to_types) + + mock_get_data_objects = mocker.patch.object(adapter, "_get_data_objects", return_value=[]) + result = adapter.get_data_object("test_schema.target_table", safe_to_cache=True) + assert result is None + assert mock_get_data_objects.call_count == 1 + + target_table = DataObject(catalog=None, schema="test_schema", name="target_table", type="table") + mock_get_data_objects.return_value = [target_table] + adapter.create_table_like("test_schema.target_table", "test_schema.source_table") + + result = adapter.get_data_object("test_schema.target_table", safe_to_cache=True) + assert result is not None + assert result.name == "target_table" + assert mock_get_data_objects.call_count == 2 + + +def test_diff_grants_configs(): + new = {"SELECT": ["u1", "u2"], "INSERT": ["u1"]} + old = {"SELECT": ["u1", "u3"], "update": ["u1"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions.get("SELECT") and set(additions["SELECT"]) == {"u2"} + assert removals.get("SELECT") and set(removals["SELECT"]) == {"u3"} + + assert additions.get("INSERT") and set(additions["INSERT"]) == {"u1"} + assert removals.get("update") and set(removals["update"]) == {"u1"} + + for perm, grantees in additions.items(): + assert set(grantees).isdisjoint(set(old.get(perm, []))) + for 
perm, grantees in removals.items(): + assert set(grantees).isdisjoint(set(new.get(perm, []))) + + +def test_diff_grants_configs_empty_new(): + new = {} + old = {"SELECT": ["u1", "u2"], "INSERT": ["u3"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions == {} + assert removals == old + + +def test_diff_grants_configs_empty_old(): + new = {"SELECT": ["u1", "u2"], "INSERT": ["u3"]} + old = {} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions == new + assert removals == {} + + +def test_diff_grants_configs_identical(): + grants = {"SELECT": ["u1", "u2"], "INSERT": ["u3"]} + + additions, removals = EngineAdapter._diff_grants_configs(grants, grants) + + assert additions == {} + assert removals == {} + + +def test_diff_grants_configs_none_configs(): + grants = {"SELECT": ["u1"]} + + additions, removals = EngineAdapter._diff_grants_configs(grants, {}) + assert additions == grants + assert removals == {} + + additions, removals = EngineAdapter._diff_grants_configs({}, grants) + assert additions == {} + assert removals == grants + + additions, removals = EngineAdapter._diff_grants_configs({}, {}) + assert additions == {} + assert removals == {} + + +def test_diff_grants_configs_duplicate_grantees(): + new = {"SELECT": ["u1", "u2", "u1"]} + old = {"SELECT": ["u2", "u3", "u2"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert additions["SELECT"] == ["u1", "u1"] + assert removals["SELECT"] == ["u3"] + + +def test_diff_grants_configs_case_sensitive(): + new = {"select": ["u1"], "SELECT": ["u2"]} + old = {"Select": ["u3"]} + + additions, removals = EngineAdapter._diff_grants_configs(new, old) + + assert set(additions.keys()) == {"select", "SELECT"} + assert set(removals.keys()) == {"Select"} + assert additions["select"] == ["u1"] + assert additions["SELECT"] == ["u2"] + assert removals["Select"] == ["u3"] + + +def 
test_sync_grants_config_unsupported_engine(make_mocked_engine_adapter: t.Callable): + adapter = make_mocked_engine_adapter(EngineAdapter) + adapter.SUPPORTS_GRANTS = False + + relation = exp.to_table("test_table") + grants_config = {"SELECT": ["user1"]} + + with pytest.raises(NotImplementedError, match="Engine does not support grants"): + adapter.sync_grants_config(relation, grants_config) + + +def test_get_current_grants_config_not_implemented(make_mocked_engine_adapter: t.Callable): + adapter = make_mocked_engine_adapter(EngineAdapter) + relation = exp.to_table("test_table") + + with pytest.raises(NotImplementedError): + adapter._get_current_grants_config(relation) diff --git a/tests/core/engine_adapter/test_base_postgres.py b/tests/core/engine_adapter/test_base_postgres.py index df280a9059..f286c47c56 100644 --- a/tests/core/engine_adapter/test_base_postgres.py +++ b/tests/core/engine_adapter/test_base_postgres.py @@ -3,6 +3,7 @@ from unittest.mock import call import pytest +from pytest_mock.plugin import MockerFixture from sqlglot import exp, parse_one from sqlmesh.core.engine_adapter.base_postgres import BasePostgresEngineAdapter @@ -75,3 +76,26 @@ def test_drop_view(make_mocked_engine_adapter: t.Callable): call('DROP VIEW IF EXISTS "db"."view"'), ] ) + + +def test_get_current_schema(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(BasePostgresEngineAdapter) + + fetchone_mock = mocker.patch.object(adapter, "fetchone", return_value=("test_schema",)) + result = adapter._get_current_schema() + + assert result == "test_schema" + fetchone_mock.assert_called_once() + executed_query = fetchone_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="postgres") + assert executed_sql == "SELECT CURRENT_SCHEMA" + + fetchone_mock.reset_mock() + fetchone_mock.return_value = None + result = adapter._get_current_schema() + assert result == "public" + + fetchone_mock.reset_mock() + fetchone_mock.return_value = 
(None,) # search_path = '' or 'nonexistent_schema' + result = adapter._get_current_schema() + assert result == "public" diff --git a/tests/core/engine_adapter/test_bigquery.py b/tests/core/engine_adapter/test_bigquery.py index 4328fa8923..134f144df1 100644 --- a/tests/core/engine_adapter/test_bigquery.py +++ b/tests/core/engine_adapter/test_bigquery.py @@ -13,6 +13,7 @@ import sqlmesh.core.dialect as d from sqlmesh.core.engine_adapter import BigQueryEngineAdapter from sqlmesh.core.engine_adapter.bigquery import select_partitions_expr +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.node import IntervalUnit from sqlmesh.utils import AttributeDict from sqlmesh.utils.errors import SQLMeshError @@ -487,7 +488,13 @@ def temp_table_exists(table: exp.Table) -> bool: retry_resp_call.errors = None retry_mock.return_value = retry_resp db_call_mock.return_value = AttributeDict({"errors": None}) - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = pd.DataFrame( + { + "id": [1, 2, 3], + "ts": ["2025-01-01 00:00:00", "2025-01-01 00:00:00", "2025-01-01 00:00:00"], + "val": [7, 8, 9], + } + ) adapter.merge( target_table="target", source_table=df, @@ -582,13 +589,14 @@ def _to_sql_calls(execute_mock: t.Any, identify: bool = True) -> t.List[str]: execute_mock = execute_mock.execute output = [] for call in execute_mock.call_args_list: - value = call[0][0] - sql = ( - value.sql(dialect="bigquery", identify=identify) - if isinstance(value, exp.Expression) - else str(value) - ) - output.append(sql) + values = ensure_list(call[0][0]) + for value in values: + sql = ( + value.sql(dialect="bigquery", identify=identify) + if isinstance(value, exp.Expr) + else str(value) + ) + output.append(sql) return output @@ -1207,3 +1215,168 @@ def test_scd_type_2_by_partitioning(adapter: BigQueryEngineAdapter): # Both calls should contain the partition logic (the scd logic is already covered by other tests) assert "PARTITION BY TIMESTAMP_TRUNC(`valid_from`, DAY)" 
in calls[0] assert "PARTITION BY TIMESTAMP_TRUNC(`valid_from`, DAY)" in calls[1] + + +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("project.dataset.test_table", dialect="bigquery") + new_grants_config = { + "roles/bigquery.dataViewer": ["user:analyst@example.com", "group:data-team@example.com"], + "roles/bigquery.dataEditor": ["user:admin@example.com"], + } + current_grants = [ + ("roles/bigquery.dataViewer", "user:old_analyst@example.com"), + ("roles/bigquery.admin", "user:old_admin@example.com"), + ] + + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + execute_mock = mocker.patch.object(adapter, "execute") + mocker.patch.object(adapter, "get_current_catalog", return_value="project") + mocker.patch.object(adapter.client, "location", "us-central1") + + mock_dataset = mocker.Mock() + mock_dataset.location = "us-central1" + mocker.patch.object(adapter, "_db_call", return_value=mock_dataset) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="bigquery") + expected_sql = ( + "SELECT privilege_type, grantee FROM `project`.`region-us-central1`.`INFORMATION_SCHEMA.OBJECT_PRIVILEGES` AS OBJECT_PRIVILEGES " + "WHERE object_schema = 'dataset' AND object_name = 'test_table' AND SPLIT(grantee, ':')[OFFSET(1)] <> SESSION_USER()" + ) + assert executed_sql == expected_sql + + sql_calls = _to_sql_calls(execute_mock) + + assert len(sql_calls) == 4 + assert ( + "REVOKE `roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` FROM 'user:old_analyst@example.com'" + in sql_calls + ) + assert ( + "REVOKE `roles/bigquery.admin` ON TABLE `project`.`dataset`.`test_table` FROM 'user:old_admin@example.com'" + in sql_calls + ) + assert ( + "GRANT 
`roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` TO 'user:analyst@example.com', 'group:data-team@example.com'" + in sql_calls + ) + assert ( + "GRANT `roles/bigquery.dataEditor` ON TABLE `project`.`dataset`.`test_table` TO 'user:admin@example.com'" + in sql_calls + ) + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("project.dataset.test_table", dialect="bigquery") + new_grants_config = { + "roles/bigquery.dataViewer": [ + "user:analyst1@example.com", + "user:analyst2@example.com", + "user:analyst3@example.com", + ], + "roles/bigquery.dataEditor": ["user:analyst2@example.com", "user:editor@example.com"], + } + current_grants = [ + ("roles/bigquery.dataViewer", "user:analyst1@example.com"), # Keep + ("roles/bigquery.dataViewer", "user:old_analyst@example.com"), # Remove + ("roles/bigquery.dataEditor", "user:analyst2@example.com"), # Keep + ("roles/bigquery.admin", "user:admin@example.com"), # Remove + ] + + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + execute_mock = mocker.patch.object(adapter, "execute") + mocker.patch.object(adapter, "get_current_catalog", return_value="project") + mocker.patch.object(adapter.client, "location", "us-central1") + + mock_dataset = mocker.Mock() + mock_dataset.location = "us-central1" + mocker.patch.object(adapter, "_db_call", return_value=mock_dataset) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="bigquery") + expected_sql = ( + "SELECT privilege_type, grantee FROM `project`.`region-us-central1`.`INFORMATION_SCHEMA.OBJECT_PRIVILEGES` AS OBJECT_PRIVILEGES " + "WHERE object_schema = 'dataset' AND object_name = 'test_table' AND SPLIT(grantee, ':')[OFFSET(1)] <> SESSION_USER()" 
+ ) + assert executed_sql == expected_sql + + sql_calls = _to_sql_calls(execute_mock) + + assert len(sql_calls) == 4 + assert ( + "REVOKE `roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` FROM 'user:old_analyst@example.com'" + in sql_calls + ) + assert ( + "REVOKE `roles/bigquery.admin` ON TABLE `project`.`dataset`.`test_table` FROM 'user:admin@example.com'" + in sql_calls + ) + assert ( + "GRANT `roles/bigquery.dataViewer` ON TABLE `project`.`dataset`.`test_table` TO 'user:analyst2@example.com', 'user:analyst3@example.com'" + in sql_calls + ) + assert ( + "GRANT `roles/bigquery.dataEditor` ON TABLE `project`.`dataset`.`test_table` TO 'user:editor@example.com'" + in sql_calls + ) + + +@pytest.mark.parametrize( + "table_type, expected_keyword", + [ + (DataObjectType.TABLE, "TABLE"), + (DataObjectType.VIEW, "VIEW"), + (DataObjectType.MATERIALIZED_VIEW, "MATERIALIZED VIEW"), + ], +) +def test_sync_grants_config_object_kind( + make_mocked_engine_adapter: t.Callable, + mocker: MockerFixture, + table_type: DataObjectType, + expected_keyword: str, +) -> None: + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("project.dataset.test_object", dialect="bigquery") + + mocker.patch.object(adapter, "fetchall", return_value=[]) + execute_mock = mocker.patch.object(adapter, "execute") + mocker.patch.object(adapter, "get_current_catalog", return_value="project") + mocker.patch.object(adapter.client, "location", "us-central1") + + mock_dataset = mocker.Mock() + mock_dataset.location = "us-central1" + mocker.patch.object(adapter, "_db_call", return_value=mock_dataset) + + adapter.sync_grants_config( + relation, {"roles/bigquery.dataViewer": ["user:test@example.com"]}, table_type + ) + + executed_exprs = execute_mock.call_args[0][0] + sql_calls = [expr.sql(dialect="bigquery") for expr in executed_exprs] + assert sql_calls == [ + f"GRANT `roles/bigquery.dataViewer` ON {expected_keyword} project.dataset.test_object TO 
'user:test@example.com'" + ] + + +def test_sync_grants_config_no_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(BigQueryEngineAdapter) + relation = exp.to_table("test_table", dialect="bigquery") + new_grants_config = { + "roles/bigquery.dataViewer": ["user:analyst@example.com"], + "roles/bigquery.dataEditor": ["user:editor@example.com"], + } + + with pytest.raises(ValueError, match="Table test_table does not have a schema \\(dataset\\)"): + adapter.sync_grants_config(relation, new_grants_config) diff --git a/tests/core/engine_adapter/test_clickhouse.py b/tests/core/engine_adapter/test_clickhouse.py index 39e317c7fa..7ff971b742 100644 --- a/tests/core/engine_adapter/test_clickhouse.py +++ b/tests/core/engine_adapter/test_clickhouse.py @@ -327,16 +327,16 @@ def build_properties_sql(storage_format="", order_by="", primary_key="", propert assert ( build_properties_sql( - order_by="ORDER_BY = 'timestamp with fill to toStartOfDay(toDateTime64(\\'2024-07-11\\', 3)) step toIntervalDay(1) interpolate(price as price)'," + order_by="ORDER_BY = 'timestamp with fill to dateTrunc(\\'DAY\\', toDateTime64(\\'2024-07-11\\', 3)) step toIntervalDay(1) interpolate(price as price)'," ) - == "ENGINE=MergeTree ORDER BY (timestamp WITH FILL TO toStartOfDay(toDateTime64('2024-07-11', 3)) STEP toIntervalDay(1) INTERPOLATE (price AS price))" + == "ENGINE=MergeTree ORDER BY (timestamp WITH FILL TO dateTrunc('DAY', toDateTime64('2024-07-11', 3)) STEP toIntervalDay(1) INTERPOLATE (price AS price))" ) assert ( build_properties_sql( - order_by="ORDER_BY = (\"a\", 'timestamp with fill to toStartOfDay(toDateTime64(\\'2024-07-11\\', 3)) step toIntervalDay(1) interpolate(price as price)')," + order_by="ORDER_BY = (\"a\", 'timestamp with fill to dateTrunc(\\'DAY\\', toDateTime64(\\'2024-07-11\\', 3)) step toIntervalDay(1) interpolate(price as price)')," ) - == "ENGINE=MergeTree ORDER BY (\"a\", timestamp WITH FILL TO 
toStartOfDay(toDateTime64('2024-07-11', 3)) STEP toIntervalDay(1) INTERPOLATE (price AS price))" + == "ENGINE=MergeTree ORDER BY (\"a\", timestamp WITH FILL TO dateTrunc('DAY', toDateTime64('2024-07-11', 3)) STEP toIntervalDay(1) INTERPOLATE (price AS price))" ) assert ( @@ -368,7 +368,7 @@ def test_partitioned_by_expr(make_mocked_engine_adapter: t.Callable): assert ( model.partitioned_by[0].sql("clickhouse") - == """toMonday(CAST("ds" AS DateTime64(9, 'UTC')))""" + == """dateTrunc('WEEK', CAST("ds" AS DateTime64(9, 'UTC')))""" ) # user specifies without time column, unknown time column type @@ -393,7 +393,7 @@ def test_partitioned_by_expr(make_mocked_engine_adapter: t.Callable): ) assert [p.sql("clickhouse") for p in model.partitioned_by] == [ - """toMonday(CAST("ds" AS DateTime64(9, 'UTC')))""", + """dateTrunc('WEEK', CAST("ds" AS DateTime64(9, 'UTC')))""", '"x"', ] @@ -417,7 +417,7 @@ def test_partitioned_by_expr(make_mocked_engine_adapter: t.Callable): ) ) - assert model.partitioned_by[0].sql("clickhouse") == 'toMonday("ds")' + assert model.partitioned_by[0].sql("clickhouse") == """dateTrunc('WEEK', "ds")""" # user doesn't specify, non-conformable time column type model = load_sql_based_model( @@ -441,7 +441,7 @@ def test_partitioned_by_expr(make_mocked_engine_adapter: t.Callable): assert ( model.partitioned_by[0].sql("clickhouse") - == """CAST(toMonday(CAST("ds" AS DateTime64(9, 'UTC'))) AS String)""" + == """CAST(dateTrunc('WEEK', CAST("ds" AS DateTime64(9, 'UTC'))) AS String)""" ) # user specifies partitioned_by with time column @@ -640,7 +640,7 @@ def test_scd_type_2_by_time( "test_valid_from", "test_valid_to", TRUE AS "_exists" - FROM ""__temp_target_efgh"" + FROM "__temp_target_efgh" WHERE NOT "test_valid_to" IS NULL ), "latest" AS ( @@ -652,7 +652,7 @@ def test_scd_type_2_by_time( "test_valid_from", "test_valid_to", TRUE AS "_exists" - FROM ""__temp_target_efgh"" + FROM "__temp_target_efgh" WHERE "test_valid_to" IS NULL ), "deleted" AS ( @@ -993,7 +993,7 
@@ def test_insert_overwrite_by_condition_replace_partitioned( temp_table_mock.return_value = make_temp_table_name(table_name, "abcd") fetchone_mock = mocker.patch("sqlmesh.core.engine_adapter.ClickhouseEngineAdapter.fetchone") - fetchone_mock.return_value = "toMonday(ds)" + fetchone_mock.return_value = "dateTrunc('WEEK', ds)" insert_table_name = make_temp_table_name("new_records", "abcd") existing_table_name = make_temp_table_name("existing_records", "abcd") @@ -1069,7 +1069,7 @@ def test_insert_overwrite_by_condition_where_partitioned( temp_table_mock.return_value = make_temp_table_name(table_name, "abcd") fetchone_mock = mocker.patch("sqlmesh.core.engine_adapter.ClickhouseEngineAdapter.fetchone") - fetchone_mock.return_value = "toMonday(ds)" + fetchone_mock.return_value = "dateTrunc('WEEK', ds)" fetchall_mock = mocker.patch("sqlmesh.core.engine_adapter.ClickhouseEngineAdapter.fetchall") fetchall_mock.side_effect = [ @@ -1175,7 +1175,7 @@ def test_insert_overwrite_by_condition_by_key_partitioned( temp_table_mock.return_value = make_temp_table_name(table_name, "abcd") fetchone_mock = mocker.patch("sqlmesh.core.engine_adapter.ClickhouseEngineAdapter.fetchone") - fetchone_mock.side_effect = ["toMonday(ds)", "toMonday(ds)"] + fetchone_mock.side_effect = ["dateTrunc('WEEK', ds)", "dateTrunc('WEEK', ds)"] fetchall_mock = mocker.patch("sqlmesh.core.engine_adapter.ClickhouseEngineAdapter.fetchall") fetchall_mock.side_effect = [ @@ -1240,7 +1240,7 @@ def test_insert_overwrite_by_condition_inc_by_partition( temp_table_mock.return_value = make_temp_table_name(table_name, "abcd") fetchone_mock = mocker.patch("sqlmesh.core.engine_adapter.ClickhouseEngineAdapter.fetchone") - fetchone_mock.return_value = "toMonday(ds)" + fetchone_mock.return_value = "dateTrunc('WEEK', ds)" fetchall_mock = mocker.patch("sqlmesh.core.engine_adapter.ClickhouseEngineAdapter.fetchall") fetchall_mock.return_value = [("1",), ("2",), ("4",)] @@ -1365,7 +1365,7 @@ def test_exchange_tables( # The 
EXCHANGE TABLES call errored, so we RENAME TABLE instead assert [ quote_identifiers(call.args[0]).sql("clickhouse") - if isinstance(call.args[0], exp.Expression) + if isinstance(call.args[0], exp.Expr) else call.args[0] for call in execute_mock.call_args_list ] == [ diff --git a/tests/core/engine_adapter/test_databricks.py b/tests/core/engine_adapter/test_databricks.py index fcd7aec0fa..de91fd3b70 100644 --- a/tests/core/engine_adapter/test_databricks.py +++ b/tests/core/engine_adapter/test_databricks.py @@ -106,7 +106,7 @@ def test_clone_table(mocker: MockFixture, make_mocked_engine_adapter: t.Callable adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog") adapter.clone_table("target_table", "source_table") adapter.cursor.execute.assert_called_once_with( - "CREATE TABLE `target_table` SHALLOW CLONE `source_table`" + "CREATE TABLE IF NOT EXISTS `target_table` SHALLOW CLONE `source_table`" ) @@ -128,17 +128,194 @@ def test_get_current_catalog(mocker: MockFixture, make_mocked_engine_adapter: t. 
assert to_sql_calls(adapter) == ["SELECT CURRENT_CATALOG()"] -def test_get_current_database(mocker: MockFixture, make_mocked_engine_adapter: t.Callable): +def test_get_current_schema(mocker: MockFixture, make_mocked_engine_adapter: t.Callable): mocker.patch( "sqlmesh.core.engine_adapter.databricks.DatabricksEngineAdapter.set_current_catalog" ) adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog") adapter.cursor.fetchone.return_value = ("test_database",) - assert adapter.get_current_database() == "test_database" + assert adapter._get_current_schema() == "test_database" assert to_sql_calls(adapter) == ["SELECT CURRENT_DATABASE()"] +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockFixture): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main") + relation = exp.to_table("main.test_schema.test_table", dialect="databricks") + new_grants_config = { + "SELECT": ["group1", "group2"], + "MODIFY": ["writers"], + } + + current_grants = [ + ("SELECT", "legacy"), + ("REFRESH", "stale"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM main.information_schema.table_privileges " + "WHERE table_catalog = 'main' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert "GRANT SELECT ON TABLE `main`.`test_schema`.`test_table` TO `group1`" in sql_calls + assert "GRANT SELECT ON TABLE `main`.`test_schema`.`test_table` TO `group2`" in sql_calls + assert "GRANT MODIFY 
ON TABLE `main`.`test_schema`.`test_table` TO `writers`" in sql_calls + assert "REVOKE SELECT ON TABLE `main`.`test_schema`.`test_table` FROM `legacy`" in sql_calls + assert "REVOKE REFRESH ON TABLE `main`.`test_schema`.`test_table` FROM `stale`" in sql_calls + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockFixture +): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main") + relation = exp.to_table("main.test_schema.test_table", dialect="databricks") + new_grants_config = { + "SELECT": ["shared", "new_role"], + "MODIFY": ["shared", "writer"], + } + + current_grants = [ + ("SELECT", "shared"), + ("SELECT", "legacy"), + ("MODIFY", "shared"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM main.information_schema.table_privileges " + "WHERE table_catalog = 'main' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 3 + + assert "GRANT SELECT ON TABLE `main`.`test_schema`.`test_table` TO `new_role`" in sql_calls + assert "GRANT MODIFY ON TABLE `main`.`test_schema`.`test_table` TO `writer`" in sql_calls + assert "REVOKE SELECT ON TABLE `main`.`test_schema`.`test_table` FROM `legacy`" in sql_calls + + +@pytest.mark.parametrize( + "table_type, expected_keyword", + [ + (DataObjectType.TABLE, "TABLE"), + (DataObjectType.VIEW, "VIEW"), + (DataObjectType.MATERIALIZED_VIEW, "MATERIALIZED VIEW"), + (DataObjectType.MANAGED_TABLE, "TABLE"), + ], +) +def 
test_sync_grants_config_object_kind( + make_mocked_engine_adapter: t.Callable, + mocker: MockFixture, + table_type: DataObjectType, + expected_keyword: str, +) -> None: + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main") + relation = exp.to_table("main.test_schema.test_object", dialect="databricks") + + mocker.patch.object(adapter, "fetchall", return_value=[]) + + adapter.sync_grants_config(relation, {"SELECT": ["test"]}, table_type) + + sql_calls = to_sql_calls(adapter) + assert sql_calls == [ + f"GRANT SELECT ON {expected_keyword} `main`.`test_schema`.`test_object` TO `test`" + ] + + +def test_sync_grants_config_quotes(make_mocked_engine_adapter: t.Callable, mocker: MockFixture): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="`test_db`") + relation = exp.to_table("`test_db`.`test_schema`.`test_table`", dialect="databricks") + new_grants_config = { + "SELECT": ["group1", "group2"], + "MODIFY": ["writers"], + } + + current_grants = [ + ("SELECT", "legacy"), + ("REFRESH", "stale"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM `test_db`.information_schema.table_privileges " + "WHERE table_catalog = 'test_db' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert "GRANT SELECT ON TABLE `test_db`.`test_schema`.`test_table` TO `group1`" in sql_calls + assert "GRANT SELECT ON TABLE `test_db`.`test_schema`.`test_table` TO `group2`" in sql_calls + assert "GRANT MODIFY ON TABLE 
`test_db`.`test_schema`.`test_table` TO `writers`" in sql_calls + assert "REVOKE SELECT ON TABLE `test_db`.`test_schema`.`test_table` FROM `legacy`" in sql_calls + assert "REVOKE REFRESH ON TABLE `test_db`.`test_schema`.`test_table` FROM `stale`" in sql_calls + + +def test_sync_grants_config_no_catalog_or_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockFixture +): + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="main_catalog") + relation = exp.to_table("test_table", dialect="databricks") + new_grants_config = { + "SELECT": ["group1", "group2"], + "MODIFY": ["writers"], + } + + current_grants = [ + ("SELECT", "legacy"), + ("REFRESH", "stale"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + mocker.patch.object(adapter, "_get_current_schema", return_value="schema") + mocker.patch.object(adapter, "get_current_catalog", return_value="main_catalog") + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="databricks") + expected_sql = ( + "SELECT privilege_type, grantee FROM `main_catalog`.information_schema.table_privileges " + "WHERE table_catalog = 'main_catalog' AND table_schema = 'schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER() AND grantee <> CURRENT_USER() AND inherited_from = 'NONE'" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert "GRANT SELECT ON TABLE `test_table` TO `group1`" in sql_calls + assert "GRANT SELECT ON TABLE `test_table` TO `group2`" in sql_calls + assert "GRANT MODIFY ON TABLE `test_table` TO `writers`" in sql_calls + assert "REVOKE SELECT ON TABLE `test_table` FROM `legacy`" in sql_calls + assert "REVOKE REFRESH ON TABLE `test_table` FROM `stale`" in sql_calls + + def test_insert_overwrite_by_partition_query( 
make_mocked_engine_adapter: t.Callable, mocker: MockFixture, make_temp_table_name: t.Callable ): @@ -195,7 +372,67 @@ def test_materialized_view_properties(mocker: MockFixture, make_mocked_engine_ad sql_calls = to_sql_calls(adapter) # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html#syntax assert sql_calls == [ - "CREATE OR REPLACE MATERIALIZED VIEW test_table PARTITIONED BY (ds) AS SELECT 1", + "CREATE OR REPLACE MATERIALIZED VIEW `test_table` PARTITIONED BY (`ds`) AS SELECT 1", + ] + + +def test_materialized_view_with_column_comments( + mocker: MockFixture, make_mocked_engine_adapter: t.Callable +): + mocker.patch( + "sqlmesh.core.engine_adapter.databricks.DatabricksEngineAdapter.set_current_catalog" + ) + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog") + mocker.patch.object(adapter, "get_current_catalog", return_value="test_catalog") + + adapter.create_view( + "test_view", + parse_one("SELECT a, b FROM source_table"), + target_columns_to_types={ + "a": exp.DataType.build("INT"), + "b": exp.DataType.build("STRING"), + }, + materialized=True, + column_descriptions={ + "a": "column a description", + "b": "column b description", + }, + ) + + sql_calls = to_sql_calls(adapter) + # Databricks requires column types when column comments are present in materialized views + assert sql_calls == [ + "CREATE OR REPLACE MATERIALIZED VIEW `test_view` (`a` INT COMMENT 'column a description', `b` STRING COMMENT 'column b description') AS SELECT `a`, `b` FROM `source_table`", + ] + + +def test_regular_view_with_column_comments( + mocker: MockFixture, make_mocked_engine_adapter: t.Callable +): + mocker.patch( + "sqlmesh.core.engine_adapter.databricks.DatabricksEngineAdapter.set_current_catalog" + ) + adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog") + mocker.patch.object(adapter, "get_current_catalog", return_value="test_catalog") + + 
adapter.create_view( + "test_view", + parse_one("SELECT a, b FROM source_table"), + target_columns_to_types={ + "a": exp.DataType.build("INT"), + "b": exp.DataType.build("STRING"), + }, + materialized=False, + column_descriptions={ + "a": "column a description", + "b": "column b description", + }, + ) + + sql_calls = to_sql_calls(adapter) + # Regular views should NOT include column types even when column comments are present + assert sql_calls == [ + "CREATE OR REPLACE VIEW `test_view` (`a` COMMENT 'column a description', `b` COMMENT 'column b description') AS SELECT `a`, `b` FROM `source_table`", ] diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py index 6b80ef7337..a52218a097 100644 --- a/tests/core/engine_adapter/test_fabric.py +++ b/tests/core/engine_adapter/test_fabric.py @@ -2,6 +2,7 @@ import typing as t +import pandas as pd # noqa: TID253 import pytest from pytest_mock import MockerFixture from sqlglot import exp, parse_one @@ -88,3 +89,200 @@ def test_replace_query(adapter: FabricEngineAdapter, mocker: MockerFixture): "TRUNCATE TABLE [test_table];", "INSERT INTO [test_table] ([a]) SELECT [a] FROM [tbl];", ] + + +def test_alter_table_column_type_workaround(adapter: FabricEngineAdapter, mocker: MockerFixture): + """ + Tests the alter_table method's workaround for changing a column's data type. 
+ """ + # Mock set_current_catalog to avoid connection pool side effects + set_catalog_mock = mocker.patch.object(adapter, "set_current_catalog") + # Mock random_id to have a predictable temporary column name + mocker.patch("sqlmesh.core.engine_adapter.fabric.random_id", return_value="abcdef") + + alter_expression = exp.Alter( + this=exp.to_table("my_db.my_schema.my_table"), + actions=[ + exp.AlterColumn( + this=exp.to_column("col_a"), + dtype=exp.DataType.build("BIGINT"), + ) + ], + ) + + adapter.alter_table([alter_expression]) + + set_catalog_mock.assert_called_once_with("my_db") + + expected_calls = [ + "ALTER TABLE [my_schema].[my_table] ADD [col_a__abcdef] BIGINT;", + "UPDATE [my_schema].[my_table] SET [col_a__abcdef] = CAST([col_a] AS BIGINT);", + "ALTER TABLE [my_schema].[my_table] DROP COLUMN [col_a];", + "EXEC sp_rename 'my_schema.my_table.col_a__abcdef', 'col_a', 'COLUMN'", + ] + + assert to_sql_calls(adapter) == expected_calls + + +def test_alter_table_direct_alteration(adapter: FabricEngineAdapter, mocker: MockerFixture): + """ + Tests the alter_table method for direct alterations like adding a column. 
+ """ + set_catalog_mock = mocker.patch.object(adapter, "set_current_catalog") + + alter_expression = exp.Alter( + this=exp.to_table("my_db.my_schema.my_table"), + actions=[exp.ColumnDef(this=exp.to_column("new_col"), kind=exp.DataType.build("INT"))], + ) + + adapter.alter_table([alter_expression]) + + set_catalog_mock.assert_called_once_with("my_db") + + expected_calls = [ + "ALTER TABLE [my_schema].[my_table] ADD [new_col] INT;", + ] + + assert to_sql_calls(adapter) == expected_calls + + +def test_merge_pandas( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture, make_temp_table_name: t.Callable +): + mocker.patch( + "sqlmesh.core.engine_adapter.fabric.FabricEngineAdapter.table_exists", + return_value=False, + ) + + adapter = make_mocked_engine_adapter(FabricEngineAdapter) + + temp_table_mock = mocker.patch("sqlmesh.core.engine_adapter.EngineAdapter._get_temp_table") + table_name = "target" + temp_table_id = "abcdefgh" + temp_table_mock.return_value = make_temp_table_name(table_name, temp_table_id) + + df = pd.DataFrame({"id": [1, 2, 3], "ts": [1, 2, 3], "val": [4, 5, 6]}) + + # 1 key + adapter.merge( + target_table=table_name, + source_table=df, + target_columns_to_types={ + "id": exp.DataType.build("int"), + "ts": exp.DataType.build("TIMESTAMP"), + "val": exp.DataType.build("int"), + }, + unique_key=[exp.to_identifier("id")], + ) + adapter._connection_pool.get().bulk_copy.assert_called_with( + f"__temp_target_{temp_table_id}", [(1, 1, 4), (2, 2, 5), (3, 3, 6)] + ) + + assert to_sql_calls(adapter) == [ + f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '__temp_target_{temp_table_id}') EXEC('CREATE TABLE [__temp_target_{temp_table_id}] ([id] INT, [ts] DATETIME2(6), [val] INT)');""", + f"MERGE INTO [target] AS [__MERGE_TARGET__] USING (SELECT CAST([id] AS INT) AS [id], CAST([ts] AS DATETIME2(6)) AS [ts], CAST([val] AS INT) AS [val] FROM [__temp_target_{temp_table_id}]) AS [__MERGE_SOURCE__] ON [__MERGE_TARGET__].[id] = 
[__MERGE_SOURCE__].[id] WHEN MATCHED THEN UPDATE SET [__MERGE_TARGET__].[ts] = [__MERGE_SOURCE__].[ts], [__MERGE_TARGET__].[val] = [__MERGE_SOURCE__].[val] WHEN NOT MATCHED THEN INSERT ([id], [ts], [val]) VALUES ([__MERGE_SOURCE__].[id], [__MERGE_SOURCE__].[ts], [__MERGE_SOURCE__].[val]);", + f"DROP TABLE IF EXISTS [__temp_target_{temp_table_id}];", + ] + + # 2 keys + adapter.cursor.reset_mock() + adapter._connection_pool.get().reset_mock() + temp_table_mock.return_value = make_temp_table_name(table_name, temp_table_id) + adapter.merge( + target_table=table_name, + source_table=df, + target_columns_to_types={ + "id": exp.DataType.build("int"), + "ts": exp.DataType.build("TIMESTAMP"), + "val": exp.DataType.build("int"), + }, + unique_key=[exp.to_identifier("id"), exp.to_column("ts")], + ) + adapter._connection_pool.get().bulk_copy.assert_called_with( + f"__temp_target_{temp_table_id}", [(1, 1, 4), (2, 2, 5), (3, 3, 6)] + ) + + assert to_sql_calls(adapter) == [ + f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '__temp_target_{temp_table_id}') EXEC('CREATE TABLE [__temp_target_{temp_table_id}] ([id] INT, [ts] DATETIME2(6), [val] INT)');""", + f"MERGE INTO [target] AS [__MERGE_TARGET__] USING (SELECT CAST([id] AS INT) AS [id], CAST([ts] AS DATETIME2(6)) AS [ts], CAST([val] AS INT) AS [val] FROM [__temp_target_{temp_table_id}]) AS [__MERGE_SOURCE__] ON [__MERGE_TARGET__].[id] = [__MERGE_SOURCE__].[id] AND [__MERGE_TARGET__].[ts] = [__MERGE_SOURCE__].[ts] WHEN MATCHED THEN UPDATE SET [__MERGE_TARGET__].[val] = [__MERGE_SOURCE__].[val] WHEN NOT MATCHED THEN INSERT ([id], [ts], [val]) VALUES ([__MERGE_SOURCE__].[id], [__MERGE_SOURCE__].[ts], [__MERGE_SOURCE__].[val]);", + f"DROP TABLE IF EXISTS [__temp_target_{temp_table_id}];", + ] + + +def test_merge_exists( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture, make_temp_table_name: t.Callable +): + mocker.patch( + 
"sqlmesh.core.engine_adapter.fabric.FabricEngineAdapter.table_exists", + return_value=False, + ) + + adapter = make_mocked_engine_adapter(FabricEngineAdapter) + + temp_table_mock = mocker.patch("sqlmesh.core.engine_adapter.EngineAdapter._get_temp_table") + table_name = "target" + temp_table_id = "abcdefgh" + temp_table_mock.return_value = make_temp_table_name(table_name, temp_table_id) + + df = pd.DataFrame({"id": [1, 2, 3], "ts": [1, 2, 3], "val": [4, 5, 6]}) + + # regular implementation + adapter.merge( + target_table=table_name, + source_table=df, + target_columns_to_types={ + "id": exp.DataType.build("int"), + "ts": exp.DataType.build("TIMESTAMP"), + "val": exp.DataType.build("int"), + }, + unique_key=[exp.to_identifier("id")], + ) + + assert to_sql_calls(adapter) == [ + f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '__temp_target_{temp_table_id}') EXEC('CREATE TABLE [__temp_target_{temp_table_id}] ([id] INT, [ts] DATETIME2(6), [val] INT)');""", + f"MERGE INTO [target] AS [__MERGE_TARGET__] USING (SELECT CAST([id] AS INT) AS [id], CAST([ts] AS DATETIME2(6)) AS [ts], CAST([val] AS INT) AS [val] FROM [__temp_target_{temp_table_id}]) AS [__MERGE_SOURCE__] ON [__MERGE_TARGET__].[id] = [__MERGE_SOURCE__].[id] WHEN MATCHED THEN UPDATE SET [__MERGE_TARGET__].[ts] = [__MERGE_SOURCE__].[ts], [__MERGE_TARGET__].[val] = [__MERGE_SOURCE__].[val] WHEN NOT MATCHED THEN INSERT ([id], [ts], [val]) VALUES ([__MERGE_SOURCE__].[id], [__MERGE_SOURCE__].[ts], [__MERGE_SOURCE__].[val]);", + f"DROP TABLE IF EXISTS [__temp_target_{temp_table_id}];", + ] + + # merge exists implementation + adapter.cursor.reset_mock() + adapter._connection_pool.get().reset_mock() + temp_table_mock.return_value = make_temp_table_name(table_name, temp_table_id) + adapter.merge( + target_table=table_name, + source_table=df, + target_columns_to_types={ + "id": exp.DataType.build("int"), + "ts": exp.DataType.build("TIMESTAMP"), + "val": exp.DataType.build("int"), + }, + 
unique_key=[exp.to_identifier("id")], + physical_properties={"mssql_merge_exists": True}, + ) + + assert to_sql_calls(adapter) == [ + f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '__temp_target_{temp_table_id}') EXEC('CREATE TABLE [__temp_target_{temp_table_id}] ([id] INT, [ts] DATETIME2(6), [val] INT)');""", + f"MERGE INTO [target] AS [__MERGE_TARGET__] USING (SELECT CAST([id] AS INT) AS [id], CAST([ts] AS DATETIME2(6)) AS [ts], CAST([val] AS INT) AS [val] FROM [__temp_target_{temp_table_id}]) AS [__MERGE_SOURCE__] ON [__MERGE_TARGET__].[id] = [__MERGE_SOURCE__].[id] WHEN MATCHED AND EXISTS(SELECT [__MERGE_TARGET__].[ts], [__MERGE_TARGET__].[val] EXCEPT SELECT [__MERGE_SOURCE__].[ts], [__MERGE_SOURCE__].[val]) THEN UPDATE SET [__MERGE_TARGET__].[ts] = [__MERGE_SOURCE__].[ts], [__MERGE_TARGET__].[val] = [__MERGE_SOURCE__].[val] WHEN NOT MATCHED THEN INSERT ([id], [ts], [val]) VALUES ([__MERGE_SOURCE__].[id], [__MERGE_SOURCE__].[ts], [__MERGE_SOURCE__].[val]);", + f"DROP TABLE IF EXISTS [__temp_target_{temp_table_id}];", + ] + + # merge exists and all model columns are keys + adapter.cursor.reset_mock() + adapter._connection_pool.get().reset_mock() + temp_table_mock.return_value = make_temp_table_name(table_name, temp_table_id) + adapter.merge( + target_table=table_name, + source_table=df, + target_columns_to_types={ + "id": exp.DataType.build("int"), + "ts": exp.DataType.build("TIMESTAMP"), + }, + unique_key=[exp.to_identifier("id"), exp.to_column("ts")], + physical_properties={"mssql_merge_exists": True}, + ) + + assert to_sql_calls(adapter) == [ + f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '__temp_target_{temp_table_id}') EXEC('CREATE TABLE [__temp_target_{temp_table_id}] ([id] INT, [ts] DATETIME2(6))');""", + f"MERGE INTO [target] AS [__MERGE_TARGET__] USING (SELECT CAST([id] AS INT) AS [id], CAST([ts] AS DATETIME2(6)) AS [ts] FROM [__temp_target_{temp_table_id}]) AS [__MERGE_SOURCE__] ON 
[__MERGE_TARGET__].[id] = [__MERGE_SOURCE__].[id] AND [__MERGE_TARGET__].[ts] = [__MERGE_SOURCE__].[ts] WHEN NOT MATCHED THEN INSERT ([id], [ts]) VALUES ([__MERGE_SOURCE__].[id], [__MERGE_SOURCE__].[ts]);", + f"DROP TABLE IF EXISTS [__temp_target_{temp_table_id}];", + ] diff --git a/tests/core/engine_adapter/test_mssql.py b/tests/core/engine_adapter/test_mssql.py index 5923afa217..ec6a4ba3e8 100644 --- a/tests/core/engine_adapter/test_mssql.py +++ b/tests/core/engine_adapter/test_mssql.py @@ -9,15 +9,14 @@ from sqlglot import expressions as exp from sqlglot import parse_one +from pathlib import Path +from sqlmesh import model from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter -from sqlmesh.core.snapshot import SnapshotEvaluator, SnapshotChangeCategory +from sqlmesh.core.snapshot import SnapshotEvaluator, SnapshotChangeCategory, Snapshot from sqlmesh.core.model import load_sql_based_model +from sqlmesh.core.model.kind import SCDType2ByTimeKind from sqlmesh.core import dialect as d -from sqlmesh.core.engine_adapter.shared import ( - DataObject, - DataObjectType, - InsertOverwriteStrategy, -) +from sqlmesh.core.engine_adapter.shared import DataObject, DataObjectType, SourceQuery from sqlmesh.utils.date import to_ds from tests.core.engine_adapter import to_sql_calls @@ -342,46 +341,6 @@ def test_insert_overwrite_by_time_partition_supports_insert_overwrite_pandas_exi ] -def test_insert_overwrite_by_time_partition_replace_where_pandas( - make_mocked_engine_adapter: t.Callable, mocker: MockerFixture, make_temp_table_name: t.Callable -): - mocker.patch( - "sqlmesh.core.engine_adapter.mssql.MSSQLEngineAdapter.table_exists", - return_value=False, - ) - - adapter = make_mocked_engine_adapter(MSSQLEngineAdapter) - adapter.INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.REPLACE_WHERE - - temp_table_mock = mocker.patch("sqlmesh.core.engine_adapter.EngineAdapter._get_temp_table") - table_name = "test_table" - temp_table_id = "abcdefgh" - 
temp_table_mock.return_value = make_temp_table_name(table_name, temp_table_id) - - df = pd.DataFrame({"a": [1, 2], "ds": ["2022-01-01", "2022-01-02"]}) - adapter.insert_overwrite_by_time_partition( - table_name, - df, - start="2022-01-01", - end="2022-01-02", - time_formatter=lambda x, _: exp.Literal.string(to_ds(x)), - time_column="ds", - target_columns_to_types={ - "a": exp.DataType.build("INT"), - "ds": exp.DataType.build("STRING"), - }, - ) - adapter._connection_pool.get().bulk_copy.assert_called_with( - f"__temp_test_table_{temp_table_id}", [(1, "2022-01-01"), (2, "2022-01-02")] - ) - - assert to_sql_calls(adapter) == [ - f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '__temp_test_table_{temp_table_id}') EXEC('CREATE TABLE [__temp_test_table_{temp_table_id}] ([a] INTEGER, [ds] VARCHAR(MAX))');""", - f"""MERGE INTO [test_table] AS [__MERGE_TARGET__] USING (SELECT [a] AS [a], [ds] AS [ds] FROM (SELECT CAST([a] AS INTEGER) AS [a], CAST([ds] AS VARCHAR(MAX)) AS [ds] FROM [__temp_test_table_{temp_table_id}]) AS [_subquery] WHERE [ds] BETWEEN '2022-01-01' AND '2022-01-02') AS [__MERGE_SOURCE__] ON (1 = 0) WHEN NOT MATCHED BY SOURCE AND [ds] BETWEEN '2022-01-01' AND '2022-01-02' THEN DELETE WHEN NOT MATCHED THEN INSERT ([a], [ds]) VALUES ([a], [ds]);""", - f"DROP TABLE IF EXISTS [__temp_test_table_{temp_table_id}];", - ] - - def test_insert_append_pandas( make_mocked_engine_adapter: t.Callable, mocker: MockerFixture, make_temp_table_name: t.Callable ): @@ -874,7 +833,7 @@ def test_create_table_from_query(make_mocked_engine_adapter: t.Callable, mocker: columns_mock.assert_called_once_with(exp.table_("__temp_ctas_test_random_id", quoted=True)) # We don't want to drop anything other than LIMIT 0 - # See https://github.com/TobikoData/sqlmesh/issues/4048 + # See https://github.com/SQLMesh/sqlmesh/issues/4048 adapter.ctas( table_name="test_schema.test_table", query_or_df=parse_one( @@ -889,7 +848,7 @@ def 
test_create_table_from_query(make_mocked_engine_adapter: t.Callable, mocker: def test_replace_query_strategy(adapter: MSSQLEngineAdapter, mocker: MockerFixture): - # ref issue 4472: https://github.com/TobikoData/sqlmesh/issues/4472 + # ref issue 4472: https://github.com/SQLMesh/sqlmesh/issues/4472 # The FULL strategy calls EngineAdapter.replace_query() which calls _insert_overwrite_by_condition() should use DELETE+INSERT and not MERGE expressions = d.parse( f""" @@ -957,3 +916,89 @@ def test_replace_query_strategy(adapter: MSSQLEngineAdapter, mocker: MockerFixtu "TRUNCATE TABLE [test_table];", "INSERT INTO [test_table] ([a], [b]) SELECT [a] AS [a], [b] AS [b] FROM [db].[upstream_table] AS [upstream_table];", ] + + +def test_mssql_merge_exists_switches_strategy_from_truncate_to_merge( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(MSSQLEngineAdapter) + + query = exp.select("*").from_("source") + source_queries = [SourceQuery(query_factory=lambda: query)] + + # Test WITHOUT mssql_merge_exists, should use DELETE+INSERT strategy + base_insert_overwrite = mocker.patch( + "sqlmesh.core.engine_adapter.base.EngineAdapter._insert_overwrite_by_condition" + ) + + adapter._insert_overwrite_by_condition( + table_name="target", + source_queries=source_queries, + target_columns_to_types={ + "id": exp.DataType.build("INT"), + "value": exp.DataType.build("VARCHAR"), + }, + where=None, + ) + + # Should call base DELETE+INSERT strategy + assert base_insert_overwrite.called + base_insert_overwrite.reset_mock() + + # Test WITH mssql_merge_exists uses MERGE strategy + super_insert_overwrite = mocker.patch( + "sqlmesh.core.engine_adapter.base.EngineAdapterWithIndexSupport._insert_overwrite_by_condition" + ) + + adapter._insert_overwrite_by_condition( + table_name="target", + source_queries=source_queries, + target_columns_to_types={ + "id": exp.DataType.build("INT"), + "value": exp.DataType.build("VARCHAR"), + }, + where=None, + 
table_properties={"mssql_merge_exists": True}, + ) + + # Should call super's MERGE strategy, not base DELETE+INSERT + assert super_insert_overwrite.called + assert not base_insert_overwrite.called + + +def test_python_scd2_model_preserves_physical_properties(make_snapshot): + @model( + "test_schema.python_scd2_with_mssql_merge", + kind=SCDType2ByTimeKind( + unique_key=["id"], + valid_from_name="valid_from", + valid_to_name="valid_to", + updated_at_name="updated_at", + ), + columns={ + "id": "INT", + "value": "VARCHAR", + "updated_at": "TIMESTAMP", + "valid_from": "TIMESTAMP", + "valid_to": "TIMESTAMP", + }, + physical_properties={"mssql_merge_exists": True}, + ) + def python_scd2_model(context, **kwargs): + import pandas as pd + + return pd.DataFrame( + {"id": [1, 2], "value": ["a", "b"], "updated_at": ["2024-01-01", "2024-01-02"]} + ) + + m = model.get_registry()["test_schema.python_scd2_with_mssql_merge"].model( + module_path=Path("."), + path=Path("."), + dialect="tsql", + ) + + # verify model has physical_properties that trigger merge strategy + assert "mssql_merge_exists" in m.physical_properties + snapshot: Snapshot = make_snapshot(m) + assert snapshot.node.physical_properties == m.physical_properties + assert snapshot.node.physical_properties.get("mssql_merge_exists") diff --git a/tests/core/engine_adapter/test_postgres.py b/tests/core/engine_adapter/test_postgres.py index 6134126a41..ebcdd03f55 100644 --- a/tests/core/engine_adapter/test_postgres.py +++ b/tests/core/engine_adapter/test_postgres.py @@ -177,3 +177,108 @@ def test_server_version(make_mocked_engine_adapter: t.Callable, mocker: MockerFi del adapter.server_version fetchone_mock.return_value = ("15.13 (Debian 15.13-1.pgdg120+1)",) assert adapter.server_version == (15, 13) + + +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(PostgresEngineAdapter) + relation = exp.to_table("test_schema.test_table", 
dialect="postgres") + new_grants_config = {"SELECT": ["user1", "user2"], "INSERT": ["user3"]} + + current_grants = [("SELECT", "old_user"), ("UPDATE", "admin_user")] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="postgres") + + assert executed_sql == ( + "SELECT privilege_type, grantee FROM information_schema.role_table_grants " + "WHERE table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = current_role AND grantee <> current_role" + ) + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 4 + + assert 'GRANT SELECT ON "test_schema"."test_table" TO "user1", "user2"' in sql_calls + assert 'GRANT INSERT ON "test_schema"."test_table" TO "user3"' in sql_calls + assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "old_user"' in sql_calls + assert 'REVOKE UPDATE ON "test_schema"."test_table" FROM "admin_user"' in sql_calls + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(PostgresEngineAdapter) + relation = exp.to_table("test_schema.test_table", dialect="postgres") + new_grants_config = {"SELECT": ["user1", "user2", "user3"], "INSERT": ["user2", "user4"]} + + current_grants = [ + ("SELECT", "user1"), + ("SELECT", "user5"), + ("INSERT", "user2"), + ("UPDATE", "user3"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="postgres") + + assert executed_sql == ( + "SELECT privilege_type, grantee FROM information_schema.role_table_grants " + "WHERE table_schema = 'test_schema' 
AND table_name = 'test_table' " + "AND grantor = current_role AND grantee <> current_role" + ) + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 4 + + assert 'GRANT SELECT ON "test_schema"."test_table" TO "user2", "user3"' in sql_calls + assert 'GRANT INSERT ON "test_schema"."test_table" TO "user4"' in sql_calls + assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "user5"' in sql_calls + assert 'REVOKE UPDATE ON "test_schema"."test_table" FROM "user3"' in sql_calls + + +def test_diff_grants_configs(make_mocked_engine_adapter: t.Callable): + new_grants = {"select": ["USER1", "USER2"], "insert": ["user3"]} + old_grants = {"SELECT": ["user1", "user4"], "UPDATE": ["user5"]} + + adapter = make_mocked_engine_adapter(PostgresEngineAdapter) + additions, removals = adapter._diff_grants_configs(new_grants, old_grants) + + assert additions["select"] == ["USER2"] + assert additions["insert"] == ["user3"] + + assert removals["SELECT"] == ["user4"] + assert removals["UPDATE"] == ["user5"] + + +def test_sync_grants_config_with_default_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(PostgresEngineAdapter) + relation = exp.to_table("test_table", dialect="postgres") # No schema + new_grants_config = {"SELECT": ["user1"], "INSERT": ["user2"]} + + currrent_grants = [("UPDATE", "old_user")] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=currrent_grants) + get_schema_mock = mocker.patch.object(adapter, "_get_current_schema", return_value="public") + + adapter.sync_grants_config(relation, new_grants_config) + + get_schema_mock.assert_called_once() + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="postgres") + + assert executed_sql == ( + "SELECT privilege_type, grantee FROM information_schema.role_table_grants " + "WHERE table_schema = 'public' AND table_name = 'test_table' " + "AND grantor = 
current_role AND grantee <> current_role" + ) diff --git a/tests/core/engine_adapter/test_redshift.py b/tests/core/engine_adapter/test_redshift.py index c5e3dfff17..5438943556 100644 --- a/tests/core/engine_adapter/test_redshift.py +++ b/tests/core/engine_adapter/test_redshift.py @@ -9,7 +9,7 @@ from sqlglot import parse_one from sqlmesh.core.engine_adapter import RedshiftEngineAdapter -from sqlmesh.core.engine_adapter.shared import DataObject +from sqlmesh.core.engine_adapter.shared import DataObject, DataObjectType from sqlmesh.utils.errors import SQLMeshError from tests.core.engine_adapter import to_sql_calls @@ -83,6 +83,154 @@ def test_varchar_size_workaround(make_mocked_engine_adapter: t.Callable, mocker: ] +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(RedshiftEngineAdapter) + relation = exp.to_table("test_schema.test_table", dialect="redshift") + new_grants_config = {"SELECT": ["user1", "user2"], "INSERT": ["user3"]} + + current_grants = [("SELECT", "old_user"), ("UPDATE", "legacy_user")] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="redshift") + expected_sql = ( + "SELECT privilege_type, grantee FROM information_schema.table_privileges " + "WHERE table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 4 + assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "old_user"' in sql_calls + assert 'REVOKE UPDATE ON "test_schema"."test_table" FROM "legacy_user"' in sql_calls + assert 'GRANT SELECT ON "test_schema"."test_table" TO "user1", "user2"' in sql_calls + assert 'GRANT 
INSERT ON "test_schema"."test_table" TO "user3"' in sql_calls + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(RedshiftEngineAdapter) + relation = exp.to_table("test_schema.test_table", dialect="redshift") + new_grants_config = { + "SELECT": ["user_shared", "user_new"], + "INSERT": ["user_shared", "user_writer"], + } + + current_grants = [ + ("SELECT", "user_shared"), + ("SELECT", "user_legacy"), + ("INSERT", "user_shared"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="redshift") + expected_sql = ( + "SELECT privilege_type, grantee FROM information_schema.table_privileges " + "WHERE table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 3 + assert 'REVOKE SELECT ON "test_schema"."test_table" FROM "user_legacy"' in sql_calls + assert 'GRANT SELECT ON "test_schema"."test_table" TO "user_new"' in sql_calls + assert 'GRANT INSERT ON "test_schema"."test_table" TO "user_writer"' in sql_calls + + +@pytest.mark.parametrize( + "table_type", + [ + (DataObjectType.TABLE), + (DataObjectType.VIEW), + (DataObjectType.MATERIALIZED_VIEW), + ], +) +def test_sync_grants_config_object_kind( + make_mocked_engine_adapter: t.Callable, + mocker: MockerFixture, + table_type: DataObjectType, +) -> None: + adapter = make_mocked_engine_adapter(RedshiftEngineAdapter) + relation = exp.to_table("test_schema.test_object", dialect="redshift") + + mocker.patch.object(adapter, "fetchall", return_value=[]) + + adapter.sync_grants_config(relation, {"SELECT": ["user_test"]}, table_type) + + 
sql_calls = to_sql_calls(adapter) + # we don't need to explicitly specify object_type for tables and views + assert sql_calls == [f'GRANT SELECT ON "test_schema"."test_object" TO "user_test"'] + + +def test_sync_grants_config_quotes(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(RedshiftEngineAdapter) + relation = exp.to_table('"TestSchema"."TestTable"', dialect="redshift") + new_grants_config = {"SELECT": ["user1", "user2"], "INSERT": ["user3"]} + + current_grants = [("SELECT", "user_old"), ("UPDATE", "user_legacy")] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="redshift") + expected_sql = ( + "SELECT privilege_type, grantee FROM information_schema.table_privileges " + "WHERE table_schema = 'TestSchema' AND table_name = 'TestTable' " + "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 4 + assert 'REVOKE SELECT ON "TestSchema"."TestTable" FROM "user_old"' in sql_calls + assert 'REVOKE UPDATE ON "TestSchema"."TestTable" FROM "user_legacy"' in sql_calls + assert 'GRANT SELECT ON "TestSchema"."TestTable" TO "user1", "user2"' in sql_calls + assert 'GRANT INSERT ON "TestSchema"."TestTable" TO "user3"' in sql_calls + + +def test_sync_grants_config_no_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(RedshiftEngineAdapter) + relation = exp.to_table("test_table", dialect="redshift") + new_grants_config = {"SELECT": ["user1"], "INSERT": ["user2"]} + + current_grants = [("UPDATE", "user_old")] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + get_schema_mock = 
mocker.patch.object(adapter, "_get_current_schema", return_value="public") + + adapter.sync_grants_config(relation, new_grants_config) + + get_schema_mock.assert_called_once() + + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="redshift") + expected_sql = ( + "SELECT privilege_type, grantee FROM information_schema.table_privileges " + "WHERE table_schema = 'public' AND table_name = 'test_table' " + "AND grantor = CURRENT_USER AND grantee <> CURRENT_USER" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 3 + assert 'REVOKE UPDATE ON "test_table" FROM "user_old"' in sql_calls + assert 'GRANT SELECT ON "test_table" TO "user1"' in sql_calls + assert 'GRANT INSERT ON "test_table" TO "user2"' in sql_calls + + def test_create_table_from_query_exists_no_if_not_exists( adapter: t.Callable, mocker: MockerFixture ): diff --git a/tests/core/engine_adapter/test_snowflake.py b/tests/core/engine_adapter/test_snowflake.py index 9a1e068aa6..dcb6820297 100644 --- a/tests/core/engine_adapter/test_snowflake.py +++ b/tests/core/engine_adapter/test_snowflake.py @@ -4,12 +4,14 @@ import pytest from pytest_mock.plugin import MockerFixture from sqlglot import exp, parse_one +from sqlglot.optimizer.normalize_identifiers import normalize_identifiers import sqlmesh.core.dialect as d from sqlmesh.core.dialect import normalize_model_name +from sqlmesh.core.engine_adapter import SnowflakeEngineAdapter from sqlmesh.core.engine_adapter.base import EngineAdapter +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.model import load_sql_based_model -from sqlmesh.core.engine_adapter import SnowflakeEngineAdapter from sqlmesh.core.model.definition import SqlModel from sqlmesh.core.node import IntervalUnit from sqlmesh.utils.errors import SQLMeshError @@ -39,6 +41,38 @@ def test_get_temp_table(mocker: MockerFixture, make_mocked_engine_adapter: t.Cal assert 
value.sql(dialect=adapter.dialect) == '"CATALOG"."DB"."__temp_TEST_TABLE_abcdefgh"' +def test_get_data_objects_lowercases_columns( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +) -> None: + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter, patch_get_data_objects=False) + + adapter.get_current_catalog = mocker.Mock(return_value="TEST_CATALOG") + + adapter.fetchdf = mocker.Mock( + return_value=pd.DataFrame( # type: ignore[assignment] + [ + { + "CATALOG": "TEST_CATALOG", + "NAME": "MY_TABLE", + "SCHEMA_NAME": "PUBLIC", + "TYPE": "TABLE", + "CLUSTERING_KEY": "ID", + } + ] + ) + ) + + data_objects = adapter._get_data_objects("TEST_CATALOG.PUBLIC") + + assert len(data_objects) == 1 + data_object = data_objects[0] + assert data_object.catalog == "TEST_CATALOG" + assert data_object.schema_name == "PUBLIC" + assert data_object.name == "MY_TABLE" + assert data_object.type == DataObjectType.TABLE + assert data_object.clustering_key == "ID" + + @pytest.mark.parametrize( "current_warehouse, current_warehouse_exp, configured_warehouse, configured_warehouse_exp, should_change", [ @@ -89,7 +123,7 @@ def test_get_temp_table(mocker: MockerFixture, make_mocked_engine_adapter: t.Cal def test_session( mocker: MockerFixture, make_mocked_engine_adapter: t.Callable, - current_warehouse: t.Union[str, exp.Expression], + current_warehouse: t.Union[str, exp.Expr], current_warehouse_exp: str, configured_warehouse: t.Optional[str], configured_warehouse_exp: t.Optional[str], @@ -212,6 +246,204 @@ def test_multiple_column_comments(make_mocked_engine_adapter: t.Callable, mocker ] +def test_sync_grants_config(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table("test_db.test_schema.test_table", dialect="snowflake"), dialect="snowflake" + ) + new_grants_config = {"SELECT": ["ROLE role1", "ROLE role2"], "INSERT": ["ROLE role3"]} + + current_grants 
= [ + ("SELECT", "ROLE old_role"), + ("UPDATE", "ROLE legacy_role"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + "SELECT privilege_type, grantee FROM TEST_DB.INFORMATION_SCHEMA.TABLE_PRIVILEGES " + "WHERE table_catalog = 'TEST_DB' AND table_schema = 'TEST_SCHEMA' AND table_name = 'TEST_TABLE' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert 'GRANT SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "ROLE1"' in sql_calls + assert 'GRANT SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "ROLE2"' in sql_calls + assert 'GRANT INSERT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "ROLE3"' in sql_calls + assert ( + 'REVOKE SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" FROM ROLE "OLD_ROLE"' + in sql_calls + ) + assert ( + 'REVOKE UPDATE ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" FROM ROLE "LEGACY_ROLE"' + in sql_calls + ) + + +def test_sync_grants_config_with_overlaps( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table("test_db.test_schema.test_table", dialect="snowflake"), dialect="snowflake" + ) + new_grants_config = { + "SELECT": ["ROLE shared", "ROLE new_role"], + "INSERT": ["ROLE shared", "ROLE writer"], + } + + current_grants = [ + ("SELECT", "ROLE shared"), + ("SELECT", "ROLE legacy"), + ("INSERT", "ROLE shared"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + 
fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + """SELECT privilege_type, grantee FROM TEST_DB.INFORMATION_SCHEMA.TABLE_PRIVILEGES """ + "WHERE table_catalog = 'TEST_DB' AND table_schema = 'TEST_SCHEMA' AND table_name = 'TEST_TABLE' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 3 + + assert ( + 'GRANT SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "NEW_ROLE"' in sql_calls + ) + assert ( + 'GRANT INSERT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" TO ROLE "WRITER"' in sql_calls + ) + assert ( + 'REVOKE SELECT ON TABLE "TEST_DB"."TEST_SCHEMA"."TEST_TABLE" FROM ROLE "LEGACY"' + in sql_calls + ) + + +@pytest.mark.parametrize( + "table_type, expected_keyword", + [ + (DataObjectType.TABLE, "TABLE"), + (DataObjectType.VIEW, "VIEW"), + (DataObjectType.MATERIALIZED_VIEW, "MATERIALIZED VIEW"), + (DataObjectType.MANAGED_TABLE, "DYNAMIC TABLE"), + ], +) +def test_sync_grants_config_object_kind( + make_mocked_engine_adapter: t.Callable, + mocker: MockerFixture, + table_type: DataObjectType, + expected_keyword: str, +) -> None: + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table("test_db.test_schema.test_object", dialect="snowflake"), dialect="snowflake" + ) + + mocker.patch.object(adapter, "fetchall", return_value=[]) + + adapter.sync_grants_config(relation, {"SELECT": ["ROLE test"]}, table_type) + + sql_calls = to_sql_calls(adapter) + assert sql_calls == [ + f'GRANT SELECT ON {expected_keyword} "TEST_DB"."TEST_SCHEMA"."TEST_OBJECT" TO ROLE "TEST"' + ] + + +def test_sync_grants_config_quotes(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + 
exp.to_table('"test_db"."test_schema"."test_table"', dialect="snowflake"), + dialect="snowflake", + ) + new_grants_config = {"SELECT": ["ROLE role1", "ROLE role2"], "INSERT": ["ROLE role3"]} + + current_grants = [ + ("SELECT", "ROLE old_role"), + ("UPDATE", "ROLE legacy_role"), + ] + fetchall_mock = mocker.patch.object(adapter, "fetchall", return_value=current_grants) + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + """SELECT privilege_type, grantee FROM "test_db".INFORMATION_SCHEMA.TABLE_PRIVILEGES """ + "WHERE table_catalog = 'test_db' AND table_schema = 'test_schema' AND table_name = 'test_table' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert 'GRANT SELECT ON TABLE "test_db"."test_schema"."test_table" TO ROLE "ROLE1"' in sql_calls + assert 'GRANT SELECT ON TABLE "test_db"."test_schema"."test_table" TO ROLE "ROLE2"' in sql_calls + assert 'GRANT INSERT ON TABLE "test_db"."test_schema"."test_table" TO ROLE "ROLE3"' in sql_calls + assert ( + 'REVOKE SELECT ON TABLE "test_db"."test_schema"."test_table" FROM ROLE "OLD_ROLE"' + in sql_calls + ) + assert ( + 'REVOKE UPDATE ON TABLE "test_db"."test_schema"."test_table" FROM ROLE "LEGACY_ROLE"' + in sql_calls + ) + + +def test_sync_grants_config_no_catalog_or_schema( + make_mocked_engine_adapter: t.Callable, mocker: MockerFixture +): + adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) + relation = normalize_identifiers( + exp.to_table('"TesT_Table"', dialect="snowflake"), dialect="snowflake" + ) + new_grants_config = {"SELECT": ["ROLE role1", "ROLE role2"], "INSERT": ["ROLE role3"]} + + current_grants = [ + ("SELECT", "ROLE old_role"), + ("UPDATE", "ROLE legacy_role"), + ] + fetchall_mock = 
mocker.patch.object(adapter, "fetchall", return_value=current_grants) + mocker.patch.object(adapter, "get_current_catalog", return_value="caTalog") + mocker.patch.object(adapter, "_get_current_schema", return_value="sChema") + + adapter.sync_grants_config(relation, new_grants_config) + + fetchall_mock.assert_called_once() + executed_query = fetchall_mock.call_args[0][0] + executed_sql = executed_query.sql(dialect="snowflake") + expected_sql = ( + """SELECT privilege_type, grantee FROM "caTalog".INFORMATION_SCHEMA.TABLE_PRIVILEGES """ + "WHERE table_catalog = 'caTalog' AND table_schema = 'sChema' AND table_name = 'TesT_Table' " + "AND grantor = CURRENT_ROLE() AND grantee <> CURRENT_ROLE()" + ) + assert executed_sql == expected_sql + + sql_calls = to_sql_calls(adapter) + assert len(sql_calls) == 5 + + assert 'GRANT SELECT ON TABLE "TesT_Table" TO ROLE "ROLE1"' in sql_calls + assert 'GRANT SELECT ON TABLE "TesT_Table" TO ROLE "ROLE2"' in sql_calls + assert 'GRANT INSERT ON TABLE "TesT_Table" TO ROLE "ROLE3"' in sql_calls + assert 'REVOKE SELECT ON TABLE "TesT_Table" FROM ROLE "OLD_ROLE"' in sql_calls + assert 'REVOKE UPDATE ON TABLE "TesT_Table" FROM ROLE "LEGACY_ROLE"' in sql_calls + + def test_df_to_source_queries_use_schema( make_mocked_engine_adapter: t.Callable, mocker: MockerFixture ): @@ -325,12 +557,12 @@ def test_create_managed_table(make_mocked_engine_adapter: t.Callable, mocker: Mo def test_drop_managed_table(make_mocked_engine_adapter: t.Callable, mocker: MockerFixture): adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter) - adapter.drop_managed_table(table_name=exp.parse_identifier("foo"), exists=False) - adapter.drop_managed_table(table_name=exp.parse_identifier("foo"), exists=True) + adapter.drop_managed_table(table_name="foo.bar", exists=False) + adapter.drop_managed_table(table_name="foo.bar", exists=True) assert to_sql_calls(adapter) == [ - 'DROP DYNAMIC TABLE "foo"', - 'DROP DYNAMIC TABLE IF EXISTS "foo"', + 'DROP DYNAMIC TABLE "foo"."bar"', 
+ 'DROP DYNAMIC TABLE IF EXISTS "foo"."bar"', ] @@ -688,7 +920,7 @@ def test_clone_table(mocker: MockerFixture, make_mocked_engine_adapter: t.Callab adapter = make_mocked_engine_adapter(SnowflakeEngineAdapter, default_catalog="test_catalog") adapter.clone_table("target_table", "source_table") adapter.cursor.execute.assert_called_once_with( - 'CREATE TABLE "target_table" CLONE "source_table"' + 'CREATE TABLE IF NOT EXISTS "target_table" CLONE "source_table"' ) # Validate with transient type we create the clone table accordingly @@ -700,7 +932,7 @@ def test_clone_table(mocker: MockerFixture, make_mocked_engine_adapter: t.Callab "target_table", "source_table", rendered_physical_properties=rendered_physical_properties ) adapter.cursor.execute.assert_called_once_with( - 'CREATE TRANSIENT TABLE "target_table" CLONE "source_table"' + 'CREATE TRANSIENT TABLE IF NOT EXISTS "target_table" CLONE "source_table"' ) # Validate other engine adapters would work as usual even when we pass the properties @@ -710,7 +942,7 @@ def test_clone_table(mocker: MockerFixture, make_mocked_engine_adapter: t.Callab "target_table", "source_table", rendered_physical_properties=rendered_physical_properties ) adapter.cursor.execute.assert_called_once_with( - 'CREATE TABLE "target_table" CLONE "source_table"' + 'CREATE TABLE IF NOT EXISTS "target_table" CLONE "source_table"' ) diff --git a/tests/core/engine_adapter/test_spark.py b/tests/core/engine_adapter/test_spark.py index f1929639a2..d7c3127f05 100644 --- a/tests/core/engine_adapter/test_spark.py +++ b/tests/core/engine_adapter/test_spark.py @@ -66,14 +66,15 @@ def test_create_table_properties(make_mocked_engine_adapter: t.Callable): ) +@pytest.mark.parametrize("wap_enabled", [True, False]) def test_replace_query_table_properties_not_exists( - mocker: MockerFixture, make_mocked_engine_adapter: t.Callable + mocker: MockerFixture, make_mocked_engine_adapter: t.Callable, wap_enabled: bool ): mocker.patch( 
"sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.table_exists", return_value=False, ) - adapter = make_mocked_engine_adapter(SparkEngineAdapter) + adapter = make_mocked_engine_adapter(SparkEngineAdapter, wap_enabled=wap_enabled) columns_to_types = { "cola": exp.DataType.build("INT"), @@ -89,10 +90,13 @@ def test_replace_query_table_properties_not_exists( table_properties={"a": exp.convert(1)}, ) - assert to_sql_calls(adapter) == [ + expected_sql_calls = [ "CREATE TABLE IF NOT EXISTS `test_table` USING ICEBERG PARTITIONED BY (`colb`) TBLPROPERTIES ('a'=1) AS SELECT CAST(`cola` AS INT) AS `cola`, CAST(`colb` AS STRING) AS `colb`, CAST(`colc` AS STRING) AS `colc` FROM (SELECT 1 AS `cola`, '2' AS `colb`, '3' AS `colc`) AS `_subquery`", - "INSERT INTO `test_table` SELECT * FROM `test_table`", ] + if wap_enabled: + expected_sql_calls.append("INSERT INTO `test_table` SELECT * FROM `test_table`") + + assert to_sql_calls(adapter) == expected_sql_calls def test_replace_query_table_properties_exists( @@ -220,7 +224,7 @@ def test_replace_query_self_ref_not_exists( lambda self: "spark_catalog", ) mocker.patch( - "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.get_current_database", + "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter._get_current_schema", side_effect=lambda: "default", ) @@ -279,7 +283,7 @@ def test_replace_query_self_ref_exists( return_value="spark_catalog", ) mocker.patch( - "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.get_current_database", + "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter._get_current_schema", return_value="default", ) @@ -825,13 +829,16 @@ def test_wap_publish(make_mocked_engine_adapter: t.Callable, mocker: MockerFixtu ) -def test_create_table_iceberg(mocker: MockerFixture, make_mocked_engine_adapter: t.Callable): +@pytest.mark.parametrize("wap_enabled", [True, False]) +def test_create_table_iceberg( + mocker: MockerFixture, make_mocked_engine_adapter: t.Callable, wap_enabled: bool +): mocker.patch( 
"sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.table_exists", return_value=False, ) - adapter = make_mocked_engine_adapter(SparkEngineAdapter) + adapter = make_mocked_engine_adapter(SparkEngineAdapter, wap_enabled=wap_enabled) columns_to_types = { "cola": exp.DataType.build("INT"), @@ -846,10 +853,13 @@ def test_create_table_iceberg(mocker: MockerFixture, make_mocked_engine_adapter: storage_format="ICEBERG", ) - assert to_sql_calls(adapter) == [ + expected_sql_calls = [ "CREATE TABLE IF NOT EXISTS `test_table` (`cola` INT, `colb` STRING, `colc` STRING) USING ICEBERG PARTITIONED BY (`colb`)", - "INSERT INTO `test_table` SELECT * FROM `test_table`", ] + if wap_enabled: + expected_sql_calls.append("INSERT INTO `test_table` SELECT * FROM `test_table`") + + assert to_sql_calls(adapter) == expected_sql_calls def test_comments_hive(mocker: MockerFixture, make_mocked_engine_adapter: t.Callable): @@ -973,7 +983,7 @@ def test_create_table_with_wap(make_mocked_engine_adapter: t.Callable, mocker: M "sqlmesh.core.engine_adapter.spark.SparkEngineAdapter.table_exists", return_value=False, ) - adapter = make_mocked_engine_adapter(SparkEngineAdapter) + adapter = make_mocked_engine_adapter(SparkEngineAdapter, wap_enabled=True) adapter.create_table( "catalog.schema.table.branch_wap_12345", diff --git a/tests/core/engine_adapter/test_trino.py b/tests/core/engine_adapter/test_trino.py index 07c4657eb3..1bfe82b858 100644 --- a/tests/core/engine_adapter/test_trino.py +++ b/tests/core/engine_adapter/test_trino.py @@ -11,6 +11,7 @@ from sqlmesh.core.model import load_sql_based_model from sqlmesh.core.model.definition import SqlModel from sqlmesh.core.dialect import schema_ +from sqlmesh.utils.date import to_ds from sqlmesh.utils.errors import SQLMeshError from tests.core.engine_adapter import to_sql_calls @@ -403,6 +404,123 @@ def test_delta_timestamps(make_mocked_engine_adapter: t.Callable): } +def test_timestamp_mapping(): + """Test that timestamp_mapping config property is 
properly defined and accessible.""" + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + ) + + assert config._connection_factory_with_kwargs.keywords["source"] == "sqlmesh" + + adapter = config.create_engine_adapter() + assert adapter.timestamp_mapping is None + + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + source="my_source", + timestamp_mapping={ + "TIMESTAMP": "TIMESTAMP(6)", + "TIMESTAMP(3)": "TIMESTAMP WITH TIME ZONE", + }, + ) + assert config._connection_factory_with_kwargs.keywords["source"] == "my_source" + adapter = config.create_engine_adapter() + assert adapter.timestamp_mapping is not None + assert adapter.timestamp_mapping[exp.DataType.build("TIMESTAMP")] == exp.DataType.build( + "TIMESTAMP(6)" + ) + + +def test_delta_timestamps_with_custom_mapping(make_mocked_engine_adapter: t.Callable): + """Test that _apply_timestamp_mapping + _to_delta_ts respects custom timestamp_mapping.""" + # Create config with custom timestamp mapping + # Mapped columns are skipped by _to_delta_ts + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + timestamp_mapping={ + "TIMESTAMP": "TIMESTAMP(3)", + "TIMESTAMP(1)": "TIMESTAMP(3)", + "TIMESTAMP WITH TIME ZONE": "TIMESTAMP(6) WITH TIME ZONE", + "TIMESTAMP(1) WITH TIME ZONE": "TIMESTAMP(6) WITH TIME ZONE", + }, + ) + + adapter = make_mocked_engine_adapter( + TrinoEngineAdapter, timestamp_mapping=config.timestamp_mapping + ) + + ts3 = exp.DataType.build("timestamp(3)") + ts6_tz = exp.DataType.build("timestamp(6) with time zone") + + columns_to_types = { + "ts": exp.DataType.build("TIMESTAMP"), + "ts_1": exp.DataType.build("TIMESTAMP(1)"), + "ts_tz": exp.DataType.build("TIMESTAMP WITH TIME ZONE"), + "ts_tz_1": exp.DataType.build("TIMESTAMP(1) WITH TIME ZONE"), + } + + # Apply mapping first, then convert to delta types (skipping mapped columns) + mapped_columns_to_types, mapped_column_names = 
adapter._apply_timestamp_mapping( + columns_to_types + ) + delta_columns_to_types = adapter._to_delta_ts(mapped_columns_to_types, mapped_column_names) + + # All types were mapped, so _to_delta_ts skips them - they keep their mapped types + assert delta_columns_to_types == { + "ts": ts3, + "ts_1": ts3, + "ts_tz": ts6_tz, + "ts_tz_1": ts6_tz, + } + + +def test_delta_timestamps_with_partial_mapping(make_mocked_engine_adapter: t.Callable): + """Test that _apply_timestamp_mapping + _to_delta_ts uses custom mapping for specified types.""" + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + timestamp_mapping={ + "TIMESTAMP": "TIMESTAMP(3)", + }, + ) + + adapter = make_mocked_engine_adapter( + TrinoEngineAdapter, timestamp_mapping=config.timestamp_mapping + ) + + ts3 = exp.DataType.build("TIMESTAMP(3)") + ts6 = exp.DataType.build("timestamp(6)") + ts3_tz = exp.DataType.build("timestamp(3) with time zone") + + columns_to_types = { + "ts": exp.DataType.build("TIMESTAMP"), + "ts_1": exp.DataType.build("TIMESTAMP(1)"), + "ts_tz": exp.DataType.build("TIMESTAMP WITH TIME ZONE"), + } + + # Apply mapping first, then convert to delta types (skipping mapped columns) + mapped_columns_to_types, mapped_column_names = adapter._apply_timestamp_mapping( + columns_to_types + ) + delta_columns_to_types = adapter._to_delta_ts(mapped_columns_to_types, mapped_column_names) + + # TIMESTAMP is in mapping → TIMESTAMP(3), skipped by _to_delta_ts + # TIMESTAMP(1) is NOT in mapping, uses default TIMESTAMP → ts6 + # TIMESTAMP WITH TIME ZONE is NOT in mapping, uses default TIMESTAMPTZ → ts3_tz + assert delta_columns_to_types == { + "ts": ts3, # Mapped to TIMESTAMP(3), skipped by _to_delta_ts + "ts_1": ts6, # Not in mapping, uses default + "ts_tz": ts3_tz, # Not in mapping, uses default + } + + def test_table_format(trino_mocked_engine_adapter: TrinoEngineAdapter, mocker: MockerFixture): adapter = trino_mocked_engine_adapter mocker.patch( @@ -668,7 +786,7 @@ def 
test_replace_table_catalog_support( adapter.replace_query( table_name=".".join([catalog_name, "schema", "test_table"]), - query_or_df=parse_one("SELECT 1 AS col"), + query_or_df=t.cast(exp.Query, parse_one("SELECT 1 AS col")), ) sql_calls = to_sql_calls(adapter) @@ -683,3 +801,148 @@ def test_replace_table_catalog_support( sql_calls[0] == f'CREATE TABLE IF NOT EXISTS "{catalog_name}"."schema"."test_table" AS SELECT 1 AS "col"' ) + + +@pytest.mark.parametrize( + "catalog_type_overrides", [{}, {"my_catalog": "hive"}, {"other_catalog": "iceberg"}] +) +def test_insert_overwrite_time_partition_hive( + make_mocked_engine_adapter: t.Callable, catalog_type_overrides: t.Dict[str, str] +): + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + catalog_type_overrides=catalog_type_overrides, + ) + adapter: TrinoEngineAdapter = make_mocked_engine_adapter( + TrinoEngineAdapter, catalog_type_overrides=config.catalog_type_overrides + ) + adapter.fetchone = MagicMock(return_value=None) # type: ignore + + adapter.insert_overwrite_by_time_partition( + table_name=".".join(["my_catalog", "schema", "test_table"]), + query_or_df=t.cast(exp.Query, parse_one("SELECT a, b FROM tbl")), + start="2022-01-01", + end="2022-01-02", + time_column="b", + time_formatter=lambda x, _: exp.Literal.string(to_ds(x)), + target_columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, + ) + + assert to_sql_calls(adapter) == [ + "SET SESSION my_catalog.insert_existing_partitions_behavior='OVERWRITE'", + 'INSERT INTO "my_catalog"."schema"."test_table" ("a", "b") SELECT "a", "b" FROM (SELECT "a", "b" FROM "tbl") AS "_subquery" WHERE "b" BETWEEN \'2022-01-01\' AND \'2022-01-02\'', + "SET SESSION my_catalog.insert_existing_partitions_behavior='APPEND'", + ] + + +@pytest.mark.parametrize( + "catalog_type_overrides", + [ + {"my_catalog": "iceberg"}, + {"my_catalog": "unknown"}, + ], +) +def test_insert_overwrite_time_partition_iceberg( + 
make_mocked_engine_adapter: t.Callable, catalog_type_overrides: t.Dict[str, str] +): + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + catalog_type_overrides=catalog_type_overrides, + ) + adapter: TrinoEngineAdapter = make_mocked_engine_adapter( + TrinoEngineAdapter, catalog_type_overrides=config.catalog_type_overrides + ) + adapter.fetchone = MagicMock(return_value=None) # type: ignore + + adapter.insert_overwrite_by_time_partition( + table_name=".".join(["my_catalog", "schema", "test_table"]), + query_or_df=t.cast(exp.Query, parse_one("SELECT a, b FROM tbl")), + start="2022-01-01", + end="2022-01-02", + time_column="b", + time_formatter=lambda x, _: exp.Literal.string(to_ds(x)), + target_columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, + ) + + assert to_sql_calls(adapter) == [ + 'DELETE FROM "my_catalog"."schema"."test_table" WHERE "b" BETWEEN \'2022-01-01\' AND \'2022-01-02\'', + 'INSERT INTO "my_catalog"."schema"."test_table" ("a", "b") SELECT "a", "b" FROM (SELECT "a", "b" FROM "tbl") AS "_subquery" WHERE "b" BETWEEN \'2022-01-01\' AND \'2022-01-02\'', + ] + + +def test_delta_timestamps_with_non_timestamp_columns(make_mocked_engine_adapter: t.Callable): + """Test that _apply_timestamp_mapping + _to_delta_ts handles non-timestamp columns.""" + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + timestamp_mapping={ + "TIMESTAMP": "TIMESTAMP(3)", + }, + ) + + adapter = make_mocked_engine_adapter( + TrinoEngineAdapter, timestamp_mapping=config.timestamp_mapping + ) + + ts3 = exp.DataType.build("TIMESTAMP(3)") + ts6 = exp.DataType.build("timestamp(6)") + + columns_to_types = { + "ts": exp.DataType.build("TIMESTAMP"), + "ts_1": exp.DataType.build("TIMESTAMP(1)"), + "int_col": exp.DataType.build("INT"), + "varchar_col": exp.DataType.build("VARCHAR(100)"), + "decimal_col": exp.DataType.build("DECIMAL(10,2)"), + } + + # Apply mapping first, then convert to delta 
types (skipping mapped columns) + mapped_columns_to_types, mapped_column_names = adapter._apply_timestamp_mapping( + columns_to_types + ) + delta_columns_to_types = adapter._to_delta_ts(mapped_columns_to_types, mapped_column_names) + + # TIMESTAMP is in mapping → TIMESTAMP(3), skipped by _to_delta_ts + # TIMESTAMP(1) is NOT in mapping (exact match), uses default TIMESTAMP → ts6 + # Non-timestamp columns should pass through unchanged + assert delta_columns_to_types == { + "ts": ts3, # Mapped to TIMESTAMP(3), skipped by _to_delta_ts + "ts_1": ts6, # Not in mapping, uses default + "int_col": exp.DataType.build("INT"), + "varchar_col": exp.DataType.build("VARCHAR(100)"), + "decimal_col": exp.DataType.build("DECIMAL(10,2)"), + } + + +def test_delta_timestamps_with_empty_mapping(make_mocked_engine_adapter: t.Callable): + """Test that _to_delta_ts handles empty custom mapping dictionary.""" + config = TrinoConnectionConfig( + user="user", + host="host", + catalog="catalog", + timestamp_mapping={}, + ) + + adapter = make_mocked_engine_adapter( + TrinoEngineAdapter, timestamp_mapping=config.timestamp_mapping + ) + + ts6 = exp.DataType.build("timestamp(6)") + ts3_tz = exp.DataType.build("timestamp(3) with time zone") + + columns_to_types = { + "ts": exp.DataType.build("TIMESTAMP"), + "ts_tz": exp.DataType.build("TIMESTAMP WITH TIME ZONE"), + } + + delta_columns_to_types = adapter._to_delta_ts(columns_to_types) + + # With empty custom mapping, should fall back to defaults + assert delta_columns_to_types == { + "ts": ts6, + "ts_tz": ts3_tz, + } diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/.gitkeep b/tests/core/integration/__init__.py similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/.gitkeep rename to tests/core/integration/__init__.py diff --git a/tests/core/integration/conftest.py b/tests/core/integration/conftest.py new file mode 100644 index 0000000000..99875e5974 --- /dev/null +++ b/tests/core/integration/conftest.py @@ -0,0 
+1,8 @@ +import pytest +from pytest_mock.plugin import MockerFixture + + +@pytest.fixture(autouse=True) +def mock_choices(mocker: MockerFixture): + mocker.patch("sqlmesh.core.console.TerminalConsole._get_snapshot_change_category") + mocker.patch("sqlmesh.core.console.TerminalConsole._prompt_backfill") diff --git a/tests/core/integration/test_audits.py b/tests/core/integration/test_audits.py new file mode 100644 index 0000000000..457974fdac --- /dev/null +++ b/tests/core/integration/test_audits.py @@ -0,0 +1,348 @@ +from __future__ import annotations + +import typing as t +from textwrap import dedent +import pytest +from pathlib import Path +import time_machine +from sqlglot import exp +from IPython.utils.capture import capture_output + +from sqlmesh.core.config import ( + Config, + ModelDefaultsConfig, +) +from sqlmesh.core.context import Context +from sqlmesh.utils.errors import ( + PlanError, +) +from tests.utils.test_helpers import use_terminal_console +from tests.utils.test_filesystem import create_temp_file + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +@use_terminal_console +def test_audit_only_metadata_change(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Add a new audit + model = context.get_model("sushi.waiter_revenue_by_day") + audits = model.audits.copy() + audits.append(("number_of_rows", {"threshold": exp.Literal.number(1)})) + model = model.copy(update={"audits": audits}) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + assert len(plan.new_snapshots) == 2 + assert all(s.change_category.is_metadata for s in plan.new_snapshots) + assert not plan.missing_intervals + + with capture_output() as output: + context.apply(plan) + + assert "Auditing models" in output.stdout + assert model.name in output.stdout + + +@use_terminal_console +def test_audits_running_on_metadata_changes(tmp_path: Path): + 
def setup_senario(model_before: str, model_after: str): + models_dir = Path("models") + create_temp_file(tmp_path, models_dir / "test.sql", model_before) + + # Create first snapshot + context = Context(paths=tmp_path, config=Config()) + context.plan("prod", no_prompts=True, auto_apply=True) + + # Create second (metadata) snapshot + create_temp_file(tmp_path, models_dir / "test.sql", model_after) + context.load() + + with capture_output() as output: + with pytest.raises(PlanError): + context.plan("prod", no_prompts=True, auto_apply=True) + + assert 'Failed models\n\n "model"' in output.stdout + + return output + + # Ensure incorrect audits (bad data, incorrect definition etc) are evaluated immediately + output = setup_senario( + "MODEL (name model); SELECT NULL AS col", + "MODEL (name model, audits (not_null(columns=[col]))); SELECT NULL AS col", + ) + assert "'not_null' audit error: 1 row failed" in output.stdout + + output = setup_senario( + "MODEL (name model); SELECT NULL AS col", + "MODEL (name model, audits (not_null(columns=[this_col_does_not_exist]))); SELECT NULL AS col", + ) + assert ( + 'Binder Error: Referenced column "this_col_does_not_exist" not found in \nFROM clause!' 
+ in output.stdout + ) + + +@pytest.mark.slow +def test_default_audits_applied_in_plan(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + + # Create a model with data that will pass the audits + create_temp_file( + tmp_path, + models_dir / "orders.sql", + dedent(""" + MODEL ( + name test.orders, + kind FULL + ); + + SELECT + 1 AS order_id, + 'customer_1' AS customer_id, + 100.50 AS amount, + '2024-01-01'::DATE AS order_date + UNION ALL + SELECT + 2 AS order_id, + 'customer_2' AS customer_id, + 200.75 AS amount, + '2024-01-02'::DATE AS order_date + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + dialect="duckdb", + audits=[ + "not_null(columns := [order_id, customer_id])", + "unique_values(columns := [order_id])", + ], + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Create and apply plan, here audits should pass + plan = context.plan("prod", no_prompts=True) + context.apply(plan) + + # Verify model has the default audits + model = context.get_model("test.orders") + assert len(model.audits) == 2 + + audit_names = [audit[0] for audit in model.audits] + assert "not_null" in audit_names + assert "unique_values" in audit_names + + # Verify audit arguments are preserved + for audit_name, audit_args in model.audits: + if audit_name == "not_null": + assert "columns" in audit_args + columns = [col.name for col in audit_args["columns"].expressions] + assert "order_id" in columns + assert "customer_id" in columns + elif audit_name == "unique_values": + assert "columns" in audit_args + columns = [col.name for col in audit_args["columns"].expressions] + assert "order_id" in columns + + +@pytest.mark.slow +def test_default_audits_fail_on_bad_data(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + + # Create a model with data that violates NOT NULL constraint + create_temp_file( + tmp_path, + models_dir / "bad_orders.sql", + dedent(""" + MODEL ( + name 
test.bad_orders, + kind FULL + ); + + SELECT + 1 AS order_id, + NULL AS customer_id, -- This violates NOT NULL + 100.50 AS amount, + '2024-01-01'::DATE AS order_date + UNION ALL + SELECT + 2 AS order_id, + 'customer_2' AS customer_id, + 200.75 AS amount, + '2024-01-02'::DATE AS order_date + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + dialect="duckdb", audits=["not_null(columns := [customer_id])"] + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Plan should fail due to audit failure + with pytest.raises(PlanError): + context.plan("prod", no_prompts=True, auto_apply=True) + + +@pytest.mark.slow +def test_default_audits_with_model_specific_audits(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + audits_dir = tmp_path / "audits" + audits_dir.mkdir(exist_ok=True) + + create_temp_file( + tmp_path, + audits_dir / "range_check.sql", + dedent(""" + AUDIT ( + name range_check + ); + + SELECT * FROM @this_model + WHERE @column < @min_value OR @column > @max_value + """), + ) + + # Create a model with its own audits in addition to defaults + create_temp_file( + tmp_path, + models_dir / "products.sql", + dedent(""" + MODEL ( + name test.products, + kind FULL, + audits ( + range_check(column := price, min_value := 0, max_value := 10000) + ) + ); + + SELECT + 1 AS product_id, + 'Widget' AS product_name, + 99.99 AS price + UNION ALL + SELECT + 2 AS product_id, + 'Gadget' AS product_name, + 149.99 AS price + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + dialect="duckdb", + audits=[ + "not_null(columns := [product_id, product_name])", + "unique_values(columns := [product_id])", + ], + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Create and apply plan + plan = context.plan("prod", no_prompts=True) + context.apply(plan) + + # Verify model has both default and model-specific audits + model = context.get_model("test.products") + assert len(model.audits) == 3 + 
+ audit_names = [audit[0] for audit in model.audits] + assert "not_null" in audit_names + assert "unique_values" in audit_names + assert "range_check" in audit_names + + # Verify audit execution order, default audits first then model-specific + assert model.audits[0][0] == "not_null" + assert model.audits[1][0] == "unique_values" + assert model.audits[2][0] == "range_check" + + +@pytest.mark.slow +def test_default_audits_with_custom_audit_definitions(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir(exist_ok=True) + audits_dir = tmp_path / "audits" + audits_dir.mkdir(exist_ok=True) + + # Create custom audit definition + create_temp_file( + tmp_path, + audits_dir / "positive_amount.sql", + dedent(""" + AUDIT ( + name positive_amount + ); + + SELECT * FROM @this_model + WHERE @column <= 0 + """), + ) + + # Create a model + create_temp_file( + tmp_path, + models_dir / "transactions.sql", + dedent(""" + MODEL ( + name test.transactions, + kind FULL + ); + + SELECT + 1 AS transaction_id, + 'TXN001' AS transaction_code, + 250.00 AS amount, + '2024-01-01'::DATE AS transaction_date + UNION ALL + SELECT + 2 AS transaction_id, + 'TXN002' AS transaction_code, + 150.00 AS amount, + '2024-01-02'::DATE AS transaction_date + """), + ) + + config = Config( + model_defaults=ModelDefaultsConfig( + dialect="duckdb", + audits=[ + "not_null(columns := [transaction_id, transaction_code])", + "unique_values(columns := [transaction_id])", + "positive_amount(column := amount)", + ], + ) + ) + + context = Context(paths=tmp_path, config=config) + + # Create and apply plan + plan = context.plan("prod", no_prompts=True) + context.apply(plan) + + # Verify model has all default audits including custom + model = context.get_model("test.transactions") + assert len(model.audits) == 3 + + audit_names = [audit[0] for audit in model.audits] + assert "not_null" in audit_names + assert "unique_values" in audit_names + assert "positive_amount" in audit_names + + # Verify custom audit 
arguments + for audit_name, audit_args in model.audits: + if audit_name == "positive_amount": + assert "column" in audit_args + assert audit_args["column"].name == "amount" diff --git a/tests/core/integration/test_auto_restatement.py b/tests/core/integration/test_auto_restatement.py new file mode 100644 index 0000000000..1bda373a8f --- /dev/null +++ b/tests/core/integration/test_auto_restatement.py @@ -0,0 +1,219 @@ +from __future__ import annotations + +import typing as t +import pandas as pd # noqa: TID253 +import pytest +import time_machine +from sqlglot import exp + +from sqlmesh.core import dialect as d +from sqlmesh.core.macros import macro +from sqlmesh.core.model import ( + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.utils.date import to_timestamp + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 01:00:00 UTC") +def test_run_auto_restatement(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + context.engine_adapter.execute( + "CREATE TABLE _test_auto_restatement_intervals (name STRING, start_ds STRING, end_ds STRING)" + ) + + @macro() + def record_intervals( + evaluator, name: exp.Expr, start: exp.Expr, end: exp.Expr, **kwargs: t.Any + ) -> None: + if evaluator.runtime_stage == "evaluating": + evaluator.engine_adapter.insert_append( + "_test_auto_restatement_intervals", + pd.DataFrame({"name": [name.name], "start_ds": [start.name], "end_ds": [end.name]}), + ) + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday + auto_restatement_intervals 3, + ), + start '2023-01-01', + ); + + @record_intervals('new_model', @start_ds, @end_ds); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + context.upsert_model(new_model) + + new_model_downstream_expr = d.parse( + """ + MODEL ( 
+ name memory.sushi.new_model_downstream, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + ), + cron '@hourly', + ); + + @record_intervals('new_model_downstream', @start_ts, @end_ts); + + SELECT * FROM memory.sushi.new_model; + """ + ) + new_model_downstream = load_sql_based_model(new_model_downstream_expr) + context.upsert_model(new_model_downstream) + + plan = context.plan_builder("prod").build() + context.apply(plan) + + with time_machine.travel("2023-01-08 06:01:00 UTC"): + assert context.run() + + recorded_intervals_df = context.engine_adapter.fetchdf( + "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model'" + ) + # The first interval is the first backfill and the second interval should be the 3 auto restated intervals + assert recorded_intervals_df.to_dict() == { + "start_ds": {0: "2023-01-01", 1: "2023-01-05"}, + "end_ds": {0: "2023-01-07", 1: "2023-01-07"}, + } + recorded_intervals_downstream_df = context.engine_adapter.fetchdf( + "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model_downstream'" + ) + # The first interval is the first backfill, the second interval should be the 3 days of restated intervals, and + # the third interval should catch up to the current hour + assert recorded_intervals_downstream_df.to_dict() == { + "start_ds": { + 0: "2023-01-01 00:00:00", + 1: "2023-01-05 00:00:00", + 2: "2023-01-08 01:00:00", + }, + "end_ds": { + 0: "2023-01-08 00:59:59.999999", + 1: "2023-01-07 23:59:59.999999", + 2: "2023-01-08 05:59:59.999999", + }, + } + + snapshot = context.get_snapshot(new_model.name) + snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[ + snapshot.snapshot_id + ] + assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00") + assert not snapshot.pending_restatement_intervals + + snapshot_downstream = context.get_snapshot(new_model_downstream.name) + snapshot_downstream = context.state_sync.state_sync.get_snapshots( + 
[snapshot_downstream.snapshot_id] + )[snapshot_downstream.snapshot_id] + assert not snapshot_downstream.next_auto_restatement_ts + assert not snapshot_downstream.pending_restatement_intervals + + +@time_machine.travel("2023-01-08 01:00:00 UTC") +def test_run_auto_restatement_plan_preview(init_and_plan_context: t.Callable): + context, init_plan = init_and_plan_context("examples/sushi") + context.apply(init_plan) + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + auto_restatement_cron '0 6 * * 7', + ), + start '2023-01-01', + ); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + context.upsert_model(new_model) + snapshot = context.get_snapshot(new_model.name) + + plan_dev = context.plan_builder("dev").build() + # Make sure that a limited preview is computed by default + assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07") + assert plan_dev.missing_intervals == [ + SnapshotIntervals( + snapshot.snapshot_id, + [(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ) + ] + assert not plan_dev.deployability_index.is_deployable(snapshot.snapshot_id) + context.apply(plan_dev) + + plan_prod = context.plan_builder("prod").build() + assert plan_prod.missing_intervals == [ + SnapshotIntervals( + context.get_snapshot(new_model.name).snapshot_id, + [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ) + ] + context.apply(plan_prod) + + +@time_machine.travel("2023-01-08 01:00:00 UTC") +def test_run_auto_restatement_failure(init_and_plan_context: t.Callable): + 
context, _ = init_and_plan_context("examples/sushi") + + @macro() + def fail_auto_restatement(evaluator, start: exp.Expr, **kwargs: t.Any) -> None: + if evaluator.runtime_stage == "evaluating" and start.name != "2023-01-01": + raise Exception("Failed") + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday + auto_restatement_intervals 3, + ), + start '2023-01-01', + ); + + @fail_auto_restatement(@start_ds); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + context.upsert_model(new_model) + + plan = context.plan_builder("prod").build() + context.apply(plan) + + with time_machine.travel("2023-01-08 06:01:00 UTC"): + run_status = context.run() + assert run_status.is_failure + + snapshot = context.get_snapshot(new_model.name) + snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[ + snapshot.snapshot_id + ] + assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00") + assert snapshot.pending_restatement_intervals == [ + (to_timestamp("2023-01-05"), to_timestamp("2023-01-08")) + ] diff --git a/tests/core/integration/test_aux_commands.py b/tests/core/integration/test_aux_commands.py new file mode 100644 index 0000000000..326e81e0c1 --- /dev/null +++ b/tests/core/integration/test_aux_commands.py @@ -0,0 +1,367 @@ +from __future__ import annotations + +import typing as t +from unittest.mock import patch +import pytest +from pathlib import Path +from sqlmesh.core.config.naming import NameInferenceConfig +from sqlmesh.core.model.common import ParsableSql +import time_machine +from pytest_mock.plugin import MockerFixture + +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, +) +from sqlmesh.core.context import Context +from sqlmesh.core.model import ( + SqlModel, +) +from 
sqlmesh.utils.errors import ( + SQLMeshError, +) +from sqlmesh.utils.date import now +from tests.conftest import DuckDBMetadata +from tests.utils.test_helpers import use_terminal_console +from tests.utils.test_filesystem import create_temp_file +from tests.core.integration.utils import add_projection_to_model, apply_to_environment + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_table_name(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") + assert snapshot + assert ( + context.table_name("sushi.waiter_revenue_by_day", "prod") + == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}" + ) + + with pytest.raises(SQLMeshError, match="Environment 'dev' was not found."): + context.table_name("sushi.waiter_revenue_by_day", "dev") + + with pytest.raises( + SQLMeshError, match="Model 'sushi.missing' was not found in environment 'prod'." 
+ ): + context.table_name("sushi.missing", "prod") + + # Add a new projection + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + context.plan("dev_a", auto_apply=True, no_prompts=True, skip_tests=True) + + new_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") + assert new_snapshot.version != snapshot.version + + assert ( + context.table_name("sushi.waiter_revenue_by_day", "dev_a") + == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{new_snapshot.version}" + ) + + # Make a forward-only change + context.upsert_model(model, stamp="forward_only") + + context.plan("dev_b", auto_apply=True, no_prompts=True, skip_tests=True, forward_only=True) + + forward_only_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") + assert forward_only_snapshot.version == snapshot.version + assert forward_only_snapshot.dev_version != snapshot.version + + assert ( + context.table_name("sushi.waiter_revenue_by_day", "dev_b") + == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{forward_only_snapshot.dev_version}__dev" + ) + + assert ( + context.table_name("sushi.waiter_revenue_by_day", "dev_b", prod=True) + == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}" + ) + + +def test_janitor_cleanup_order(mocker: MockerFixture, tmp_path: Path): + def setup_scenario(): + models_dir = tmp_path / "models" + + if not models_dir.exists(): + models_dir.mkdir() + + model1_path = models_dir / "model1.sql" + + with open(model1_path, "w") as f: + f.write("MODEL(name test.model1, kind FULL); SELECT 1 AS col") + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + ) + ctx = Context(paths=[tmp_path], config=config) + + ctx.plan("dev", no_prompts=True, auto_apply=True) + + model1_snapshot = ctx.get_snapshot("test.model1") + + # Delete the model file to cause a snapshot expiration + model1_path.unlink() + + ctx.load() + + ctx.plan("dev", 
no_prompts=True, auto_apply=True) + + # Invalidate the environment to cause an environment cleanup + ctx.invalidate_environment("dev") + + try: + ctx._run_janitor(ignore_ttl=True) + except: + pass + + return ctx, model1_snapshot + + # Case 1: Assume that the snapshot cleanup yields an error, the snapshot records + # should still exist in the state sync so the next janitor can retry + mocker.patch( + "sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup", + side_effect=Exception("snapshot cleanup error"), + ) + ctx, model1_snapshot = setup_scenario() + + # - Check that the snapshot record exists in the state sync + state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id]) + assert state_snapshot + + # - Run the janitor again, this time it should succeed + mocker.patch("sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup") + ctx._run_janitor(ignore_ttl=True) + + # - Check that the snapshot record does not exist in the state sync anymore + state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id]) + assert not state_snapshot + + # Case 2: Assume that the view cleanup yields an error, the enviroment + # record should still exist + mocker.patch( + "sqlmesh.core.context.cleanup_expired_views", side_effect=Exception("view cleanup error") + ) + ctx, model1_snapshot = setup_scenario() + + views = ctx.fetchdf("FROM duckdb_views() SELECT * EXCLUDE(sql) WHERE NOT internal") + assert views.empty + + # - Check that the environment record exists in the state sync + assert ctx.state_sync.get_environment("dev") + + # - Run the janitor again, this time it should succeed + mocker.patch("sqlmesh.core.context.cleanup_expired_views") + ctx._run_janitor(ignore_ttl=True) + + # - Check that the environment record does not exist in the state sync anymore + assert not ctx.state_sync.get_environment("dev") + + +@use_terminal_console +def test_destroy(copy_to_temp_path): + # Testing project with two gateways to verify 
cleanup is performed across engines + paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer") + path = Path(paths[0]) + first_db_path = str(path / "db_1.db") + second_db_path = str(path / "db_2.db") + + config = Config( + gateways={ + "first": GatewayConfig( + connection=DuckDBConnectionConfig(database=first_db_path), + variables={"overriden_var": "gateway_1"}, + ), + "second": GatewayConfig( + connection=DuckDBConnectionConfig(database=second_db_path), + variables={"overriden_var": "gateway_2"}, + ), + }, + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + model_naming=NameInferenceConfig(infer_names=True), + default_gateway="first", + gateway_managed_virtual_layer=True, + variables={"overriden_var": "global", "global_one": 88}, + ) + + context = Context(paths=paths, config=config) + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 4 + context.apply(plan) + + # Confirm cache exists + cache_path = Path(path) / ".cache" + assert cache_path.exists() + assert len(list(cache_path.iterdir())) > 0 + + model = context.get_model("db_1.first_schema.model_one") + + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'c' AS extra").sql(dialect=model.dialect) + ) + } + ) + ) + plan = context.plan_builder().build() + context.apply(plan) + + state_environments = context.state_reader.get_environments() + state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) + + assert len(state_snapshots) == len(state_environments[0].snapshots) + + # Create dev environment with changed models + model = context.get_model("db_2.second_schema.model_one") + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'d' AS extra").sql(dialect=model.dialect) + ) + } + ) + ) + model = context.get_model("first_schema.model_two") + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'d2' AS 
col").sql(dialect=model.dialect) + ) + } + ) + ) + plan = context.plan_builder("dev").build() + context.apply(plan) + + dev_environment = context.state_sync.get_environment("dev") + assert dev_environment is not None + + state_environments = context.state_reader.get_environments() + state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) + assert ( + len(state_snapshots) + == len(state_environments[0].snapshots) + == len(state_environments[1].snapshots) + ) + + # The state tables at this point should be able to be retrieved + state_tables = { + "_environments", + "_snapshots", + "_intervals", + "_auto_restatements", + "_environment_statements", + "_intervals", + "_versions", + } + for table_name in state_tables: + context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}") + + # The actual tables as well + context.engine_adapters["second"].fetchdf(f"SELECT * FROM db_2.second_schema.model_one") + context.engine_adapters["second"].fetchdf(f"SELECT * FROM db_2.second_schema.model_two") + context.fetchdf(f"SELECT * FROM db_1.first_schema.model_one") + context.fetchdf(f"SELECT * FROM db_1.first_schema.model_two") + + # Use the destroy command to remove all data objects and state + # Mock the console confirmation to automatically return True + with patch.object(context.console, "_confirm", return_value=True): + context._destroy() + + # Ensure all tables have been removed + for table_name in state_tables: + with pytest.raises( + Exception, match=f"Catalog Error: Table with name {table_name} does not exist!" 
+ ): + context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}") + + # Validate tables have been deleted as well + with pytest.raises( + Exception, match=r"Catalog Error: Table with name.*model_two.*does not exist" + ): + context.fetchdf("SELECT * FROM db_1.first_schema.model_two") + with pytest.raises( + Exception, match=r"Catalog Error: Table with name.*model_one.*does not exist" + ): + context.fetchdf("SELECT * FROM db_1.first_schema.model_one") + + with pytest.raises( + Exception, match=r"Catalog Error: Table with name.*model_two.*does not exist" + ): + context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_two") + with pytest.raises( + Exception, match=r"Catalog Error: Table with name.*model_one.*does not exist" + ): + context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_one") + + # Ensure the cache has been removed + assert not cache_path.exists() + + +@use_terminal_console +def test_render_path_instead_of_model(tmp_path: Path): + create_temp_file(tmp_path, Path("models/test.sql"), "MODEL (name test_model); SELECT 1 AS col") + ctx = Context(paths=tmp_path, config=Config()) + + # Case 1: Fail gracefully when the user is passing in a path instead of a model name + for test_model in ["models/test.sql", "models/test.py"]: + with pytest.raises( + SQLMeshError, + match="Resolving models by path is not supported, please pass in the model name instead.", + ): + ctx.render(test_model) + + # Case 2: Fail gracefully when the model name is not found + with pytest.raises(SQLMeshError, match="Cannot find model with name 'incorrect_model'"): + ctx.render("incorrect_model") + + # Case 3: Render the model successfully + assert ctx.render("test_model").sql() == 'SELECT 1 AS "col"' + + +def test_invalidating_environment(sushi_context: Context): + apply_to_environment(sushi_context, "dev") + start_environment = sushi_context.state_sync.get_environment("dev") + assert start_environment is not None + metadata = 
DuckDBMetadata.from_context(sushi_context) + start_schemas = set(metadata.schemas) + assert "sushi__dev" in start_schemas + sushi_context.invalidate_environment("dev") + invalidate_environment = sushi_context.state_sync.get_environment("dev") + assert invalidate_environment is not None + schemas_prior_to_janitor = set(metadata.schemas) + assert invalidate_environment.expiration_ts < start_environment.expiration_ts # type: ignore + assert start_schemas == schemas_prior_to_janitor + sushi_context._run_janitor() + schemas_after_janitor = set(metadata.schemas) + assert sushi_context.state_sync.get_environment("dev") is None + assert start_schemas - schemas_after_janitor == {"sushi__dev"} + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_evaluate_uncategorized_snapshot(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Add a new projection + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + # Downstream model references the new projection + downstream_model = context.get_model("sushi.top_waiters") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_model), literal=False)) + + df = context.evaluate( + "sushi.top_waiters", start="2023-01-05", end="2023-01-06", execution_time=now() + ) + assert set(df["one"].tolist()) == {1} diff --git a/tests/core/integration/test_change_scenarios.py b/tests/core/integration/test_change_scenarios.py new file mode 100644 index 0000000000..fb1762220f --- /dev/null +++ b/tests/core/integration/test_change_scenarios.py @@ -0,0 +1,1517 @@ +from __future__ import annotations + +import typing as t +import json +from datetime import timedelta +from unittest import mock +import pandas as pd # noqa: TID253 +import pytest +from pathlib import Path +from sqlmesh.core.model.common import ParsableSql +import time_machine +from sqlglot.expressions import DataType 
+import re + +from sqlmesh.cli.project_init import init_example_project +from sqlmesh.core import constants as c +from sqlmesh.core import dialect as d +from sqlmesh.core.config import ( + AutoCategorizationMode, + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, +) +from sqlmesh.core.context import Context +from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.model import ( + FullKind, + ModelKind, + ModelKindName, + SqlModel, + PythonModel, + ViewKind, + load_sql_based_model, +) +from sqlmesh.core.model.kind import model_kind_type_from_name +from sqlmesh.core.plan import Plan, SnapshotIntervals +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.date import now, to_timestamp +from sqlmesh.utils.errors import ( + SQLMeshError, +) +from tests.core.integration.utils import ( + apply_to_environment, + add_projection_to_model, + initial_add, + change_data_type, + validate_apply_basics, + change_model_kind, + validate_model_kind_change, + validate_query_change, + validate_plan_changes, +) + +pytestmark = pytest.mark.slow + + +def test_auto_categorization(sushi_context: Context): + environment = "dev" + for config in sushi_context.configs.values(): + config.plan.auto_categorize_changes.sql = AutoCategorizationMode.FULL + initial_add(sushi_context, environment) + + version = sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).version + fingerprint = sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).fingerprint + + model = t.cast(SqlModel, sushi_context.get_model("sushi.customers", raise_if_missing=True)) + sushi_context.upsert_model( + "sushi.customers", + query_=ParsableSql(sql=model.query.select("'foo' AS foo").sql(dialect=model.dialect)), # type: ignore + ) + apply_to_environment(sushi_context, environment) + + assert ( + sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", 
raise_if_missing=True + ).change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).fingerprint + != fingerprint + ) + assert ( + sushi_context.get_snapshot("sushi.waiter_as_customer_by_day", raise_if_missing=True).version + == version + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_breaking_only_impacts_immediate_children(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()})) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + breaking_model = context.get_model("sushi.orders") + breaking_model = breaking_model.copy(update={"stamp": "force new version"}) + context.upsert_model(breaking_model) + breaking_snapshot = context.get_snapshot(breaking_model, raise_if_missing=True) + + non_breaking_model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model))) + non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True) + top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan_builder = context.plan_builder("dev", skip_tests=True, enable_preview=False) + plan_builder.set_choice(breaking_snapshot, SnapshotChangeCategory.BREAKING) + plan = plan_builder.build() + assert ( + plan.context_diff.snapshots[breaking_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert not any(i.snapshot_id 
== top_waiter_snapshot.snapshot_id for i in plan.missing_intervals) + + context.apply(plan) + assert ( + not context.plan_builder("dev", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + # Deploy everything to prod. + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.missing_intervals + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@pytest.mark.parametrize( + "context_fixture", + ["sushi_context", "sushi_dbt_context", "sushi_test_dbt_context", "sushi_no_default_catalog"], +) +def test_model_add(context_fixture: Context, request): + initial_add(request.getfixturevalue(context_fixture), "dev") + + +def test_model_removed(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + + top_waiters_snapshot_id = sushi_context.get_snapshot( + "sushi.top_waiters", raise_if_missing=True + ).snapshot_id + + sushi_context._models.pop('"memory"."sushi"."top_waiters"') + + def _validate_plan(context, plan): + validate_plan_changes(plan, removed=[top_waiters_snapshot_id]) + assert not plan.missing_intervals + + def _validate_apply(context): + assert not sushi_context.get_snapshot("sushi.top_waiters", raise_if_missing=False) + assert sushi_context.state_reader.get_snapshots([top_waiters_snapshot_id]) + env = sushi_context.state_reader.get_environment(environment) + assert env + assert all(snapshot.name != '"memory"."sushi"."top_waiters"' for snapshot in env.snapshots) + + apply_to_environment( + sushi_context, + environment, + SnapshotChangeCategory.BREAKING, + plan_validators=[_validate_plan], + apply_validators=[_validate_apply], + ) + + +def test_non_breaking_change(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + validate_query_change(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING, False) + + +def 
test_breaking_change(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + validate_query_change(sushi_context, environment, SnapshotChangeCategory.BREAKING, False) + + +def test_logical_change(sushi_context: Context): + environment = "dev" + initial_add(sushi_context, environment) + previous_sushi_items_version = sushi_context.get_snapshot( + "sushi.items", raise_if_missing=True + ).version + + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.FLOAT, + DataType.Type.DOUBLE, + ) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + assert ( + sushi_context.get_snapshot("sushi.items", raise_if_missing=True).version + == previous_sushi_items_version + ) + + +@pytest.mark.parametrize( + "from_, to", + [ + (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.FULL), + (ModelKindName.FULL, ModelKindName.INCREMENTAL_BY_TIME_RANGE), + ], +) +def test_model_kind_change(from_: ModelKindName, to: ModelKindName, sushi_context: Context): + environment = f"test_model_kind_change__{from_.value.lower()}__{to.value.lower()}" + incremental_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True).copy() + + if from_ != ModelKindName.INCREMENTAL_BY_TIME_RANGE: + change_model_kind(sushi_context, from_) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + if to == ModelKindName.INCREMENTAL_BY_TIME_RANGE: + sushi_context.upsert_model(incremental_snapshot.model) + else: + change_model_kind(sushi_context, to) + + logical = to in (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.EMBEDDED) + validate_model_kind_change(to, sushi_context, environment, logical=logical) + + +def test_environment_isolation(sushi_context: Context): + prod_snapshots 
= sushi_context.snapshots.values() + + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + directly_modified = ['"memory"."sushi"."items"'] + indirectly_modified = [ + '"memory"."sushi"."order_items"', + '"memory"."sushi"."waiter_revenue_by_day"', + '"memory"."sushi"."customer_revenue_by_day"', + '"memory"."sushi"."customer_revenue_lifetime"', + '"memory"."sushi"."top_waiters"', + "assert_item_price_above_zero", + ] + + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) + + # Verify prod unchanged + validate_apply_basics(sushi_context, "prod", prod_snapshots) + + def _validate_plan(context, plan): + validate_plan_changes(plan, modified=directly_modified + indirectly_modified) + assert not plan.missing_intervals + + apply_to_environment( + sushi_context, + "prod", + SnapshotChangeCategory.BREAKING, + plan_validators=[_validate_plan], + ) + + +def test_environment_promotion(sushi_context: Context): + initial_add(sushi_context, "dev") + + # Simulate prod "ahead" + change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) + apply_to_environment(sushi_context, "prod", SnapshotChangeCategory.BREAKING) + + # Simulate rebase + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) + + # Make changes in dev + change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DECIMAL) + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.NON_BREAKING) + + change_data_type(sushi_context, "sushi.top_waiters", DataType.Type.DOUBLE, DataType.Type.INT) + apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) + + change_data_type( + sushi_context, + "sushi.customer_revenue_by_day", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + apply_to_environment( + sushi_context, + "dev", + SnapshotChangeCategory.FORWARD_ONLY, + allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], + ) + + # 
Promote to prod + def _validate_plan(context, plan): + sushi_items_snapshot = context.get_snapshot("sushi.items", raise_if_missing=True) + sushi_top_waiters_snapshot = context.get_snapshot( + "sushi.top_waiters", raise_if_missing=True + ) + sushi_customer_revenue_by_day_snapshot = context.get_snapshot( + "sushi.customer_revenue_by_day", raise_if_missing=True + ) + + assert ( + plan.context_diff.modified_snapshots[sushi_items_snapshot.name][0].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.modified_snapshots[sushi_top_waiters_snapshot.name][0].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.modified_snapshots[sushi_customer_revenue_by_day_snapshot.name][ + 0 + ].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert plan.context_diff.snapshots[ + sushi_customer_revenue_by_day_snapshot.snapshot_id + ].is_forward_only + + apply_to_environment( + sushi_context, + "prod", + SnapshotChangeCategory.NON_BREAKING, + plan_validators=[_validate_plan], + allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], + ) + + +def test_no_override(sushi_context: Context) -> None: + change_data_type( + sushi_context, + "sushi.items", + DataType.Type.INT, + DataType.Type.BIGINT, + ) + + change_data_type( + sushi_context, + "sushi.order_items", + DataType.Type.INT, + DataType.Type.BIGINT, + ) + + plan_builder = sushi_context.plan_builder("prod") + plan = plan_builder.build() + + sushi_items_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) + sushi_order_items_snapshot = sushi_context.get_snapshot( + "sushi.order_items", raise_if_missing=True + ) + sushi_water_revenue_by_day_snapshot = sushi_context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + + items = plan.context_diff.snapshots[sushi_items_snapshot.snapshot_id] + order_items = plan.context_diff.snapshots[sushi_order_items_snapshot.snapshot_id] + waiter_revenue = 
plan.context_diff.snapshots[sushi_water_revenue_by_day_snapshot.snapshot_id] + + plan_builder.set_choice(items, SnapshotChangeCategory.BREAKING).set_choice( + order_items, SnapshotChangeCategory.NON_BREAKING + ) + plan_builder.build() + assert items.is_new_version + assert waiter_revenue.is_new_version + plan_builder.set_choice(items, SnapshotChangeCategory.NON_BREAKING) + plan_builder.build() + assert not waiter_revenue.is_new_version + + +@pytest.mark.parametrize( + "change_categories, expected", + [ + ([SnapshotChangeCategory.NON_BREAKING], SnapshotChangeCategory.BREAKING), + ([SnapshotChangeCategory.BREAKING], SnapshotChangeCategory.BREAKING), + ( + [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.NON_BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ( + [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ( + [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.NON_BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ( + [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.BREAKING], + SnapshotChangeCategory.BREAKING, + ), + ], +) +def test_revert( + sushi_context: Context, + change_categories: t.List[SnapshotChangeCategory], + expected: SnapshotChangeCategory, +): + environment = "prod" + original_snapshot_id = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) + + types = (DataType.Type.DOUBLE, DataType.Type.FLOAT, DataType.Type.DECIMAL) + assert len(change_categories) < len(types) + + for i, category in enumerate(change_categories): + change_data_type(sushi_context, "sushi.items", *types[i : i + 2]) + apply_to_environment(sushi_context, environment, category) + assert ( + sushi_context.get_snapshot("sushi.items", raise_if_missing=True) != original_snapshot_id + ) + + change_data_type(sushi_context, "sushi.items", types[len(change_categories)], types[0]) + + def _validate_plan(_, plan): + snapshot = next(s for s in plan.snapshots.values() if s.name == 
'"memory"."sushi"."items"') + assert snapshot.change_category == expected + assert not plan.missing_intervals + + apply_to_environment( + sushi_context, + environment, + change_categories[-1], + plan_validators=[_validate_plan], + ) + assert sushi_context.get_snapshot("sushi.items", raise_if_missing=True) == original_snapshot_id + + +def test_revert_after_downstream_change(sushi_context: Context): + environment = "prod" + change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.BREAKING) + + change_data_type( + sushi_context, + "sushi.waiter_revenue_by_day", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) + + change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DOUBLE) + + def _validate_plan(_, plan): + snapshot = next(s for s in plan.snapshots.values() if s.name == '"memory"."sushi"."items"') + assert snapshot.change_category == SnapshotChangeCategory.BREAKING + assert plan.missing_intervals + + apply_to_environment( + sushi_context, + environment, + SnapshotChangeCategory.BREAKING, + plan_validators=[_validate_plan], + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_change_after_forward_only_in_dev(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + # Make sure that the most downstream model is a materialized model. + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + # Make sushi.orders a forward-only model. 
+ model = context.get_model("sushi.orders") + updated_model_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"stamp": "force new version", "kind": updated_model_kind}) + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert not plan.requires_backfill + context.apply(plan) + + # Make a non-breaking change to a model. + model = context.get_model("sushi.top_waiters") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the non-breaking changes. + context.apply(plan) + + # Make a non-breaking change upstream from the previously modified model. 
+ model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the upstream non-breaking changes. + context.apply(plan) + assert not context.plan_builder("dev", skip_tests=True).build().requires_backfill + + # Deploy everything to prod. 
+ plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +@pytest.mark.parametrize("forward_only", [False, True]) +def test_plan_repairs_unrenderable_snapshot_state( + init_and_plan_context: t.Callable, forward_only: bool +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + target_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") + assert target_snapshot + + # Manually corrupt the snapshot's query + raw_snapshot = context.state_sync.state_sync.engine_adapter.fetchone( + f"SELECT snapshot FROM sqlmesh._snapshots WHERE name = '{target_snapshot.name}' AND identifier = 
'{target_snapshot.identifier}'" + )[0] # type: ignore + parsed_snapshot = json.loads(raw_snapshot) + parsed_snapshot["node"]["query"] = "SELECT @missing_macro()" + context.state_sync.state_sync.engine_adapter.update_table( + "sqlmesh._snapshots", + {"snapshot": json.dumps(parsed_snapshot)}, + f"name = '{target_snapshot.name}' AND identifier = '{target_snapshot.identifier}'", + ) + + context.clear_caches() + target_snapshot_in_state = context.state_sync.get_snapshots([target_snapshot.snapshot_id])[ + target_snapshot.snapshot_id + ] + + with pytest.raises(Exception): + target_snapshot_in_state.model.render_query_or_raise() + + # Repair the snapshot by creating a new version of it + context.upsert_model(target_snapshot.model.name, stamp="repair") + target_snapshot = context.get_snapshot(target_snapshot.name) + + plan_builder = context.plan_builder("prod", forward_only=forward_only) + plan = plan_builder.build() + if not forward_only: + assert target_snapshot.snapshot_id in {i.snapshot_id for i in plan.missing_intervals} + assert plan.directly_modified == {target_snapshot.snapshot_id} + plan_builder.set_choice(target_snapshot, SnapshotChangeCategory.NON_BREAKING) + plan = plan_builder.build() + + context.apply(plan) + + context.clear_caches() + assert context.get_snapshot(target_snapshot.name).model.render_query_or_raise() + target_snapshot_in_state = context.state_sync.get_snapshots([target_snapshot.snapshot_id])[ + target_snapshot.snapshot_id + ] + assert target_snapshot_in_state.model.render_query_or_raise() + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_no_backfill_for_model_downstream_of_metadata_change(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + # Make sushi.waiter_revenue_by_day a forward-only model. 
+ forward_only_model = context.get_model("sushi.waiter_revenue_by_day") + updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True}) + forward_only_model = forward_only_model.copy(update={"kind": updated_model_kind}) + context.upsert_model(forward_only_model) + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Make a metadata change upstream of the forward-only model. + context.upsert_model("sushi.orders", owner="new_owner") + + plan = context.plan_builder("test_dev").build() + assert plan.has_changes + assert not plan.directly_modified + assert not plan.indirectly_modified + assert not plan.missing_intervals + assert all( + snapshot.change_category == SnapshotChangeCategory.METADATA + for snapshot in plan.new_snapshots + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_set_choice_is_reflected_in_missing_intervals(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()})) + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan_builder = context.plan_builder("dev", skip_tests=True) + plan = plan_builder.build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + 
snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Change the category to BREAKING + plan = plan_builder.set_choice( + plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.BREAKING + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_BREAKING + ) + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Change the category back 
to NON_BREAKING + plan = plan_builder.set_choice( + plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.NON_BREAKING + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [ + pd.to_datetime(x) + for x in [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06", + "2023-01-07", + ] + ] + + # Promote changes to prod + prod_plan = context.plan_builder(skip_tests=True).build() + assert not prod_plan.missing_intervals + + context.apply(prod_plan) + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert prod_df["event_date"].tolist() == [ + pd.to_datetime(x) + for x in [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06", + "2023-01-07", + ] + ] + + +def test_plan_production_environment_statements(tmp_path: Path): + model_a = """ + MODEL ( + name test_schema.a, + kind FULL, + ); + + @IF( + @runtime_stage IN 
('evaluating', 'creating'), + INSERT INTO schema_names_for_prod (physical_schema_name) VALUES (@resolve_template('@{schema_name}')) + ); + + SELECT 1 AS account_id + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + before_all = [ + "CREATE TABLE IF NOT EXISTS schema_names_for_@this_env (physical_schema_name VARCHAR)", + "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS should_create AS SELECT @runtime_stage)", + ] + after_all = [ + "@IF(@this_env = 'prod', CREATE TABLE IF NOT EXISTS after_t AS SELECT @var_5)", + "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS not_create AS SELECT @runtime_stage)", + ] + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + before_all=before_all, + after_all=after_all, + variables={"var_5": 5}, + ) + ctx = Context(paths=[tmp_path], config=config) + ctx.plan(auto_apply=True, no_prompts=True) + + before_t = ctx.fetchdf("select * from schema_names_for_prod").to_dict() + assert before_t["physical_schema_name"][0] == "sqlmesh__test_schema" + + after_t = ctx.fetchdf("select * from after_t").to_dict() + assert after_t["5"][0] == 5 + + environment_statements = ctx.state_reader.get_environment_statements(c.PROD) + assert environment_statements[0].before_all == before_all + assert environment_statements[0].after_all == after_all + assert environment_statements[0].python_env.keys() == {"__sqlmesh__vars__"} + assert environment_statements[0].python_env["__sqlmesh__vars__"].payload == "{'var_5': 5}" + + should_create = ctx.fetchdf("select * from should_create").to_dict() + assert should_create["before_all"][0] == "before_all" + + with pytest.raises( + Exception, match=r"Catalog Error: Table with name not_create does not exist!" 
+ ): + ctx.fetchdf("select * from not_create") + + +def test_environment_statements_error_handling(tmp_path: Path): + model_a = """ + MODEL ( + name test_schema.a, + kind FULL, + ); + + SELECT 1 AS account_id + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + before_all = [ + "CREATE TABLE identical_table (physical_schema_name VARCHAR)", + "CREATE TABLE identical_table (physical_schema_name VARCHAR)", + ] + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + before_all=before_all, + ) + ctx = Context(paths=[tmp_path], config=config) + + expected_error_message = re.escape( + """An error occurred during execution of the following 'before_all' statement: + +CREATE TABLE identical_table (physical_schema_name TEXT) + +Catalog Error: Table with name "identical_table" already exists!""" + ) + + with pytest.raises(SQLMeshError, match=expected_error_message): + ctx.plan(auto_apply=True, no_prompts=True) + + after_all = [ + "@bad_macro()", + ] + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + after_all=after_all, + ) + ctx = Context(paths=[tmp_path], config=config) + + expected_error_message = re.escape( + """An error occurred during rendering of the 'after_all' statements: + +Failed to resolve macros for + +@bad_macro() + +Macro 'bad_macro' does not exist.""" + ) + + with pytest.raises(SQLMeshError, match=expected_error_message): + ctx.plan(auto_apply=True, no_prompts=True) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_full_model_change_with_plan_start_not_matching_model_start( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.top_waiters") + context.upsert_model(model, kind=model_kind_type_from_name("FULL")()) # type: ignore + + # Apply the change with --skip-backfill first 
and no plan start + context.plan("dev", skip_tests=True, skip_backfill=True, no_prompts=True, auto_apply=True) + + # Apply the plan again but this time don't skip backfill and set start + # to be later than the model start + context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, start="1 day ago") + + # Check that the number of rows is not 0 + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.top_waiters")[0] + assert row_num > 0 + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_hourly_model_with_lookback_no_backfill_in_dev(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + model = SqlModel.parse_obj( + { + **model.dict(), + "kind": model.kind.copy(update={"lookback": 1}), + "cron": "@hourly", + "audits": [], + } + ) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + top_waiters_model = context.get_model("sushi.top_waiters") + top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True) + context.upsert_model(top_waiters_model) + + context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + with time_machine.travel(now() + timedelta(hours=2)): + plan = context.plan_builder("dev", skip_tests=True).build() + # Make sure the waiter_revenue_by_day model is not backfilled. 
+ assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_max_interval_end_per_model_not_applied_when_end_is_provided( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + context.run() + + plan = context.plan_builder( + restate_models=["*"], start="2023-01-09", end="2023-01-09" + ).build() + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_against_expired_environment(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + modified_models = {model.fqn, context.get_model("sushi.top_waiters").fqn} + + plan = context.plan_builder("dev").build() + assert plan.has_changes + assert set(plan.context_diff.modified_snapshots) == modified_models + assert plan.missing_intervals + context.apply(plan) + + # Make sure there are no changes when comparing against the existing environment. + plan = context.plan_builder("dev").build() + assert not plan.has_changes + assert not plan.context_diff.modified_snapshots + assert not plan.missing_intervals + + # Invalidate the environment and make sure that the plan detects the changes. 
+ context.invalidate_environment("dev") + plan = context.plan_builder("dev").build() + assert plan.has_changes + assert set(plan.context_diff.modified_snapshots) == modified_models + assert not plan.missing_intervals + context.apply(plan) + + +def test_plan_environment_statements_doesnt_cause_extra_diff(tmp_path: Path): + model_a = """ + MODEL ( + name test_schema.a, + kind FULL, + ); + + SELECT 1; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + (models_dir / "a.sql").write_text(model_a) + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + before_all=["select 1 as before_all"], + after_all=["select 2 as after_all"], + ) + ctx = Context(paths=[tmp_path], config=config) + + # first plan - should apply changes + assert ctx.plan(auto_apply=True, no_prompts=True).has_changes + + # second plan - nothing has changed so should report no changes + assert not ctx.plan(auto_apply=True, no_prompts=True).has_changes + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_snapshot_table_exists_for_promoted_snapshot(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True) + + # Drop the views and make sure SQLMesh recreates them later + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + context.engine_adapter.drop_view(top_waiters_snapshot.table_name()) + context.engine_adapter.drop_view(top_waiters_snapshot.table_name(False)) + + # Make the environment unfinalized to force recreation of all views in the virtual layer + context.state_sync.state_sync.engine_adapter.execute( + "UPDATE sqlmesh._environments SET finalized_ts = NULL WHERE name = 'dev'" + ) + + context.plan( + "prod", + restate_models=["sushi.top_waiters"], + 
auto_apply=True, + no_prompts=True, + skip_tests=True, + ) + assert context.engine_adapter.table_exists(top_waiters_snapshot.table_name()) + + +def test_plan_twice_with_star_macro_yields_no_diff(tmp_path: Path): + init_example_project(tmp_path, engine_type="duckdb") + + star_model_definition = """ + MODEL ( + name sqlmesh_example.star_model, + kind FULL + ); + + SELECT @STAR(sqlmesh_example.full_model) FROM sqlmesh_example.full_model + """ + + star_model_path = tmp_path / "models" / "star_model.sql" + star_model_path.write_text(star_model_definition) + + db_path = str(tmp_path / "db.db") + config = Config( + gateways={"main": GatewayConfig(connection=DuckDBConnectionConfig(database=db_path))}, + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + ) + context = Context(paths=tmp_path, config=config) + context.plan(auto_apply=True, no_prompts=True) + + # Instantiate new context to remove caches etc + new_context = Context(paths=tmp_path, config=config) + + star_model = new_context.get_model("sqlmesh_example.star_model") + assert ( + star_model.render_query_or_raise().sql() + == 'SELECT CAST("full_model"."item_id" AS INT) AS "item_id", CAST("full_model"."num_orders" AS BIGINT) AS "num_orders" FROM "db"."sqlmesh_example"."full_model" AS "full_model"' + ) + + new_plan = new_context.plan_builder().build() + assert not new_plan.has_changes + assert not new_plan.new_snapshots + + +class OldPythonModel(PythonModel): + kind: ModelKind = ViewKind() + + +def test_python_model_default_kind_change(init_and_plan_context: t.Callable): + """ + Around 2024-07-17 Python models had their default Kind changed from VIEW to FULL in order to + avoid some edge cases where the views might not get updated in certain situations. 
+ + This test ensures that if a user had a Python `kind: VIEW` model stored in state, + it can still be loaded without error and just show as a breaking change from `kind: VIEW` + to `kind: FULL` + """ + + # note: we deliberately dont specify a Kind here to allow the defaults to be picked up + python_model_file = """import typing as t +import pandas as pd # noqa: TID253 +from sqlmesh import ExecutionContext, model + +@model( + "sushi.python_view_model", + columns={ + "id": "int", + } +) +def execute( + context: ExecutionContext, + **kwargs: t.Any, +) -> pd.DataFrame: + return pd.DataFrame([ + {"id": 1} + ]) +""" + + context: Context + context, _ = init_and_plan_context("examples/sushi") + + with open(context.path / "models" / "python_view_model.py", mode="w", encoding="utf8") as f: + f.write(python_model_file) + + # monkey-patch PythonModel to default to kind: View again + # and ViewKind to allow python models again + with ( + mock.patch.object(ViewKind, "supports_python_models", return_value=True), + mock.patch("sqlmesh.core.model.definition.PythonModel", OldPythonModel), + ): + context.load() + + # check the monkey-patching worked + model = context.get_model("sushi.python_view_model") + assert model.kind.name == ModelKindName.VIEW + assert model.source_type == "python" + + # apply plan + plan: Plan = context.plan(auto_apply=True) + + # check that run() still works even though we have a Python model with kind: View in the state + snapshot_ids = [s for s in plan.directly_modified if "python_view_model" in s.name] + snapshot_from_state = list(context.state_sync.get_snapshots(snapshot_ids).values())[0] + assert snapshot_from_state.model.kind.name == ModelKindName.VIEW + assert snapshot_from_state.model.source_type == "python" + context.run() + + # reload context to load model with new defaults + # this also shows the earlier monkey-patching is no longer in effect + context.load() + model = context.get_model("sushi.python_view_model") + assert model.kind.name == 
ModelKindName.FULL + assert model.source_type == "python" + + plan = context.plan( + categorizer_config=CategorizerConfig.all_full() + ) # the default categorizer_config doesnt auto-categorize python models + + assert plan.has_changes + assert not plan.indirectly_modified + + assert len(plan.directly_modified) == 1 + snapshot_id = list(plan.directly_modified)[0] + assert snapshot_id.name == '"memory"."sushi"."python_view_model"' + assert plan.modified_snapshots[snapshot_id].change_category == SnapshotChangeCategory.BREAKING + + context.apply(plan) + + df = context.engine_adapter.fetchdf("SELECT id FROM sushi.python_view_model") + assert df["id"].to_list() == [1] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +@pytest.mark.parametrize( + "parent_a_category,parent_b_category,expected_child_category", + [ + ( + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.INDIRECT_BREAKING, + ), + ( + SnapshotChangeCategory.NON_BREAKING, + SnapshotChangeCategory.NON_BREAKING, + SnapshotChangeCategory.INDIRECT_NON_BREAKING, + ), + ( + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.NON_BREAKING, + SnapshotChangeCategory.INDIRECT_NON_BREAKING, + ), + ( + SnapshotChangeCategory.NON_BREAKING, + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.INDIRECT_BREAKING, + ), + ( + SnapshotChangeCategory.NON_BREAKING, + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + ), + ( + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + ), + ( + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.BREAKING, + SnapshotChangeCategory.INDIRECT_BREAKING, + ), + ( + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.NON_BREAKING, + SnapshotChangeCategory.INDIRECT_NON_BREAKING, + ), + ( + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + SnapshotChangeCategory.METADATA, + ), + ], +) +def test_rebase_two_changed_parents( + 
init_and_plan_context: t.Callable, + parent_a_category: SnapshotChangeCategory, # This change is deployed to prod first + parent_b_category: SnapshotChangeCategory, # This change is deployed to prod second + expected_child_category: SnapshotChangeCategory, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + initial_model_a = context.get_model("sushi.orders") + initial_model_b = context.get_model("sushi.items") + + # Make change A and deploy it to dev_a + context.upsert_model(initial_model_a.name, stamp="1") + plan_builder = context.plan_builder("dev_a", skip_tests=True) + plan_builder.set_choice(context.get_snapshot(initial_model_a.name), parent_a_category) + context.apply(plan_builder.build()) + + # Make change B and deploy it to dev_b + context.upsert_model(initial_model_a) + context.upsert_model(initial_model_b.name, stamp="1") + plan_builder = context.plan_builder("dev_b", skip_tests=True) + plan_builder.set_choice(context.get_snapshot(initial_model_b.name), parent_b_category) + context.apply(plan_builder.build()) + + # Deploy change A to prod + context.upsert_model(initial_model_a.name, stamp="1") + context.upsert_model(initial_model_b) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Apply change B in addition to A and plan against prod + context.upsert_model(initial_model_b.name, stamp="1") + plan = context.plan_builder("prod", skip_tests=True).build() + + # Validate the category of child snapshots + direct_child_snapshot = plan.snapshots[context.get_snapshot("sushi.order_items").snapshot_id] + assert direct_child_snapshot.change_category == expected_child_category + + indirect_child_snapshot = plan.snapshots[context.get_snapshot("sushi.top_waiters").snapshot_id] + assert indirect_child_snapshot.change_category == expected_child_category + + +@pytest.mark.parametrize( + "context_fixture", + ["sushi_context", "sushi_no_default_catalog"], +) +def test_unaligned_start_snapshots(context_fixture: 
Context, request): + context = request.getfixturevalue(context_fixture) + environment = "dev" + apply_to_environment(context, environment) + # Make breaking change to model upstream of a depends_on_self model + context.upsert_model("sushi.order_items", stamp="1") + # Apply the change starting at a date later then the beginning of the downstream depends_on_self model + plan = apply_to_environment( + context, + environment, + choice=SnapshotChangeCategory.BREAKING, + plan_start="2 days ago", + enable_preview=True, + ) + revenue_lifetime_snapshot = context.get_snapshot( + "sushi.customer_revenue_lifetime", raise_if_missing=True + ) + # Validate that the depends_on_self model is non-deployable + assert not plan.deployability_index.is_deployable(revenue_lifetime_snapshot) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_unaligned_start_snapshot_with_non_deployable_downstream(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + downstream_model_name = "memory.sushi.customer_max_revenue" + + expressions = d.parse( + f""" + MODEL ( + name {downstream_model_name}, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key customer_id, + forward_only true, + ), + ); + + SELECT + customer_id, MAX(revenue) AS max_revenue + FROM memory.sushi.customer_revenue_lifetime + GROUP BY 1; + """ + ) + + downstream_model = load_sql_based_model(expressions) + assert downstream_model.forward_only + context.upsert_model(downstream_model) + + context.plan(auto_apply=True, no_prompts=True) + + customer_revenue_lifetime_model = context.get_model("sushi.customer_revenue_lifetime") + kwargs = { + **customer_revenue_lifetime_model.dict(), + "name": "memory.sushi.customer_revenue_lifetime_new", + "kind": dict( + name="INCREMENTAL_UNMANAGED" + ), # Make it incremental unmanaged to ensure the depends_on_past behavior. 
+ } + context.upsert_model(SqlModel.parse_obj(kwargs)) + context.upsert_model( + downstream_model_name, + query_=ParsableSql( + sql="SELECT customer_id, MAX(revenue) AS max_revenue FROM memory.sushi.customer_revenue_lifetime_new GROUP BY 1" + ), + ) + + plan = context.plan_builder("dev", enable_preview=True).build() + assert {s.name for s in plan.new_snapshots} == { + '"memory"."sushi"."customer_revenue_lifetime_new"', + '"memory"."sushi"."customer_max_revenue"', + } + for snapshot_interval in plan.missing_intervals: + assert not plan.deployability_index.is_deployable(snapshot_interval.snapshot_id) + assert snapshot_interval.intervals[0][0] == to_timestamp("2023-01-07") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_view_is_updated_with_new_table_references( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Add a new projection to the base model + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Run the janitor to delete the old snapshot record + context.run_janitor(ignore_ttl=True) + + # Check the downstream view and make sure it's still queryable + assert context.get_model("sushi.top_waiters").kind.is_view + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi.top_waiters")[0] + assert row_num > 0 + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_annotated_self_referential_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + # Projections are fully annotated in the query but columns were not specified explicitly + expressions = d.parse( + f""" + MODEL ( + name memory.sushi.test_self_ref, + kind FULL, + start '2023-01-01', + ); + + SELECT 1::INT AS one FROM memory.sushi.test_self_ref; + """ + ) + model = 
load_sql_based_model(expressions) + assert model.depends_on_self + context.upsert_model(model) + + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) + + df = context.fetchdf("SELECT one FROM memory.sushi.test_self_ref") + assert len(df) == 0 + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_creating_stage_for_first_batch_only(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + expressions = d.parse( + """ + MODEL ( + name memory.sushi.test_batch_size, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key one, + batch_size 1, + ), + + start '2023-01-01', + ); + + CREATE SCHEMA IF NOT EXISTS test_schema; + CREATE TABLE IF NOT EXISTS test_schema.creating_counter (a INT); + + SELECT 1::INT AS one; + + @IF(@runtime_stage = 'creating', INSERT INTO test_schema.creating_counter (a) VALUES (1)); + """ + ) + model = load_sql_based_model(expressions) + context.upsert_model(model) + + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) + assert ( + context.engine_adapter.fetchone("SELECT COUNT(*) FROM test_schema.creating_counter")[0] == 1 + ) diff --git a/tests/core/integration/test_config.py b/tests/core/integration/test_config.py new file mode 100644 index 0000000000..5d571cd7c5 --- /dev/null +++ b/tests/core/integration/test_config.py @@ -0,0 +1,580 @@ +from __future__ import annotations + +import typing as t +from unittest.mock import patch +import logging +import pytest +from pytest import MonkeyPatch +from pathlib import Path +from pytest_mock.plugin import MockerFixture +from sqlglot import exp +from IPython.utils.capture import capture_output + +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, + TableNamingConvention, + AutoCategorizationMode, +) +from sqlmesh.core.config.common import EnvironmentSuffixTarget +from sqlmesh.core.context import Context +from sqlmesh.core.config.plan import PlanConfig +from 
sqlmesh.core.engine_adapter import DuckDBEngineAdapter +from sqlmesh.core.model import SqlModel +from sqlmesh.core.model.common import ParsableSql +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.errors import ( + ConfigError, +) +from tests.conftest import DuckDBMetadata +from tests.utils.test_helpers import use_terminal_console +from tests.utils.test_filesystem import create_temp_file +from tests.core.integration.utils import apply_to_environment, initial_add + +pytestmark = pytest.mark.slow + + +@pytest.mark.set_default_connection(disable=True) +def test_missing_connection_config(): + # This is testing the actual implementation of Config.get_connection + # To make writing tests easier, it's patched by the autouse fixture provide_sqlmesh_default_connection + # Case 1: No default_connection or gateways specified should raise a ConfigError + with pytest.raises(ConfigError): + ctx = Context(config=Config()) + + # Case 2: No connection specified in the gateway should raise a ConfigError + with pytest.raises(ConfigError): + ctx = Context(config=Config(gateways={"incorrect": GatewayConfig()})) + + # Case 3: Specifying a default_connection or connection in the gateway should work + ctx = Context(config=Config(default_connection=DuckDBConnectionConfig())) + ctx = Context( + config=Config(gateways={"default": GatewayConfig(connection=DuckDBConnectionConfig())}) + ) + + +def test_physical_table_naming_strategy_table_only(copy_to_temp_path: t.Callable): + sushi_context = Context( + paths=copy_to_temp_path("examples/sushi"), + config="table_only_naming_config", + ) + + assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.TABLE_ONLY + sushi_context.plan(auto_apply=True) + + adapter = sushi_context.engine_adapter + + snapshot_tables = [ + dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) + for r in adapter.fetchall( + "select table_catalog, table_schema, table_name from information_schema.tables 
where table_type='BASE TABLE'" + ) + ] + + assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) + + prod_env = sushi_context.state_reader.get_environment("prod") + assert prod_env + + prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) + + assert all( + s.table_naming_convention == TableNamingConvention.TABLE_ONLY + for s in prod_env_snapshots.values() + ) + + +def test_physical_table_naming_strategy_hash_md5(copy_to_temp_path: t.Callable): + sushi_context = Context( + paths=copy_to_temp_path("examples/sushi"), + config="hash_md5_naming_config", + ) + + assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.HASH_MD5 + sushi_context.plan(auto_apply=True) + + adapter = sushi_context.engine_adapter + + snapshot_tables = [ + dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) + for r in adapter.fetchall( + "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" + ) + ] + + assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) + assert all([t["table"].startswith("sqlmesh_md5") for t in snapshot_tables]) + + prod_env = sushi_context.state_reader.get_environment("prod") + assert prod_env + + prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) + + assert all( + s.table_naming_convention == TableNamingConvention.HASH_MD5 + for s in prod_env_snapshots.values() + ) + + +def test_environment_suffix_target_table(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context( + "examples/sushi", config="environment_suffix_table_config" + ) + context.apply(plan) + metadata = DuckDBMetadata.from_context(context) + environments_schemas = {"sushi"} + internal_schemas = {"sqlmesh", "sqlmesh__sushi"} + starting_schemas = environments_schemas | internal_schemas + # Make sure no new schemas are created + assert set(metadata.schemas) - starting_schemas == {"raw"} + prod_views = {x for 
x in metadata.qualified_views if x.db in environments_schemas} + # Make sure that all models are present + assert len(prod_views) == 16 + apply_to_environment(context, "dev") + # Make sure no new schemas are created + assert set(metadata.schemas) - starting_schemas == {"raw"} + dev_views = { + x for x in metadata.qualified_views if x.db in environments_schemas and "__dev" in x.name + } + # Make sure that there is a view with `__dev` for each view that exists in prod + assert len(dev_views) == len(prod_views) + assert {x.name.replace("__dev", "") for x in dev_views} - {x.name for x in prod_views} == set() + context.invalidate_environment("dev") + context._run_janitor() + views_after_janitor = metadata.qualified_views + # Make sure that the number of views after the janitor is the same as when you subtract away dev views + assert len(views_after_janitor) == len( + {x.sql(dialect="duckdb") for x in views_after_janitor} + - {x.sql(dialect="duckdb") for x in dev_views} + ) + # Double check there are no dev views + assert len({x for x in views_after_janitor if "__dev" in x.name}) == 0 + # Make sure prod views were not removed + assert {x.sql(dialect="duckdb") for x in prod_views} - { + x.sql(dialect="duckdb") for x in views_after_janitor + } == set() + + +def test_environment_suffix_target_catalog(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: + monkeypatch.chdir(tmp_path) + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(catalogs={"main_warehouse": ":memory:"}), + environment_suffix_target=EnvironmentSuffixTarget.CATALOG, + ) + + assert config.default_connection + + models_dir = tmp_path / "models" + models_dir.mkdir() + + (models_dir / "model.sql").write_text(""" + MODEL ( + name example_schema.test_model, + kind FULL + ); + + SELECT '1' as a""") + + (models_dir / "fqn_model.sql").write_text(""" + MODEL ( + name memory.example_fqn_schema.test_model_fqn, + kind FULL + ); + + SELECT '1' as a""") + 
+ ctx = Context(config=config, paths=tmp_path) + + metadata = DuckDBMetadata.from_context(ctx) + assert ctx.default_catalog == "main_warehouse" + assert metadata.catalogs == {"main_warehouse", "memory"} + + ctx.plan(auto_apply=True) + + # prod should go to the default catalog and not be overridden to a catalog called 'prod' + assert ( + ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore + == "1" + ) + assert ( + ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore + == "1" + ) + assert metadata.catalogs == {"main_warehouse", "memory"} + assert metadata.schemas_in_catalog("main_warehouse") == [ + "example_schema", + "sqlmesh__example_schema", + ] + assert metadata.schemas_in_catalog("memory") == [ + "example_fqn_schema", + "sqlmesh__example_fqn_schema", + ] + + # dev should be overridden to go to a catalogs called 'main_warehouse__dev' and 'memory__dev' + ctx.plan(environment="dev", include_unmodified=True, auto_apply=True) + assert ( + ctx.engine_adapter.fetchone("select * from main_warehouse__dev.example_schema.test_model")[ + 0 + ] # type: ignore + == "1" + ) + assert ( + ctx.engine_adapter.fetchone("select * from memory__dev.example_fqn_schema.test_model_fqn")[ + 0 + ] # type: ignore + == "1" + ) + assert metadata.catalogs == {"main_warehouse", "main_warehouse__dev", "memory", "memory__dev"} + + # schemas in dev envs should match prod and not have a suffix + assert metadata.schemas_in_catalog("main_warehouse") == [ + "example_schema", + "sqlmesh__example_schema", + ] + assert metadata.schemas_in_catalog("main_warehouse__dev") == ["example_schema"] + assert metadata.schemas_in_catalog("memory") == [ + "example_fqn_schema", + "sqlmesh__example_fqn_schema", + ] + assert metadata.schemas_in_catalog("memory__dev") == ["example_fqn_schema"] + + ctx.invalidate_environment("dev", sync=True) + + # dev catalogs cleaned up + assert metadata.catalogs == {"main_warehouse", 
"memory"} + + # prod catalogs still contain physical layer and views still work + assert metadata.schemas_in_catalog("main_warehouse") == [ + "example_schema", + "sqlmesh__example_schema", + ] + assert metadata.schemas_in_catalog("memory") == [ + "example_fqn_schema", + "sqlmesh__example_fqn_schema", + ] + + assert ( + ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore + == "1" + ) + assert ( + ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore + == "1" + ) + + +def test_environment_catalog_mapping(init_and_plan_context: t.Callable): + environments_schemas = {"raw", "sushi"} + + def get_prod_dev_views(metadata: DuckDBMetadata) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: + views = metadata.qualified_views + prod_views = { + x for x in views if x.catalog == "prod_catalog" if x.db in environments_schemas + } + dev_views = {x for x in views if x.catalog == "dev_catalog" if x.db in environments_schemas} + return prod_views, dev_views + + def get_default_catalog_and_non_tables( + metadata: DuckDBMetadata, default_catalog: t.Optional[str] + ) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: + tables = metadata.qualified_tables + user_default_tables = { + x for x in tables if x.catalog == default_catalog and x.db != "sqlmesh" + } + non_default_tables = {x for x in tables if x.catalog != default_catalog} + return user_default_tables, non_default_tables + + context, plan = init_and_plan_context( + "examples/sushi", config="environment_catalog_mapping_config" + ) + context.apply(plan) + metadata = DuckDBMetadata(context.engine_adapter) + state_metadata = DuckDBMetadata.from_context(context.state_sync.state_sync) + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 0 + assert 
len(user_default_tables) == 15 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + "physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + apply_to_environment(context, "dev") + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 16 + assert len(user_default_tables) == 16 + assert len(non_default_tables) == 0 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + "physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + apply_to_environment(context, "prodnot") + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 32 + assert len(user_default_tables) == 16 + assert len(non_default_tables) == 0 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + "physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + context.invalidate_environment("dev") + context._run_janitor() + prod_views, dev_views = get_prod_dev_views(metadata) + ( + user_default_tables, + non_default_tables, + ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) + assert len(prod_views) == 16 + assert len(dev_views) == 16 + assert len(user_default_tables) == 16 + assert len(non_default_tables) == 0 + assert state_metadata.schemas == ["sqlmesh"] + assert {x.sql() for 
x in state_metadata.qualified_tables}.issuperset( + { + "physical.sqlmesh._environments", + "physical.sqlmesh._intervals", + "physical.sqlmesh._snapshots", + "physical.sqlmesh._versions", + } + ) + + +@use_terminal_console +def test_plan_always_recreate_environment(tmp_path: Path): + def plan_with_output(ctx: Context, environment: str): + with patch.object(logger, "info") as mock_logger: + with capture_output() as output: + ctx.load() + ctx.plan(environment, no_prompts=True, auto_apply=True) + + # Facade logs info "Promoting environment {environment}" + assert mock_logger.call_args[0][1] == environment + + return output + + models_dir = tmp_path / "models" + + logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") + + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" + ) + + config = Config(plan=PlanConfig(always_recreate_environment=True)) + ctx = Context(paths=[tmp_path], config=config) + + # Case 1: Neither prod nor dev exists, so dev is initialized + output = plan_with_output(ctx, "dev") + + assert """`dev` environment will be initialized""" in output.stdout + + # Case 2: Prod does not exist, so dev is updated + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 5 AS col" + ) + + output = plan_with_output(ctx, "dev") + assert "`dev` environment will be initialized" in output.stdout + + # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod + output = plan_with_output(ctx, "prod") + assert "`prod` environment will be initialized" in output.stdout + + # Case 4: Dev is updated with a breaking change. 
Prod exists now so plan comparisons moving forward should be against prod + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col" + ) + ctx.load() + + plan = ctx.plan_builder("dev").build() + + assert ( + next(iter(plan.context_diff.snapshots.values())).change_category + == SnapshotChangeCategory.BREAKING + ) + + output = plan_with_output(ctx, "dev") + assert "New environment `dev` will be created from `prod`" in output.stdout + assert "Differences from the `prod` environment" in output.stdout + + # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes + # so it's still classified as a breaking change + create_temp_file( + tmp_path, + models_dir / "a.sql", + "MODEL (name test.a, kind FULL, owner 'test'); SELECT 10 AS col", + ) + ctx.load() + + plan = ctx.plan_builder("dev").build() + + assert ( + next(iter(plan.context_diff.snapshots.values())).change_category + == SnapshotChangeCategory.BREAKING + ) + + output = plan_with_output(ctx, "dev") + assert "New environment `dev` will be created from `prod`" in output.stdout + assert "Differences from the `prod` environment" in output.stdout + + stdout_rstrip = "\n".join([line.rstrip() for line in output.stdout.split("\n")]) + assert ( + """MODEL ( + name test.a, ++ owner test, + kind FULL + ) + SELECT +- 5 AS col ++ 10 AS col""" + in stdout_rstrip + ) + + # Case 6: Ensure that target environment and create_from environment are not the same + output = plan_with_output(ctx, "prod") + assert not "New environment `prod` will be created from `prod`" in output.stdout + + # Case 7: Check that we can still run Context::diff() against any environment + for environment in ["dev", "prod"]: + context_diff = ctx._context_diff(environment) + assert context_diff.environment == environment + + +def test_before_all_after_all_execution_order(tmp_path: Path, mocker: MockerFixture): + model = """ + MODEL ( + name 
test_schema.model_that_depends_on_before_all, + kind FULL, + ); + + SELECT id, value FROM before_all_created_table + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + with open(models_dir / "model.sql", "w") as f: + f.write(model) + + # before_all statement that creates a table that the above model depends on + before_all_statement = ( + "CREATE TABLE IF NOT EXISTS before_all_created_table AS SELECT 1 AS id, 'test' AS value" + ) + + # after_all that depends on the model + after_all_statement = "CREATE TABLE IF NOT EXISTS after_all_created_table AS SELECT id, value FROM test_schema.model_that_depends_on_before_all" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + before_all=[before_all_statement], + after_all=[after_all_statement], + ) + + execute_calls: t.List[str] = [] + + original_duckdb_execute = DuckDBEngineAdapter.execute + + def track_duckdb_execute(self, expression, **kwargs): + sql = expression if isinstance(expression, str) else expression.sql(dialect="duckdb") + state_tables = [ + "_snapshots", + "_environments", + "_versions", + "_intervals", + "_auto_restatements", + "_environment_statements", + ] + + # to ignore the state queries + if not any(table in sql.lower() for table in state_tables): + execute_calls.append(sql) + + return original_duckdb_execute(self, expression, **kwargs) + + ctx = Context(paths=[tmp_path], config=config) + + # the plan would fail if the execution order ever changes and before_all statements dont execute first + ctx.plan(auto_apply=True, no_prompts=True) + + mocker.patch.object(DuckDBEngineAdapter, "execute", track_duckdb_execute) + + # run with the patched execute + ctx.run("prod", start="2023-01-01", end="2023-01-02") + + # validate explicitly that the first execute is for the before_all + assert "before_all_created_table" in execute_calls[0] + + # and that the last is the sole after all that depends on the model + assert "after_all_created_table" in execute_calls[-1] + + +def 
test_auto_categorization(sushi_context: Context): + environment = "dev" + for config in sushi_context.configs.values(): + config.plan.auto_categorize_changes.sql = AutoCategorizationMode.FULL + initial_add(sushi_context, environment) + + version = sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).version + fingerprint = sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).fingerprint + + model = t.cast(SqlModel, sushi_context.get_model("sushi.customers", raise_if_missing=True)) + sushi_context.upsert_model( + "sushi.customers", + query_=ParsableSql(sql=model.query.select("'foo' AS foo").sql(dialect=model.dialect)), # type: ignore + ) + apply_to_environment(sushi_context, environment) + + assert ( + sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + sushi_context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ).fingerprint + != fingerprint + ) + assert ( + sushi_context.get_snapshot("sushi.waiter_as_customer_by_day", raise_if_missing=True).version + == version + ) diff --git a/tests/core/integration/test_cron.py b/tests/core/integration/test_cron.py new file mode 100644 index 0000000000..fa327ac36f --- /dev/null +++ b/tests/core/integration/test_cron.py @@ -0,0 +1,247 @@ +from __future__ import annotations + +import typing as t +import pytest +import time_machine + +from sqlmesh.core import dialect as d +from sqlmesh.core.model import ( + SqlModel, + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.utils.date import to_timestamp +from tests.core.integration.utils import add_projection_to_model + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +@pytest.mark.parametrize( + "forward_only, expected_intervals", + [ + ( + False, + [ + (to_timestamp("2023-01-01"), 
to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + ], + ), + ( + True, + [ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + ], + ), + ], +) +def test_cron_not_aligned_with_day_boundary( + init_and_plan_context: t.Callable, + forward_only: bool, + expected_intervals: t.List[t.Tuple[int, int]], +): + context, plan = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + model = SqlModel.parse_obj( + { + **model.dict(), + "kind": model.kind.copy(update={"forward_only": forward_only}), + "cron": "0 12 * * *", + } + ) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) + assert waiter_revenue_by_day_snapshot.intervals == [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-07")) + ] + + model = add_projection_to_model(t.cast(SqlModel, model), literal=True) + context.upsert_model(model) + + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + + with time_machine.travel("2023-01-08 00:10:00 UTC"): # Past model's cron. 
+ plan = context.plan_builder( + "dev", select_models=[model.name], skip_tests=True, enable_preview=True + ).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=expected_intervals, + ), + ] + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_cron_not_aligned_with_day_boundary_new_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + existing_model = context.get_model("sushi.waiter_revenue_by_day") + existing_model = SqlModel.parse_obj( + { + **existing_model.dict(), + "kind": existing_model.kind.copy(update={"forward_only": True}), + } + ) + context.upsert_model(existing_model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + # Add a new model and make a change to a forward-only model. + # The cron of the new model is not aligned with the day boundary. + new_model = load_sql_based_model( + d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind FULL, + cron '0 8 * * *', + start '2023-01-01', + ); + + SELECT 1 AS one; + """ + ) + ) + context.upsert_model(new_model) + + existing_model = add_projection_to_model(t.cast(SqlModel, existing_model), literal=True) + context.upsert_model(existing_model) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "memory.sushi.new_model", raise_if_missing=True + ).snapshot_id, + intervals=[(to_timestamp("2023-01-06"), to_timestamp("2023-01-07"))], + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2023-01-08 00:00:00 UTC", tick=False) +def 
test_parent_cron_after_child(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + model = SqlModel.parse_obj( + { + **model.dict(), + "cron": "50 23 * * *", + } + ) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) + assert waiter_revenue_by_day_snapshot.intervals == [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-07")) + ] + + top_waiters_model = context.get_model("sushi.top_waiters") + top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True) + context.upsert_model(top_waiters_model) + + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + with time_machine.travel("2023-01-08 23:55:00 UTC"): # Past parent's cron, but before child's + plan = context.plan_builder("dev", skip_tests=True).build() + # Make sure the waiter_revenue_by_day model is not backfilled. 
+ assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2025-03-08 00:00:00 UTC") +def test_tz(init_and_plan_context): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model( + SqlModel.parse_obj( + {**model.dict(), "cron_tz": "America/Los_Angeles", "start": "2025-03-07"} + ) + ) + + def assert_intervals(plan, intervals): + assert ( + next( + intervals.intervals + for intervals in plan.missing_intervals + if intervals.snapshot_id.name == model.fqn + ) + == intervals + ) + + plan = context.plan_builder("prod", skip_tests=True).build() + + # we have missing intervals but not waiter_revenue_by_day because it's not midnight pacific yet + assert plan.missing_intervals + + with pytest.raises(StopIteration): + assert_intervals(plan, []) + + # now we're ready 8AM UTC == midnight PST + with time_machine.travel("2025-03-08 08:00:00 UTC"): + plan = context.plan_builder("prod", skip_tests=True).build() + assert_intervals(plan, [(to_timestamp("2025-03-07"), to_timestamp("2025-03-08"))]) + + with time_machine.travel("2025-03-09 07:00:00 UTC"): + plan = context.plan_builder("prod", skip_tests=True).build() + + assert_intervals( + plan, + [ + (to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), + ], + ) + + with time_machine.travel("2025-03-09 08:00:00 UTC"): + plan = context.plan_builder("prod", skip_tests=True).build() + + assert_intervals( + plan, + [ + 
(to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), + (to_timestamp("2025-03-08"), to_timestamp("2025-03-09")), + ], + ) + + context.apply(plan) + + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.missing_intervals diff --git a/tests/core/integration/test_dbt.py b/tests/core/integration/test_dbt.py new file mode 100644 index 0000000000..6f23acb97e --- /dev/null +++ b/tests/core/integration/test_dbt.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import typing as t +import pytest +from sqlmesh.core.model.common import ParsableSql +import time_machine + +from sqlmesh.core.context import Context +from sqlmesh.core.model import ( + IncrementalUnmanagedKind, +) +from sqlmesh.core.snapshot import ( + DeployabilityIndex, + SnapshotChangeCategory, +) + +if t.TYPE_CHECKING: + pass + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_select_star_is_directly_modified(sushi_test_dbt_context: Context): + context = sushi_test_dbt_context + + model = context.get_model("sushi.simple_model_a") + context.upsert_model( + model, + query_=ParsableSql(sql="SELECT 1 AS a, 2 AS b"), + ) + + snapshot_a_id = context.get_snapshot("sushi.simple_model_a").snapshot_id # type: ignore + snapshot_b_id = context.get_snapshot("sushi.simple_model_b").snapshot_id # type: ignore + + plan = context.plan_builder("dev", skip_tests=True).build() + assert plan.directly_modified == {snapshot_a_id, snapshot_b_id} + assert {i.snapshot_id for i in plan.missing_intervals} == {snapshot_a_id, snapshot_b_id} + + assert plan.snapshots[snapshot_a_id].change_category == SnapshotChangeCategory.NON_BREAKING + assert plan.snapshots[snapshot_b_id].change_category == SnapshotChangeCategory.NON_BREAKING + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_is_incremental_table_is_missing(sushi_test_dbt_context: Context): + context = sushi_test_dbt_context + + model = 
context.get_model("sushi.waiter_revenue_by_day_v2") + model = model.copy(update={"kind": IncrementalUnmanagedKind(), "start": "2023-01-01"}) + context.upsert_model(model) + context._standalone_audits["sushi.test_top_waiters"].start = "2023-01-01" + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + snapshot = context.get_snapshot("sushi.waiter_revenue_by_day_v2") + assert snapshot + + # Manually drop the table + context.engine_adapter.drop_table(snapshot.table_name()) + + context.snapshot_evaluator.evaluate( + snapshot, + start="2023-01-01", + end="2023-01-08", + execution_time="2023-01-08 15:00:00", + snapshots={s.name: s for s in context.snapshots.values()}, + deployability_index=DeployabilityIndex.all_deployable(), + ) + + # Make sure the table was recreated + assert context.engine_adapter.table_exists(snapshot.table_name()) + + +def test_model_attr(sushi_test_dbt_context: Context, assert_exp_eq): + context = sushi_test_dbt_context + model = context.get_model("sushi.top_waiters") + assert_exp_eq( + model.render_query(), + """ + SELECT + CAST("waiter_id" AS INT) AS "waiter_id", + CAST("revenue" AS DOUBLE) AS "revenue", + 3 AS "model_columns" + FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" + WHERE + "ds" = ( + SELECT + MAX("ds") + FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" + ) + ORDER BY + "revenue" DESC NULLS FIRST + LIMIT 10 + """, + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_requirements(sushi_dbt_context: Context): + assert set(sushi_dbt_context.requirements) == {"dbt-core", "dbt-duckdb"} + assert sushi_dbt_context.requirements["dbt-core"].startswith("1.") + assert sushi_dbt_context.requirements["dbt-duckdb"].startswith("1.") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_dialect_with_normalization_strategy(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context( + "tests/fixtures/dbt/sushi_test", 
config="test_config_with_normalization_strategy" + ) + assert context.default_dialect == "duckdb,normalization_strategy=LOWERCASE" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dbt_before_all_with_var_ref_source(init_and_plan_context: t.Callable): + _, plan = init_and_plan_context( + "tests/fixtures/dbt/sushi_test", config="test_config_with_normalization_strategy" + ) + environment_statements = plan.to_evaluatable().environment_statements + assert environment_statements + rendered_statements = [e.render_before_all(dialect="duckdb") for e in environment_statements] + assert rendered_statements[0] == [ + "CREATE TABLE IF NOT EXISTS analytic_stats (physical_table TEXT, evaluation_time TEXT)", + "CREATE TABLE IF NOT EXISTS to_be_executed_last (col TEXT)", + "SELECT 1 AS var, 'items' AS src, 'waiters' AS ref", + ] diff --git a/tests/core/integration/test_dev_only_vde.py b/tests/core/integration/test_dev_only_vde.py new file mode 100644 index 0000000000..611e207771 --- /dev/null +++ b/tests/core/integration/test_dev_only_vde.py @@ -0,0 +1,477 @@ +from __future__ import annotations + +import typing as t +import pytest +from sqlmesh.core.model.common import ParsableSql +import time_machine + +from sqlmesh.core import dialect as d +from sqlmesh.core.config.common import VirtualEnvironmentMode +from sqlmesh.core.model import ( + FullKind, + IncrementalUnmanagedKind, + SqlModel, + ViewKind, + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.date import to_date, to_timestamp +from tests.core.integration.utils import add_projection_to_model + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + + assert all( + 
s.virtual_environment_mode.is_dev_only or not s.is_model or s.is_symbolic + for s in context.snapshots.values() + ) + + # Init prod + context.plan("prod", auto_apply=True, no_prompts=True) + + # Make a change in dev + original_model = context.get_model("sushi.waiter_revenue_by_day") + original_fingerprint = context.get_snapshot(original_model.name).fingerprint + model = original_model.copy( + update={ + "query_": ParsableSql( + sql=original_model.query.order_by("waiter_id").sql(dialect=original_model.dialect) + ) + } + ) + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + + plan_dev = context.plan_builder("dev").build() + assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07") + assert plan_dev.requires_backfill + assert plan_dev.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id, + intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id, + intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ), + ] + assert plan_dev.context_diff.snapshots[context.get_snapshot(model.name).snapshot_id].intervals + assert plan_dev.context_diff.snapshots[ + context.get_snapshot("sushi.top_waiters").snapshot_id + ].intervals + assert plan_dev.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].dev_intervals + assert plan_dev.context_diff.snapshots[ + context.get_snapshot("sushi.top_waiters").snapshot_id + ].dev_intervals + context.apply(plan_dev) + + # Make sure the waiter_revenue_by_day model is a table in prod and a view in dev + table_types_df = context.engine_adapter.fetchdf( + "SELECT table_schema, table_type FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 'waiter_revenue_by_day'" + ) + assert table_types_df.to_dict("records") == [ + {"table_schema": "sushi", "table_type": "BASE TABLE"}, + {"table_schema": 
"sushi__dev", "table_type": "VIEW"}, + ] + + # Check that the specified dates were backfilled + min_event_date = context.engine_adapter.fetchone( + "SELECT MIN(event_date) FROM sushi__dev.waiter_revenue_by_day" + )[0] + assert min_event_date == to_date("2023-01-07") + + # Make sure the changes are applied without backfill in prod + plan_prod = context.plan_builder("prod").build() + assert not plan_prod.requires_backfill + assert not plan_prod.missing_intervals + context.apply(plan_prod) + assert "one" in context.engine_adapter.columns("sushi.waiter_revenue_by_day") + + # Make sure the revert of a breaking changes results in a full rebuild + context.upsert_model(original_model) + assert context.get_snapshot(original_model.name).fingerprint == original_fingerprint + + plan_prod = context.plan_builder( + "prod", allow_destructive_models=["sushi.waiter_revenue_by_day"] + ).build() + assert not plan_prod.requires_backfill + assert not plan_prod.missing_intervals + context.apply(plan_prod) + assert "one" not in context.engine_adapter.columns("sushi.waiter_revenue_by_day") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + # Change to full kind + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.missing_intervals + assert prod_plan.requires_backfill + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + # Change back to view + model = 
context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": ViewKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "view" + + # Change to incremental + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": IncrementalUnmanagedKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + # Change back to full + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change_incremental( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context( + "examples/sushi", 
config="test_config_virtual_environment_mode_dev_only" + ) + + forward_only_model_name = "memory.sushi.test_forward_only_model" + forward_only_model_expressions = d.parse( + f""" + MODEL ( + name {forward_only_model_name}, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + ), + ); + + SELECT '2023-01-01' AS ds, 'value' AS value; + """ + ) + forward_only_model = load_sql_based_model(forward_only_model_expressions) + forward_only_model = forward_only_model.copy( + update={"virtual_environment_mode": VirtualEnvironmentMode.DEV_ONLY} + ) + context.upsert_model(forward_only_model) + + context.plan("prod", auto_apply=True, no_prompts=True) + + # Change to view + model = context.get_model(forward_only_model_name) + original_kind = model.kind + model = model.copy(update={"kind": ViewKind()}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "view" + + model = model.copy(update={"kind": original_kind}) + context.upsert_model(model) + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change_with_follow_up_changes_in_dev( + init_and_plan_context: 
t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + # Make sure the initial state is a view + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "view" + + # Change to incremental unmanaged kind + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": IncrementalUnmanagedKind()}) + context.upsert_model(model) + dev_plan = context.plan_builder("dev", skip_tests=True).build() + assert dev_plan.missing_intervals + assert dev_plan.requires_backfill + context.apply(dev_plan) + + # Make a follow-up forward-only change + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + dev_plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build() + context.apply(dev_plan) + + # Deploy to prod + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals + assert not prod_plan.context_diff.snapshots[ + context.get_snapshot(model.name).snapshot_id + ].intervals + context.apply(prod_plan) + data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) + assert len(data_objects) == 1 + assert data_objects[0].type == "table" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_kind_change_manual_categorization( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + dev_plan_builder = context.plan_builder("dev", skip_tests=True, no_auto_categorization=True) + dev_plan_builder.set_choice( + 
dev_plan_builder._context_diff.snapshots[context.get_snapshot(model.name).snapshot_id], + SnapshotChangeCategory.NON_BREAKING, + ) + dev_plan = dev_plan_builder.build() + assert dev_plan.requires_backfill + assert len(dev_plan.missing_intervals) == 1 + context.apply(dev_plan) + + prod_plan = context.plan_builder("prod", skip_tests=True).build() + assert prod_plan.requires_backfill + assert prod_plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_seed_model_change( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.load() + context.plan("prod", auto_apply=True, no_prompts=True) + + seed_model = context.get_model("sushi.waiter_names") + with open(seed_model.seed_path, "a") as fd: + fd.write("\n123,New Test Name") + + context.load() + seed_model_snapshot = context.get_snapshot("sushi.waiter_names") + plan = context.plan_builder("dev").build() + assert plan.directly_modified == {seed_model_snapshot.snapshot_id} + assert len(plan.missing_intervals) == 2 + context.apply(plan) + + actual_seed_df_in_dev = context.fetchdf("SELECT * FROM sushi__dev.waiter_names WHERE id = 123") + assert actual_seed_df_in_dev.to_dict("records") == [{"id": 123, "name": "New Test Name"}] + actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") 
+ assert actual_seed_df_in_prod.empty + + plan = context.plan_builder("prod").build() + assert plan.directly_modified == {seed_model_snapshot.snapshot_id} + assert len(plan.missing_intervals) == 1 + assert plan.missing_intervals[0].snapshot_id == seed_model_snapshot.snapshot_id + context.apply(plan) + + actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") + assert actual_seed_df_in_prod.to_dict("records") == [{"id": 123, "name": "New Test Name"}] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_change_downstream_of_seed( + init_and_plan_context: t.Callable, +): + """This test covers a scenario when a model downstream of a seed model is modified and explicitly selected + causing an (unhydrated) seed model to sourced from the state. If SQLMesh attempts to create + a table for the unchanged seed model, it will fail because the seed model is not hydrated. + """ + context, _ = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.load() + context.plan("prod", auto_apply=True, no_prompts=True) + + # Make sure that a different version of the seed model is loaded + seed_model = context.get_model("sushi.waiter_names") + seed_model = seed_model.copy(update={"stamp": "force new version"}) + context.upsert_model(seed_model) + + # Make a change to the downstream model + model = context.get_model("sushi.waiter_as_customer_by_day") + model = model.copy(update={"stamp": "force new version"}) + context.upsert_model(model) + + # It is important to clear the cache so that the hydrated seed model is not sourced from the cache + context.clear_caches() + + # Make sure to use the selector so that the seed model is sourced from the state + plan = context.plan_builder("dev", select_models=[model.name]).build() + assert len(plan.directly_modified) == 1 + assert list(plan.directly_modified)[0].name == model.fqn + assert 
len(plan.missing_intervals) == 1 + assert plan.missing_intervals[0].snapshot_id.name == model.fqn + + # Make sure there's no error when applying the plan + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_model_change_standalone_audit( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + # Change a model upstream from a standalone audit + model = context.get_model("sushi.items") + model = model.copy(update={"stamp": "force new version"}) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + + # Make sure the standalone audit is among modified + assert ( + context.get_snapshot("assert_item_price_above_zero").snapshot_id + in plan.indirectly_modified[context.get_snapshot("sushi.items").snapshot_id] + ) + + # Make sure there's no error when applying the plan + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_virtual_environment_mode_dev_only_seed_model_change_schema( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context( + "examples/sushi", config="test_config_virtual_environment_mode_dev_only" + ) + context.apply(plan) + + new_csv = [] + with open(context.path / "seeds" / "waiter_names.csv", "r") as fd: + is_header = True + for idx, line in enumerate(fd): + line = line.strip() + if not line: + continue + if is_header: + new_csv.append(line + ",new_column") + is_header = False + else: + new_csv.append(line + f",v{idx}") + + with open(context.path / "seeds" / "waiter_names.csv", "w") as fd: + fd.write("\n".join(new_csv)) + + context.load() + + downstream_model = context.get_model("sushi.waiter_as_customer_by_day") + downstream_model_kind = downstream_model.kind.dict() + downstream_model_kwargs = { + **downstream_model.dict(), + "kind": { + **downstream_model_kind, + 
"on_destructive_change": "allow", + }, + "audits": [], + # Use the new column + "query": "SELECT '2023-01-07' AS event_date, new_column AS new_column FROM sushi.waiter_names", + } + context.upsert_model(SqlModel.parse_obj(downstream_model_kwargs)) + + context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True, enable_preview=True) + + assert ( + context.engine_adapter.fetchone( + "SELECT COUNT(*) FROM sushi__dev.waiter_as_customer_by_day" + )[0] + == len(new_csv) - 1 + ) + + # Deploy to prod + context.clear_caches() + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + assert "new_column" in context.engine_adapter.columns("sushi.waiter_as_customer_by_day") diff --git a/tests/core/integration/test_forward_only.py b/tests/core/integration/test_forward_only.py new file mode 100644 index 0000000000..2dddf18efd --- /dev/null +++ b/tests/core/integration/test_forward_only.py @@ -0,0 +1,1497 @@ +from __future__ import annotations + +import typing as t +import numpy as np # noqa: TID253 +import pandas as pd # noqa: TID253 +import pytest +import time_machine + +from sqlmesh.core import dialect as d +from sqlmesh.core.context import Context +from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.model import ( + FullKind, + SqlModel, + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.date import to_datetime, to_timestamp +from tests.core.integration.utils import add_projection_to_model + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +@pytest.mark.parametrize( + "context_fixture", + ["sushi_context", "sushi_no_default_catalog"], +) +def test_forward_only_plan_with_effective_date(context_fixture: Context, request): + context = request.getfixturevalue(context_fixture) + model_name = "sushi.waiter_revenue_by_day" + model = context.get_model(model_name) + 
context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)), start="2023-01-01") + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan_builder = context.plan_builder("dev", skip_tests=True, forward_only=True) + plan = plan_builder.build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only + + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ), + ] + + plan = plan_builder.set_effective_from("2023-01-05").build() + # Default start should be set to effective_from + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + plan = plan_builder.set_start("2023-01-06").build() + # Start override should take precedence + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + 
(to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + plan = plan_builder.set_effective_from("2023-01-04").build() + # Start should remain unchanged + assert plan.start == "2023-01-06" + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [ + pd.to_datetime("2023-01-06"), + pd.to_datetime("2023-01-07"), + ] + + prod_plan = context.plan_builder(skip_tests=True).build() + # Make sure that the previously set effective_from is respected + assert prod_plan.start == to_timestamp("2023-01-04") + assert prod_plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), 
to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(prod_plan) + + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert prod_df["event_date"].tolist() == [ + pd.to_datetime(x) for x in ["2023-01-04", "2023-01-05", "2023-01-06", "2023-01-07"] + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_model_regular_plan(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + model = add_projection_to_model(t.cast(SqlModel, model)) + forward_only_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"kind": forward_only_kind}) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only + + assert plan.start == to_datetime("2023-01-01") + assert not plan.missing_intervals + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert not dev_df["event_date"].tolist() + + # Run a restatement plan to preview changes + 
plan_builder = context.plan_builder( + "dev", skip_tests=True, restate_models=[model_name], enable_preview=False + ) + plan_builder.set_start("2023-01-06") + assert plan_builder.build().missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Make sure that changed start is reflected in missing intervals + plan_builder.set_start("2023-01-07") + assert plan_builder.build().missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan_builder.build()) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] + 
+ # Promote changes to prod + prod_plan = context.plan_builder(skip_tests=True).build() + assert not prod_plan.missing_intervals + + context.apply(prod_plan) + + # The change was applied in a forward-only manner so no values in the new column should be populated + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert not prod_df["event_date"].tolist() + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_model_regular_plan_preview_enabled(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + model = add_projection_to_model(t.cast(SqlModel, model)) + forward_only_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"kind": forward_only_kind}) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only + + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + dev_df = 
context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_model_restate_full_history_in_dev(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + model_name = "memory.sushi.customer_max_revenue" + expressions = d.parse( + f""" + MODEL ( + name {model_name}, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key customer_id, + forward_only true, + ), + ); + + SELECT + customer_id, MAX(revenue) AS max_revenue + FROM memory.sushi.customer_revenue_lifetime + GROUP BY 1; + """ + ) + + model = load_sql_based_model(expressions) + assert model.forward_only + assert model.kind.full_history_restatement_only + context.upsert_model(model) + + context.plan("prod", skip_tests=True, auto_apply=True, enable_preview=False) + + model_kwargs = { + **model.dict(), + # Make a breaking change. 
+ "query": model.query.order_by("customer_id"), # type: ignore + } + context.upsert_model(SqlModel.parse_obj(model_kwargs)) + + # Apply the model change in dev + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert not plan.missing_intervals + context.apply(plan) + + snapshot = context.get_snapshot(model, raise_if_missing=True) + snapshot_table_name = snapshot.table_name(False) + + # Manually insert a dummy value to check that the table is recreated during the restatement + context.engine_adapter.insert_append( + snapshot_table_name, + pd.DataFrame({"customer_id": [-1], "max_revenue": [100]}), + ) + df = context.engine_adapter.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1" + ) + assert df["cnt"][0] == 1 + + # Apply a restatement plan in dev + plan = context.plan("dev", restate_models=[model.name], auto_apply=True, enable_preview=False) + assert len(plan.missing_intervals) == 1 + + # Check that the dummy value is not present + df = context.engine_adapter.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1" + ) + assert df["cnt"][0] == 0 + + # Check that the table is not empty + df = context.engine_adapter.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue" + ) + assert df["cnt"][0] > 0 + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_full_history_restatement_model_regular_plan_preview_enabled( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.marketing" # SCD2 model + + model = context.get_model(model_name) + model = add_projection_to_model(t.cast(SqlModel, model)) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + customers_snapshot = context.get_snapshot("sushi.customers", raise_if_missing=True) + 
active_customers_snapshot = context.get_snapshot( + "sushi.active_customers", raise_if_missing=True + ) + waiter_as_customer_snapshot = context.get_snapshot( + "sushi.waiter_as_customer_by_day", raise_if_missing=True + ) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + + assert len(plan.new_snapshots) == 6 + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[customers_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[active_customers_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[waiter_as_customer_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert all(s.is_forward_only for s in plan.new_snapshots) + + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_metadata_changed_regular_plan_preview_enabled(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model_name = "sushi.waiter_revenue_by_day" + + model = context.get_model(model_name) + model = model.copy(update={"owner": "new_owner"}) + + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert len(plan.new_snapshots) == 2 + assert ( + 
plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.METADATA + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.METADATA + ) + assert not plan.missing_intervals + assert not plan.restatements + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_forward_only_preview_child_that_runs_before_parent(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + # This model runs at minute 30 of every hour + upstream_model = load_sql_based_model( + d.parse( + """ + MODEL ( + name memory.sushi.upstream_model, + kind FULL, + cron '30 * * * *', + start '2023-01-01', + ); + + SELECT 1 AS a; + """ + ) + ) + context.upsert_model(upstream_model) + + # This model runs at minute 0 of every hour, so it runs before the upstream model + downstream_model = load_sql_based_model( + d.parse( + """ + MODEL ( + name memory.sushi.downstream_model, + kind INCREMENTAL_BY_TIME_RANGE( + time_column event_date, + forward_only True, + ), + cron '0 * * * *', + start '2023-01-01', + ); + + SELECT a, '2023-01-06' AS event_date FROM memory.sushi.upstream_model; + """ + ) + ) + context.upsert_model(downstream_model) + + context.plan("prod", skip_tests=True, auto_apply=True) + + with time_machine.travel("2023-01-08 00:05:00 UTC"): + # The downstream model runs but not the upstream model + context.run("prod") + + # Now it's time for the upstream model to run but it hasn't run yet + with time_machine.travel("2023-01-08 00:35:00 UTC"): + # Make a change to the downstream model. 
+ downstream_model = add_projection_to_model(t.cast(SqlModel, downstream_model), literal=True) + context.upsert_model(downstream_model) + + # The plan should only backfill the downstream model despite upstream missing intervals + plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot( + downstream_model.name, raise_if_missing=True + ).snapshot_id, + intervals=[ + (to_timestamp("2023-01-07 23:00:00"), to_timestamp("2023-01-08 00:00:00")) + ], + ), + ] + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_forward_only_monthly_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + model = SqlModel.parse_obj( + { + **model.dict(), + "kind": model.kind.copy(update={"forward_only": True}), + "cron": "0 0 1 * *", + "start": "2022-01-01", + "audits": [], + } + ) + context.upsert_model(model) + + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) + assert waiter_revenue_by_day_snapshot.intervals == [ + (to_timestamp("2022-01-01"), to_timestamp("2023-01-01")) + ] + + model = add_projection_to_model(t.cast(SqlModel, model), literal=True) + context.upsert_model(model) + + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + + plan = context.plan_builder( + "dev", select_models=[model.name], skip_tests=True, enable_preview=True + ).build() + assert to_timestamp(plan.start) == to_timestamp("2022-12-01") + assert to_timestamp(plan.end) == to_timestamp("2023-01-08") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[(to_timestamp("2022-12-01"), to_timestamp("2023-01-01"))], + ), + ] + + 
+@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_parent_created_in_dev_child_created_in_prod( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + waiter_revenue_by_day_model = context.get_model("sushi.waiter_revenue_by_day") + waiter_revenue_by_day_model = add_projection_to_model( + t.cast(SqlModel, waiter_revenue_by_day_model) + ) + forward_only_kind = waiter_revenue_by_day_model.kind.copy(update={"forward_only": True}) + waiter_revenue_by_day_model = waiter_revenue_by_day_model.copy( + update={"kind": forward_only_kind} + ) + context.upsert_model(waiter_revenue_by_day_model) + + waiter_revenue_by_day_snapshot = context.get_snapshot( + waiter_revenue_by_day_model, raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert all(s.is_forward_only for s in plan.new_snapshots) + assert plan.start == to_datetime("2023-01-01") + assert not plan.missing_intervals + + context.apply(plan) + + # Update the child to refer to a newly added column. 
+ top_waiters_model = context.get_model("sushi.top_waiters") + top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=False) + context.upsert_model(top_waiters_model) + + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_forward_only_view_migration( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.top_waiters") + assert model.kind.is_view + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + + # Apply a forward-only plan + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True, forward_only=True) + + # Make sure that the new column got reflected in the view schema + df = context.fetchdf("SELECT one FROM sushi.top_waiters LIMIT 1") + assert len(df) == 1 + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_new_forward_only_model(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, enable_preview=False) + + snapshot = context.get_snapshot("sushi.marketing") + + # The deployable table should not exist yet + assert not context.engine_adapter.table_exists(snapshot.table_name()) + assert context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False)) + + context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) + + assert context.engine_adapter.table_exists(snapshot.table_name()) + assert 
context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False)) + + +@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True) +@pytest.mark.parametrize("has_view_binding", [False, True]) +def test_non_breaking_change_after_forward_only_in_dev( + init_and_plan_context: t.Callable, has_view_binding: bool +): + context, plan = init_and_plan_context("examples/sushi") + context.snapshot_evaluator.adapter.HAS_VIEW_BINDING = has_view_binding + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert all(s.is_forward_only for s in plan.new_snapshots) + assert to_timestamp(plan.start) == to_timestamp("2023-01-07") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], + ), + ] + + # Apply the forward-only changes first. + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] + + # Make a non-breaking change to a model downstream. + model = context.get_model("sushi.top_waiters") + # Select 'one' column from the updated upstream model. 
+ context.upsert_model(add_projection_to_model(t.cast(SqlModel, model), literal=False)) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert to_timestamp(plan.start) == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the non-breaking changes. + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT waiter_id FROM sushi__dev.top_waiters WHERE one IS NOT NULL" + ) + assert not dev_df.empty + + prod_df = context.engine_adapter.fetchdf("DESCRIBE sushi.top_waiters") + assert "one" not in prod_df["column_name"].tolist() + + # Deploy both changes to prod. 
+ plan = context.plan_builder("prod", skip_tests=True).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" + ) + assert prod_df.empty + + prod_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT waiter_id FROM sushi.top_waiters WHERE one IS NOT NULL" + ) + assert prod_df.empty + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_change_after_forward_only_in_dev(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + # Make sure that the most downstream model is a materialized model. + model = context.get_model("sushi.top_waiters") + model = model.copy(update={"kind": FullKind()}) + context.upsert_model(model) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + # Make sushi.orders a forward-only model. 
+ model = context.get_model("sushi.orders") + updated_model_kind = model.kind.copy(update={"forward_only": True}) + model = model.copy(update={"stamp": "force new version", "kind": updated_model_kind}) + context.upsert_model(model) + snapshot = context.get_snapshot(model, raise_if_missing=True) + + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert ( + plan.context_diff.snapshots[snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only + assert not plan.requires_backfill + context.apply(plan) + + # Make a non-breaking change to a model. + model = context.get_model("sushi.top_waiters") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 1 + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the non-breaking changes. + context.apply(plan) + + # Make a non-breaking change upstream from the previously modified model. 
+ model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + waiter_revenue_by_day_snapshot = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() + assert len(plan.new_snapshots) == 2 + assert ( + plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + # Apply the upstream non-breaking changes. + context.apply(plan) + assert not context.plan_builder("dev", skip_tests=True).build().requires_backfill + + # Deploy everything to prod. 
+ plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiters_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_changes_downstream_of_indirect_non_breaking_snapshot_without_intervals( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Make a breaking change first but don't backfill it + model = context.get_model("sushi.orders") + model = model.copy(update={"stamp": "force new version"}) + context.upsert_model(model) + plan_builder = context.plan_builder( + "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True + ) + plan_builder.set_choice(context.get_snapshot(model), 
SnapshotChangeCategory.BREAKING) + context.apply(plan_builder.build()) + + # Now make a non-breaking change to the same snapshot. + model = model.copy(update={"stamp": "force another new version"}) + context.upsert_model(model) + plan_builder = context.plan_builder( + "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True + ) + plan_builder.set_choice(context.get_snapshot(model), SnapshotChangeCategory.NON_BREAKING) + context.apply(plan_builder.build()) + + # Now make a change to a model downstream of the above model. + downstream_model = context.get_model("sushi.top_waiters") + downstream_model = downstream_model.copy(update={"stamp": "yet another new version"}) + context.upsert_model(downstream_model) + plan = context.plan_builder("dev", skip_tests=True).build() + + # If the parent is not representative then the child cannot be deployable + deployability_index = plan.deployability_index + assert not deployability_index.is_representative( + context.get_snapshot("sushi.waiter_revenue_by_day") + ) + assert not deployability_index.is_deployable(context.get_snapshot("sushi.top_waiters")) + + +@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True) +def test_metadata_change_after_forward_only_results_in_migration(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Make a forward-only change + model = context.get_model("sushi.waiter_revenue_by_day") + model = model.copy(update={"kind": model.kind.copy(update={"forward_only": True})}) + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) + assert len(plan.new_snapshots) == 2 + assert all(s.is_forward_only for s in plan.new_snapshots) + + # Follow-up with a metadata change in the same environment + model = model.copy(update={"owner": "new_owner"}) + context.upsert_model(model) + plan = context.plan("dev", 
skip_tests=True, auto_apply=True, no_prompts=True) + assert len(plan.new_snapshots) == 2 + assert all(s.change_category == SnapshotChangeCategory.METADATA for s in plan.new_snapshots) + + # Deploy the latest change to prod + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + # Check that the new column was added in prod + columns = context.engine_adapter.columns("sushi.waiter_revenue_by_day") + assert "one" in columns + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_downstream_of_forward_only(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Make sushi.orders a forward-only model. + forward_only_model = context.get_model("sushi.orders") + updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True}) + forward_only_model = forward_only_model.copy( + update={"stamp": "force new version", "kind": updated_model_kind} + ) + context.upsert_model(forward_only_model) + forward_only_snapshot = context.get_snapshot(forward_only_model, raise_if_missing=True) + + non_breaking_model = context.get_model("sushi.waiter_revenue_by_day") + non_breaking_model = non_breaking_model.copy(update={"start": "2023-01-01"}) + context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model))) + non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True) + top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) + + plan = context.plan_builder( + "dev", + skip_tests=True, + enable_preview=False, + categorizer_config=CategorizerConfig.all_full(), + ).build() + assert ( + plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + 
plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].is_forward_only + assert not plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].is_forward_only + assert not plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].is_forward_only + + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiter_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=non_breaking_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("dev", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + # Deploy everything to prod. 
+ plan = context.plan_builder("prod", skip_tests=True).build() + assert plan.start == to_timestamp("2023-01-01") + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=top_waiter_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=non_breaking_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + ] + + context.apply(plan) + assert ( + not context.plan_builder("prod", skip_tests=True, enable_preview=False) + .build() + .requires_backfill + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_view_model_non_representative_snapshot( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + # Forward-only parent + forward_only_model_name = "memory.sushi.test_forward_only_model" + forward_only_model_expressions = d.parse( + f""" + MODEL ( + name {forward_only_model_name}, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + ), + ); + + SELECT '2023-01-01' AS ds, 'value' AS value; + """ + ) + forward_only_model = load_sql_based_model(forward_only_model_expressions) + assert forward_only_model.forward_only + 
context.upsert_model(forward_only_model) + + # FULL downstream model. + full_downstream_model_name = "memory.sushi.test_full_downstream_model" + full_downstream_model_expressions = d.parse( + f""" + MODEL ( + name {full_downstream_model_name}, + kind FULL, + ); + + SELECT ds, value FROM {forward_only_model_name}; + """ + ) + full_downstream_model = load_sql_based_model(full_downstream_model_expressions) + context.upsert_model(full_downstream_model) + + # VIEW downstream of the previous FULL model. + view_downstream_model_name = "memory.sushi.test_view_downstream_model" + view_downstream_model_expressions = d.parse( + f""" + MODEL ( + name {view_downstream_model_name}, + kind VIEW, + ); + + SELECT ds, value FROM {full_downstream_model_name}; + """ + ) + view_downstream_model = load_sql_based_model(view_downstream_model_expressions) + context.upsert_model(view_downstream_model) + + # Apply the initial plan with all 3 models. + context.plan(auto_apply=True, no_prompts=True) + + # Make a change to the forward-only model and apply it in dev. 
+ context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model))) + forward_only_model_snapshot_id = context.get_snapshot(forward_only_model_name).snapshot_id + full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id + view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id + dev_plan = context.plan("dev", auto_apply=True, no_prompts=True, enable_preview=False) + assert ( + dev_plan.snapshots[forward_only_model_snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + dev_plan.snapshots[full_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert ( + dev_plan.snapshots[view_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + assert not dev_plan.missing_intervals + + # Make a follow-up breaking change to the downstream full model. + new_full_downstream_model_expressions = d.parse( + f""" + MODEL ( + name {full_downstream_model_name}, + kind FULL, + ); + + SELECT ds, 'new_value' AS value FROM {forward_only_model_name}; + """ + ) + new_full_downstream_model = load_sql_based_model(new_full_downstream_model_expressions) + context.upsert_model(new_full_downstream_model) + full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id + view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id + dev_plan = context.plan( + "dev", + categorizer_config=CategorizerConfig.all_full(), + auto_apply=True, + no_prompts=True, + enable_preview=False, + ) + assert ( + dev_plan.snapshots[full_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert ( + dev_plan.snapshots[view_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_BREAKING + ) + assert len(dev_plan.missing_intervals) == 2 + assert 
dev_plan.missing_intervals[0].snapshot_id == full_downstream_model_snapshot_id + assert dev_plan.missing_intervals[1].snapshot_id == view_downstream_model_snapshot_id + + # Check that the representative view hasn't been created yet. + assert not context.engine_adapter.table_exists( + context.get_snapshot(view_downstream_model_name).table_name() + ) + + # Now promote the very first change to prod without promoting the 2nd breaking change. + context.upsert_model(full_downstream_model) + context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) + + # Finally, make a non-breaking change to the full model in the same dev environment. + context.upsert_model(add_projection_to_model(t.cast(SqlModel, new_full_downstream_model))) + full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id + view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id + dev_plan = context.plan( + "dev", + categorizer_config=CategorizerConfig.all_full(), + auto_apply=True, + no_prompts=True, + enable_preview=False, + ) + assert ( + dev_plan.snapshots[full_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.NON_BREAKING + ) + assert ( + dev_plan.snapshots[view_downstream_model_snapshot_id].change_category + == SnapshotChangeCategory.INDIRECT_NON_BREAKING + ) + + # Deploy changes to prod + context.plan("prod", auto_apply=True, no_prompts=True) + + # Check that the representative view has been created. 
+ assert context.engine_adapter.table_exists( + context.get_snapshot(view_downstream_model_name).table_name() + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_indirect_non_breaking_view_model_non_representative_snapshot_migration( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + forward_only_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.forward_only_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + forward_only_model = load_sql_based_model(forward_only_model_expr) + context.upsert_model(forward_only_model) + + downstream_view_a_expr = d.parse( + """ + MODEL ( + name memory.sushi.downstream_view_a, + kind VIEW, + ); + + SELECT a from memory.sushi.forward_only_model; + """ + ) + downstream_view_a = load_sql_based_model(downstream_view_a_expr) + context.upsert_model(downstream_view_a) + + downstream_view_b_expr = d.parse( + """ + MODEL ( + name memory.sushi.downstream_view_b, + kind VIEW, + ); + + SELECT a from memory.sushi.downstream_view_a; + """ + ) + downstream_view_b = load_sql_based_model(downstream_view_b_expr) + context.upsert_model(downstream_view_b) + + context.plan(auto_apply=True, no_prompts=True, skip_tests=True) + + # Make a forward-only change + context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model))) + # Make a non-breaking change downstream + context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_view_a))) + + context.plan(auto_apply=True, no_prompts=True, skip_tests=True) + + # Make sure the downstrean indirect non-breaking view is available in prod + count = context.engine_adapter.fetchone("SELECT COUNT(*) FROM memory.sushi.downstream_view_b")[ + 0 + ] + assert count > 0 + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_new_forward_only_model_concurrent_versions(init_and_plan_context: 
t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + + # Add the first version of the model and apply it to dev_a. + context.upsert_model(new_model) + snapshot_a = context.get_snapshot(new_model.name) + plan_a = context.plan_builder("dev_a").build() + snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id] + + assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots + assert snapshot_a.snapshot_id in plan_a.context_diff.added + assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING + + context.apply(plan_a) + + new_model_alt_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS b; + """ + ) + new_model_alt = load_sql_based_model(new_model_alt_expr) + + # Add the second version of the model but don't apply it yet + context.upsert_model(new_model_alt) + snapshot_b = context.get_snapshot(new_model_alt.name) + plan_b = context.plan_builder("dev_b").build() + snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id] + + assert snapshot_b.snapshot_id in plan_b.context_diff.new_snapshots + assert snapshot_b.snapshot_id in plan_b.context_diff.added + assert snapshot_b.change_category == SnapshotChangeCategory.BREAKING + + assert snapshot_b.fingerprint != snapshot_a.fingerprint + assert snapshot_b.version == snapshot_a.version + + # Apply the 1st version to prod + context.upsert_model(new_model) + plan_prod_a = context.plan_builder("prod").build() + assert snapshot_a.snapshot_id in plan_prod_a.snapshots + assert ( + 
plan_prod_a.snapshots[snapshot_a.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + context.apply(plan_prod_a) + + df = context.fetchdf("SELECT * FROM memory.sushi.new_model") + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}} + + # Modify the 1st version in prod to trigger a forward-only change + new_model = add_projection_to_model(t.cast(SqlModel, new_model)) + context.upsert_model(new_model) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + # Apply the 2nd version to dev_b. + # At this point the snapshot of the 2nd version has already been categorized but not + # persisted in the state. This means that when the snapshot of the 1st version was + # being unpaused during promotion to prod, the state of the 2nd version snapshot was not updated + context.apply(plan_b) + + # Apply the 2nd version to prod + context.upsert_model(new_model_alt) + plan_prod_b = context.plan_builder("prod").build() + assert ( + plan_prod_b.snapshots[snapshot_b.snapshot_id].change_category + == SnapshotChangeCategory.BREAKING + ) + assert not plan_prod_b.requires_backfill + context.apply(plan_prod_b) + + df = context.fetchdf("SELECT * FROM memory.sushi.new_model").replace({np.nan: None}) + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: None}} + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_new_forward_only_model_same_dev_environment(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + new_model_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS a; + """ + ) + new_model = load_sql_based_model(new_model_expr) + + # Add the first version of the model and apply it to dev. 
+ context.upsert_model(new_model) + snapshot_a = context.get_snapshot(new_model.name) + plan_a = context.plan_builder("dev").build() + snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id] + + assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots + assert snapshot_a.snapshot_id in plan_a.context_diff.added + assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING + + context.apply(plan_a) + + df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model") + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}} + + new_model_alt_expr = d.parse( + """ + MODEL ( + name memory.sushi.new_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only TRUE, + on_destructive_change 'allow', + ), + ); + + SELECT '2023-01-07' AS ds, 1 AS b; + """ + ) + new_model_alt = load_sql_based_model(new_model_alt_expr) + + # Add the second version of the model and apply it to the same environment. + context.upsert_model(new_model_alt) + snapshot_b = context.get_snapshot(new_model_alt.name) + + context.invalidate_environment("dev", sync=True) + plan_b = context.plan_builder("dev").build() + snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id] + + context.apply(plan_b) + + df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model").replace({np.nan: None}) + assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: 1}} diff --git a/tests/core/integration/test_model_kinds.py b/tests/core/integration/test_model_kinds.py new file mode 100644 index 0000000000..1cc1bf7aeb --- /dev/null +++ b/tests/core/integration/test_model_kinds.py @@ -0,0 +1,2644 @@ +from __future__ import annotations + +import typing as t +from collections import Counter +from datetime import timedelta +from unittest import mock +import pandas as pd # noqa: TID253 +import pytest +from pathlib import Path +import time_machine +from pytest_mock.plugin import MockerFixture +from sqlglot import exp + +from sqlmesh import CustomMaterialization +from sqlmesh.core import 
dialect as d +from sqlmesh.core.config import ( + Config, + ModelDefaultsConfig, + DuckDBConnectionConfig, + GatewayConfig, +) +from sqlmesh.core.console import Console +from sqlmesh.core.context import Context +from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.model import ( + Model, + SqlModel, + CustomKind, + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.utils.date import to_date, to_timestamp +from sqlmesh.utils.pydantic import validate_string +from tests.conftest import SushiDataValidator +from sqlmesh.utils import CorrelationId +from tests.utils.test_filesystem import create_temp_file + +if t.TYPE_CHECKING: + from sqlmesh import QueryOrDF + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_incremental_by_partition(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + source_name = "raw.test_incremental_by_partition" + model_name = "memory.sushi.test_incremental_by_partition" + + expressions = d.parse( + f""" + MODEL ( + name {model_name}, + kind INCREMENTAL_BY_PARTITION (disable_restatement false), + partitioned_by [key], + allow_partials true, + start '2023-01-07', + ); + + SELECT key, value FROM {source_name}; + """ + ) + model = load_sql_based_model(expressions) + context.upsert_model(model) + + context.engine_adapter.ctas( + source_name, + d.parse_one("SELECT 'key_a' AS key, 1 AS value"), + ) + + context.plan(auto_apply=True, no_prompts=True) + assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ + ("key_a", 1), + ] + + context.engine_adapter.replace_query( + source_name, + d.parse_one("SELECT 'key_b' AS key, 1 AS value"), + ) + context.run(ignore_cron=True) + assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ + ("key_a", 1), + ("key_b", 1), + ] + + context.engine_adapter.replace_query( + source_name, + d.parse_one("SELECT 'key_a' 
AS key, 2 AS value"), + ) + # Run 1 minute later. + with time_machine.travel("2023-01-08 15:01:00 UTC"): + context.run(ignore_cron=True) + assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ + ("key_b", 1), + ("key_a", 2), + ] + + # model should fully refresh on restatement + context.engine_adapter.replace_query( + source_name, + d.parse_one("SELECT 'key_c' AS key, 3 AS value"), + ) + context.plan(auto_apply=True, no_prompts=True, restate_models=[model_name]) + assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ + ("key_c", 3), + ] + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_custom_materialization(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + custom_insert_called = False + + class CustomFullMaterialization(CustomMaterialization): + NAME = "test_custom_full" + + def insert( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + is_first_insert: bool, + render_kwargs: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + nonlocal custom_insert_called + custom_insert_called = True + + self._replace_query_for_model(model, table_name, query_or_df, render_kwargs) + + model = context.get_model("sushi.top_waiters") + kwargs = { + **model.dict(), + # Make a breaking change. + "kind": dict(name="CUSTOM", materialization="test_custom_full"), + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + + context.plan(auto_apply=True, no_prompts=True) + + assert custom_insert_called + + +# needs to be defined at the top level. 
If its defined within the test body, +# adding to the snapshot cache fails with: AttributeError: Can't pickle local object +class TestCustomKind(CustomKind): + __test__ = False # prevent pytest warning since this isnt a class containing tests + + @property + def custom_property(self) -> str: + return validate_string(self.materialization_properties.get("custom_property")) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_custom_materialization_with_custom_kind(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + custom_insert_calls = [] + + class CustomFullMaterialization(CustomMaterialization[TestCustomKind]): + NAME = "test_custom_full_with_custom_kind" + + def insert( + self, + table_name: str, + query_or_df: QueryOrDF, + model: Model, + is_first_insert: bool, + render_kwargs: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + assert isinstance(model.kind, TestCustomKind) + + nonlocal custom_insert_calls + custom_insert_calls.append(model.kind.custom_property) + + self._replace_query_for_model(model, table_name, query_or_df, render_kwargs) + + model = context.get_model("sushi.top_waiters") + kwargs = { + **model.dict(), + # Make a breaking change. 
+ "kind": dict( + name="CUSTOM", + materialization="test_custom_full_with_custom_kind", + materialization_properties={"custom_property": "pytest"}, + ), + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + + context.plan(auto_apply=True) + + assert custom_insert_calls == ["pytest"] + + # no changes + context.plan(auto_apply=True) + + assert custom_insert_calls == ["pytest"] + + # change a property on the custom kind, breaking change + kwargs["kind"]["materialization_properties"]["custom_property"] = "some value" + context.upsert_model(SqlModel.parse_obj(kwargs)) + context.plan(auto_apply=True) + + assert custom_insert_calls == ["pytest", "some value"] + + +def test_incremental_time_self_reference( + mocker: MockerFixture, sushi_context: Context, sushi_data_validator: SushiDataValidator +): + start_ts = to_timestamp("1 week ago") + start_date, end_date = to_date("1 week ago"), to_date("yesterday") + if to_timestamp(start_date) < start_ts: + # The start date must be aligned by the interval unit. 
+ start_date += timedelta(days=1) + + df = sushi_context.engine_adapter.fetchdf( + "SELECT MIN(event_date) FROM sushi.customer_revenue_lifetime" + ) + assert df.iloc[0, 0] == pd.to_datetime(start_date) + df = sushi_context.engine_adapter.fetchdf( + "SELECT MAX(event_date) FROM sushi.customer_revenue_lifetime" + ) + assert df.iloc[0, 0] == pd.to_datetime(end_date) + results = sushi_data_validator.validate("sushi.customer_revenue_lifetime", start_date, end_date) + plan = sushi_context.plan_builder( + restate_models=["sushi.customer_revenue_lifetime", "sushi.customer_revenue_by_day"], + start=start_date, + end="5 days ago", + ).build() + revenue_lifeteime_snapshot = sushi_context.get_snapshot( + "sushi.customer_revenue_lifetime", raise_if_missing=True + ) + revenue_by_day_snapshot = sushi_context.get_snapshot( + "sushi.customer_revenue_by_day", raise_if_missing=True + ) + assert sorted(plan.missing_intervals, key=lambda x: x.snapshot_id) == sorted( + [ + SnapshotIntervals( + snapshot_id=revenue_lifeteime_snapshot.snapshot_id, + intervals=[ + (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), + (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), + (to_timestamp(to_date("5 days ago")), to_timestamp(to_date("4 days ago"))), + (to_timestamp(to_date("4 days ago")), to_timestamp(to_date("3 days ago"))), + (to_timestamp(to_date("3 days ago")), to_timestamp(to_date("2 days ago"))), + (to_timestamp(to_date("2 days ago")), to_timestamp(to_date("1 days ago"))), + (to_timestamp(to_date("1 day ago")), to_timestamp(to_date("today"))), + ], + ), + SnapshotIntervals( + snapshot_id=revenue_by_day_snapshot.snapshot_id, + intervals=[ + (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), + (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), + ], + ), + ], + key=lambda x: x.snapshot_id, + ) + sushi_context.console = mocker.Mock(spec=Console) + sushi_context.apply(plan) + num_batch_calls = 
Counter( + [x[0][0] for x in sushi_context.console.update_snapshot_evaluation_progress.call_args_list] # type: ignore + ) + # Validate that we made 7 calls to the customer_revenue_lifetime snapshot and 1 call to the customer_revenue_by_day snapshot + assert num_batch_calls == { + sushi_context.get_snapshot("sushi.customer_revenue_lifetime", raise_if_missing=True): 7, + sushi_context.get_snapshot("sushi.customer_revenue_by_day", raise_if_missing=True): 1, + } + # Validate that the results are the same as before the restate + assert results == sushi_data_validator.validate( + "sushi.customer_revenue_lifetime", start_date, end_date + ) + + +def test_incremental_by_time_model_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert 
"source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + assert updated_df["new_column"].dropna().tolist() == [3] + + with time_machine.travel("2023-01-11 00:00:00 UTC"): + updated_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + 
forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + CAST(4 AS STRING) as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(updated_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True, run=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 3 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + # The destructive change was ignored but this change is coercable and therefore we still return ints + assert updated_df["new_column"].dropna().tolist() == [3, 4] + + with time_machine.travel("2023-01-12 00:00:00 UTC"): + updated_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + CAST(5 AS STRING) as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(updated_model) + + context = Context(paths=[tmp_path], config=config) + # Make the change compatible since that means we will attempt and alter now that is considered additive + context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { + exp.DataType.build("INT"): {exp.DataType.build("STRING")} + } + context.plan("prod", auto_apply=True, no_prompts=True, run=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + 
updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 4 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + # The change is now reflected since an additive alter could be performed + assert updated_df["new_column"].dropna().tolist() == ["3", "4", "5"] + + context.close() + + +def test_incremental_by_time_model_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + 'other' as other_column, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" 
in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column to the source table + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'other' as other_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("ALTER TABLE source_table ADD COLUMN new_column INT") + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is removed since destructive is allowed + assert "name" not in updated_df.columns + # new_column is not added since additive is ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was applied + assert "name" not in updated_df.columns + # new_column is still not added since additive is ignored + assert "new_column" not in updated_df.columns + + with time_machine.travel("2023-01-11 00:00:00 UTC"): + updated_model = """ + MODEL 
( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + CAST(1 AS STRING) as id, + 'other' as other_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(updated_model) + + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { + exp.DataType.build("INT"): {exp.DataType.build("STRING")} + } + context.plan("prod", auto_apply=True, no_prompts=True, run=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 3 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is still not added since additive is ignored + assert "new_column" not in updated_df.columns + # The additive change was ignored since we set the change as compatible therefore + # instead of getting strings in the result we still return ints + assert updated_df["id"].tolist() == [1, 1, 1] + + with time_machine.travel("2023-01-12 00:00:00 UTC"): + updated_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change allow + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + CAST(1 AS STRING) as id, + 'other' as other_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(updated_model) + + context = Context(paths=[tmp_path], config=config) + # Make the change compatible since that means we will attempt and alter now that is considered 
additive + context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { + exp.DataType.build("INT"): {exp.DataType.build("STRING")} + } + context.plan("prod", auto_apply=True, no_prompts=True, run=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 4 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is now added since it is additive is now allowed + assert "new_column" in updated_df.columns + # The change is now reflected since an additive alter could be performed + assert updated_df["id"].dropna().tolist() == ["1", "1", "1", "1"] + + context.close() + + +def test_incremental_by_unique_key_model_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key id, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + 
context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key id, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is 
added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_incremental_by_unique_key_model_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key id, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key id, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + 
FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_incremental_unmanaged_model_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + 
source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = 
context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_incremental_unmanaged_model_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + 
name test_model, + kind INCREMENTAL_UNMANAGED( + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 2 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_scd_type_2_by_time_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + 
MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_dt as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_dt as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in 
updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_scd_type_2_by_time_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_dt as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * 
FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_TIME ( + unique_key id, + updated_at_name ds, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_dt as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_scd_type_2_by_column_ignore_destructive_change(tmp_path: Path): + 
models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [name], + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [new_column], + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing 
data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_scd_type_2_by_column_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [stable], + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + 'stable' as stable, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + 
context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind SCD_TYPE_2_BY_COLUMN ( + unique_key id, + columns [stable], + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'stable2' as stable, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was ignored + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in 
initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_incremental_partition_ignore_destructive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change ignore + ), + partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change ignore + ), + 
partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + + context.close() + + +def test_incremental_partition_ignore_additive_change(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change allow, + on_additive_change ignore 
+ ), + partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 'test_name' as name, + @start_ds as ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") + context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_PARTITION ( + on_destructive_change allow, + on_additive_change ignore + ), + partitioned_by [ds], + start '2023-01-01', + cron '@daily' + ); + + SELECT + *, + 1 as id, + 3 as new_column, + @start_ds as ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + + assert len(updated_df) == 1 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert 
"new_column" not in updated_df.columns + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.run() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "source_id" in initial_df.columns + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not still in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + + context.close() + + +def test_incremental_by_time_model_ignore_destructive_change_unit_test(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + test_dir = tmp_path / "tests" + test_dir.mkdir() + test_filepath = test_dir / "test_test_model.yaml" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + name, + ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + initial_test = f""" + +test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + name: 'test_name' + ds: '2025-01-01' + outputs: + query: + - id: 1 + name: 'test_name' + ds: '2025-01-01' +""" + + # Write initial test + test_filepath.write_text(initial_test) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute( + "CREATE TABLE 
source_table (id INT, name STRING, new_column INT, ds STRING)" + ) + context.engine_adapter.execute( + "INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" + ) + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + new_column, + ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + updated_test = f""" + + test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + new_column: 3 + ds: '2025-01-01' + outputs: + query: + - id: 1 + new_column: 3 + ds: '2025-01-01' + """ + + # Write initial test + test_filepath.write_text(updated_test) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 1 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + assert 
len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')") + context.run() + test_result = context.test() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still in table since destructive was ignored + assert "name" in updated_df.columns + # new_column is added since it is additive and allowed + assert "new_column" in updated_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + +def test_incremental_by_time_model_ignore_additive_change_unit_test(tmp_path: Path): + models_dir = tmp_path / "models" + models_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + data_filepath = data_dir / "test.duckdb" + test_dir = tmp_path / "tests" + test_dir.mkdir() + test_filepath = test_dir / "test_test_model.yaml" + + config = Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(database=str(data_filepath)), + ) + + # Initial model with 3 columns + initial_model = f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + name, + ds + FROM + source_table; + """ + + # Write initial model + (models_dir / "test_model.sql").write_text(initial_model) + + initial_test = f""" + +test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + name: 'test_name' + ds: '2025-01-01' + outputs: + query: + - id: 1 + name: 'test_name' + ds: '2025-01-01' +""" + + # Write initial 
test + test_filepath.write_text(initial_test) + + with time_machine.travel("2023-01-08 00:00:00 UTC"): + # Create context and apply initial model + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute( + "CREATE TABLE source_table (id INT, name STRING, new_column INT, ds STRING)" + ) + context.engine_adapter.execute( + "INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" + ) + + # Apply initial plan and load data + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify initial data was loaded + initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(initial_df) == 1 + assert "id" in initial_df.columns + assert "name" in initial_df.columns + assert "ds" in initial_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + # remove `name` column and add new column + initial_model = """ + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds, + forward_only true, + on_destructive_change allow, + on_additive_change ignore + ), + start '2023-01-01', + cron '@daily' + ); + + SELECT + id, + new_column, + ds + FROM + source_table; + """ + (models_dir / "test_model.sql").write_text(initial_model) + + # `new_column` is in the output since unit tests are based on the model definition that currently + # exists and doesn't take into account the historical changes to the table. 
Therefore `new_column` is + # not actually in the table but it is represented in the test + updated_test = f""" + test_test_model: + model: test_model + inputs: + source_table: + - id: 1 + new_column: 3 + ds: '2025-01-01' + outputs: + query: + - id: 1 + new_column: 3 + ds: '2025-01-01' + """ + + # Write initial test + test_filepath.write_text(updated_test) + + context = Context(paths=[tmp_path], config=config) + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + test_result = context.test() + + # Verify data loading continued to work + # The existing data should still be there and new data should be loaded + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 1 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is not in table since destructive was ignored + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + with time_machine.travel("2023-01-10 00:00:00 UTC"): + context = Context(paths=[tmp_path], config=config) + context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')") + context.run() + test_result = context.test() + updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') + assert len(updated_df) == 2 + assert "id" in updated_df.columns + assert "ds" in updated_df.columns + # name is still not in table since destructive was allowed + assert "name" not in updated_df.columns + # new_column is not added since it is additive and ignored + assert "new_column" not in updated_df.columns + assert len(test_result.successes) == 1 + assert test_result.testsRun == len(test_result.successes) + + context.close() + + +@time_machine.travel("2020-01-01 00:00:00 UTC") +def 
test_scd_type_2_full_restatement_no_start_date(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Initial product catalog of 3 products + raw_products = d.parse(""" + MODEL ( + name memory.store.raw_products, + kind FULL + ); + + SELECT * FROM VALUES + (101, 'Laptop Pro', 1299.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), + (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), + (103, 'Office Chair', 199.99, 'Furniture', '2020-01-01 00:00:00'::TIMESTAMP) + AS t(product_id, product_name, price, category, last_updated); + """) + + # SCD Type 2 model for product history tracking + product_history = d.parse(""" + MODEL ( + name memory.store.product_history, + kind SCD_TYPE_2_BY_TIME ( + unique_key product_id, + updated_at_name last_updated, + disable_restatement false + ), + owner catalog_team, + cron '0 */6 * * *', + grain product_id, + description 'Product catalog change history' + ); + + SELECT + product_id::INT AS product_id, + product_name::TEXT AS product_name, + price::DECIMAL(10,2) AS price, + category::TEXT AS category, + last_updated AS last_updated + FROM + memory.store.raw_products; + """) + + raw_products_model = load_sql_based_model(raw_products) + product_history_model = load_sql_based_model(product_history) + context.upsert_model(raw_products_model) + context.upsert_model(product_history_model) + + # Initial plan and apply + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + query = "SELECT product_id, product_name, price, category, last_updated, valid_from, valid_to FROM memory.store.product_history ORDER BY product_id, valid_from" + initial_data = context.engine_adapter.fetchdf(query) + + # Validate initial state of 3 products all active + assert len(initial_data) == 3 + assert initial_data["valid_to"].isna().all() + initial_product_names = set(initial_data["product_name"].tolist()) + assert 
initial_product_names == {"Laptop Pro", "Wireless Mouse", "Office Chair"} + + # Price update and category change + with time_machine.travel("2020-01-15 12:00:00 UTC"): + raw_products_v2 = d.parse(""" + MODEL ( + name memory.store.raw_products, + kind FULL + ); + + SELECT * FROM VALUES + (101, 'Laptop Pro', 1199.99, 'Electronics', '2020-01-15 00:00:00'::TIMESTAMP), + (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), + (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP) + AS t(product_id, product_name, price, category, last_updated); + """) + raw_products_v2_model = load_sql_based_model(raw_products_v2) + context.upsert_model(raw_products_v2_model) + context.plan( + auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() + ) + context.run() + + data_after_first_change = context.engine_adapter.fetchdf(query) + + # Should have 5 records (3 original closed, 2 new activε, 1 unchanged) + assert len(data_after_first_change) == 5 + + # Second change + with time_machine.travel("2020-02-01 10:00:00 UTC"): + raw_products_v3 = d.parse(""" + MODEL ( + name memory.store.raw_products, + kind FULL + ); + + SELECT * FROM VALUES + (101, 'Laptop Pro Max', 1399.99, 'Electronics', '2020-02-01 00:00:00'::TIMESTAMP), + (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP), + (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP) + AS t(product_id, product_name, price, category, last_updated); + """) + raw_products_v3_model = load_sql_based_model(raw_products_v3) + context.upsert_model(raw_products_v3_model) + context.plan( + auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() + ) + context.run() + data_after_second_change = context.engine_adapter.fetchdf(query) + assert len(data_after_second_change) == 6 + + # Store the current state before full restatement + data_before_full_restatement = 
data_after_second_change.copy() + + # Perform full restatement (no start date provided) + with time_machine.travel("2020-02-01 15:00:00 UTC"): + plan = context.plan_builder( + "prod", skip_tests=True, restate_models=["memory.store.product_history"] + ).build() + context.apply(plan) + data_after_full_restatement = context.engine_adapter.fetchdf(query) + assert len(data_after_full_restatement) == 3 + + # Check that all currently active products before restatement are still active after restatement + active_before = data_before_full_restatement[ + data_before_full_restatement["valid_to"].isna() + ] + active_after = data_after_full_restatement + assert set(active_before["product_id"]) == set(active_after["product_id"]) + + expected_products = { + 101: { + "product_name": "Laptop Pro Max", + "price": 1399.99, + "category": "Electronics", + "last_updated": "2020-02-01", + }, + 102: { + "product_name": "Wireless Mouse", + "price": 49.99, + "category": "Electronics", + "last_updated": "2020-01-01", + }, + 103: { + "product_name": "Ergonomic Office Chair", + "price": 229.99, + "category": "Office Furniture", + "last_updated": "2020-01-15", + }, + } + for _, row in data_after_full_restatement.iterrows(): + pid = row["product_id"] + assert pid in expected_products + expected = expected_products[pid] + assert row["product_name"] == expected["product_name"] + assert float(row["price"]) == expected["price"] + assert row["category"] == expected["category"] + + # valid_from should be the epoch, valid_to should be NaT + assert str(row["valid_from"]) == "1970-01-01 00:00:00" + assert pd.isna(row["valid_to"]) + + +def test_plan_evaluator_correlation_id(tmp_path: Path): + def _correlation_id_in_sqls(correlation_id: CorrelationId, mock_logger): + sqls = [call[0][0] for call in mock_logger.call_args_list] + return any(f"/* {correlation_id} */" in sql for sql in sqls) + + ctx = Context(paths=[tmp_path], config=Config()) + + # Case: Ensure that the correlation id (plan_id) is included in 
the SQL for each plan + for i in range(2): + create_temp_file( + tmp_path, + Path("models", "test.sql"), + f"MODEL (name test.a, kind FULL); SELECT {i} AS col", + ) + + with mock.patch("sqlmesh.core.engine_adapter.base.EngineAdapter._log_sql") as mock_logger: + ctx.load() + plan = ctx.plan(auto_apply=True, no_prompts=True) + + correlation_id = CorrelationId.from_plan_id(plan.plan_id) + assert str(correlation_id) == f"SQLMESH_PLAN: {plan.plan_id}" + + assert _correlation_id_in_sqls(correlation_id, mock_logger) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_scd_type_2_regular_run_with_offset(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + raw_employee_status = d.parse(""" + MODEL ( + name memory.hr_system.raw_employee_status, + kind FULL + ); + + SELECT + 1001 AS employee_id, + 'engineering' AS department, + 'EMEA' AS region, + '2023-01-08 15:00:00 UTC' AS last_modified; + """) + + employee_history = d.parse(""" + MODEL ( + name memory.hr_system.employee_history, + kind SCD_TYPE_2_BY_TIME ( + unique_key employee_id, + updated_at_name last_modified, + disable_restatement false + ), + owner hr_analytics, + cron '0 7 * * *', + grain employee_id, + description 'Historical tracking of employee status changes' + ); + + SELECT + employee_id::INT AS employee_id, + department::TEXT AS department, + region::TEXT AS region, + last_modified AS last_modified + FROM + memory.hr_system.raw_employee_status; + """) + + raw_employee_status_model = load_sql_based_model(raw_employee_status) + employee_history_model = load_sql_based_model(employee_history) + context.upsert_model(raw_employee_status_model) + context.upsert_model(employee_history_model) + + # Initial plan and apply + plan = context.plan_builder("prod", skip_tests=True).build() + context.apply(plan) + + query = "SELECT employee_id, department, region, valid_from, valid_to FROM memory.hr_system.employee_history ORDER BY employee_id, 
valid_from" + initial_data = context.engine_adapter.fetchdf(query) + + assert len(initial_data) == 1 + assert initial_data["valid_to"].isna().all() + assert initial_data["department"].tolist() == ["engineering"] + assert initial_data["region"].tolist() == ["EMEA"] + + # Apply a future plan with source changes a few hours before the cron time of the SCD Type 2 model BUT on the same day + with time_machine.travel("2023-01-09 00:10:00 UTC"): + raw_employee_status_v2 = d.parse(""" + MODEL ( + name memory.hr_system.raw_employee_status, + kind FULL + ); + + SELECT + 1001 AS employee_id, + 'engineering' AS department, + 'AMER' AS region, + '2023-01-09 00:10:00 UTC' AS last_modified; + """) + raw_employee_status_v2_model = load_sql_based_model(raw_employee_status_v2) + context.upsert_model(raw_employee_status_v2_model) + context.plan( + auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() + ) + + # The 7th hour of the day the run is kicked off for the SCD Type 2 model + with time_machine.travel("2023-01-09 07:00:01 UTC"): + context.run() + data_after_change = context.engine_adapter.fetchdf(query) + + # Validate the SCD2 records for employee 1001 + assert len(data_after_change) == 2 + assert data_after_change.iloc[0]["employee_id"] == 1001 + assert data_after_change.iloc[0]["department"] == "engineering" + assert data_after_change.iloc[0]["region"] == "EMEA" + assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" + assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" + assert data_after_change.iloc[1]["employee_id"] == 1001 + assert data_after_change.iloc[1]["department"] == "engineering" + assert data_after_change.iloc[1]["region"] == "AMER" + assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" + assert pd.isna(data_after_change.iloc[1]["valid_to"]) + + # Update source model again a bit later on the same day + raw_employee_status_v2 = d.parse(""" + MODEL ( + name 
memory.hr_system.raw_employee_status, + kind FULL + ); + + SELECT + 1001 AS employee_id, + 'sales' AS department, + 'ANZ' AS region, + '2023-01-09 07:26:00 UTC' AS last_modified; + """) + raw_employee_status_v2_model = load_sql_based_model(raw_employee_status_v2) + context.upsert_model(raw_employee_status_v2_model) + context.plan( + auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() + ) + + # A day later the run is kicked off for the SCD Type 2 model again + with time_machine.travel("2023-01-10 07:00:00 UTC"): + context.run() + data_after_change = context.engine_adapter.fetchdf(query) + + # Validate the SCD2 history for employee 1001 after second change with the historical records intact + assert len(data_after_change) == 3 + assert data_after_change.iloc[0]["employee_id"] == 1001 + assert data_after_change.iloc[0]["department"] == "engineering" + assert data_after_change.iloc[0]["region"] == "EMEA" + assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" + assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" + assert data_after_change.iloc[1]["employee_id"] == 1001 + assert data_after_change.iloc[1]["department"] == "engineering" + assert data_after_change.iloc[1]["region"] == "AMER" + assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" + assert str(data_after_change.iloc[1]["valid_to"]) == "2023-01-09 07:26:00" + assert data_after_change.iloc[2]["employee_id"] == 1001 + assert data_after_change.iloc[2]["department"] == "sales" + assert data_after_change.iloc[2]["region"] == "ANZ" + assert str(data_after_change.iloc[2]["valid_from"]) == "2023-01-09 07:26:00" + assert pd.isna(data_after_change.iloc[2]["valid_to"]) + + # Now test restatement works (full restatement support currently) + with time_machine.travel("2023-01-10 07:38:00 UTC"): + plan = context.plan_builder( + "prod", + skip_tests=True, + restate_models=["memory.hr_system.employee_history"], + 
start="2023-01-09 00:10:00", + ).build() + context.apply(plan) + restated_data = context.engine_adapter.fetchdf(query) + + # Validate the SCD2 history after restatement has been wiped bar one + assert len(restated_data) == 1 + assert restated_data.iloc[0]["employee_id"] == 1001 + assert restated_data.iloc[0]["department"] == "sales" + assert restated_data.iloc[0]["region"] == "ANZ" + assert str(restated_data.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" + assert pd.isna(restated_data.iloc[0]["valid_to"]) + + +def test_seed_model_metadata_update_does_not_trigger_backfill(tmp_path: Path): + """ + Scenario: + - Create a seed model; perform initial population + - Modify the model with a metadata-only change and trigger a plan + + Outcome: + - The seed model is modified (metadata-only) but this should NOT trigger backfill + - There should be no missing_intervals on the plan to backfill + """ + + models_path = tmp_path / "models" + seeds_path = tmp_path / "seeds" + models_path.mkdir() + seeds_path.mkdir() + + seed_model_path = models_path / "seed.sql" + seed_path = seeds_path / "seed_data.csv" + + seed_path.write_text("\n".join(["id,name", "1,test"])) + + seed_model_path.write_text(""" + MODEL ( + name test.source_data, + kind SEED ( + path '../seeds/seed_data.csv' + ) + ); + """) + + config = Config( + gateways={"": GatewayConfig(connection=DuckDBConnectionConfig())}, + model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"), + ) + ctx = Context(paths=tmp_path, config=config) + + plan = ctx.plan(auto_apply=True) + + original_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"'] + assert plan.directly_modified == {original_seed_snapshot.snapshot_id} + assert plan.metadata_updated == set() + assert plan.missing_intervals + + # prove data loaded + assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [ + (1, "test") + ] + + # prove no diff + ctx.load() + plan = ctx.plan(auto_apply=True) + assert not 
plan.has_changes + assert not plan.missing_intervals + + # make metadata-only change + seed_model_path.write_text(""" + MODEL ( + name test.source_data, + kind SEED ( + path '../seeds/seed_data.csv' + ), + description 'updated by test' + ); + """) + + ctx.load() + plan = ctx.plan(auto_apply=True) + assert plan.has_changes + + new_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"'] + assert ( + new_seed_snapshot.version == original_seed_snapshot.version + ) # should be using the same physical table + assert ( + new_seed_snapshot.snapshot_id != original_seed_snapshot.snapshot_id + ) # but still be different due to the metadata change + assert plan.directly_modified == set() + assert plan.metadata_updated == {new_seed_snapshot.snapshot_id} + + # there should be no missing intervals to backfill since all we did is update a description + assert not plan.missing_intervals + + # there should still be no diff or missing intervals in 3 days time + assert new_seed_snapshot.model.interval_unit.is_day + with time_machine.travel(timedelta(days=3)): + ctx.clear_caches() + ctx.load() + plan = ctx.plan(auto_apply=True) + assert not plan.has_changes + assert not plan.missing_intervals + + # change seed data + seed_path.write_text("\n".join(["id,name", "1,test", "2,updated"])) + + # new plan - NOW we should backfill because data changed + ctx.load() + plan = ctx.plan(auto_apply=True) + assert plan.has_changes + + updated_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"'] + + assert ( + updated_seed_snapshot.snapshot_id + != new_seed_snapshot.snapshot_id + != original_seed_snapshot.snapshot_id + ) + assert not updated_seed_snapshot.forward_only + assert plan.directly_modified == {updated_seed_snapshot.snapshot_id} + assert plan.metadata_updated == set() + assert plan.missing_intervals + + # prove backfilled data loaded + assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [ + (1, "test"), + (2, "updated"), + ] + + 
+@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_seed_model_promote_to_prod_after_dev( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + with open(context.path / "seeds" / "waiter_names.csv", "a") as f: + f.write("\n10,New Waiter") + + context.load() + + waiter_names_snapshot = context.get_snapshot("sushi.waiter_names") + plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) + assert waiter_names_snapshot.snapshot_id in plan.directly_modified + + # Trigger a metadata change to reuse the previous version + waiter_names_model = waiter_names_snapshot.model.copy( + update={"description": "Updated description"} + ) + context.upsert_model(waiter_names_model) + context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) + + # Promote all changes to prod + waiter_names_snapshot = context.get_snapshot("sushi.waiter_names") + plan = context.plan_builder("prod", skip_tests=True).build() + # Clear the cache to source the dehydrated model instance from the state + context.clear_caches() + context.apply(plan) + + assert ( + context.engine_adapter.fetchone("SELECT COUNT(*) FROM sushi.waiter_names WHERE id = 10")[0] + == 1 + ) diff --git a/tests/core/integration/test_multi_repo.py b/tests/core/integration/test_multi_repo.py new file mode 100644 index 0000000000..4d72d137b3 --- /dev/null +++ b/tests/core/integration/test_multi_repo.py @@ -0,0 +1,561 @@ +from __future__ import annotations + +from unittest.mock import patch +from textwrap import dedent +import os +import pytest +from pathlib import Path +from sqlmesh.core.console import ( + get_console, +) +from sqlmesh.core.config.naming import NameInferenceConfig +from sqlmesh.core.model.common import ParsableSql +from sqlmesh.utils.concurrency import NodeExecutionFailedError + +from sqlmesh.core import constants as c +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + 
DuckDBConnectionConfig, +) +from sqlmesh.core.console import get_console +from sqlmesh.core.context import Context +from sqlmesh.utils.date import now +from tests.conftest import DuckDBMetadata +from tests.utils.test_helpers import use_terminal_console +from tests.core.integration.utils import validate_apply_basics + + +pytestmark = pytest.mark.slow + + +@use_terminal_console +def test_multi(mocker): + context = Context(paths=["examples/multi/repo_1", "examples/multi/repo_2"], gateway="memory") + + with patch.object(get_console(), "log_warning") as mock_logger: + context.plan_builder(environment="dev") + warnings = mock_logger.call_args[0][0] + repo1_path, repo2_path = context.configs.keys() + assert f"Linter warnings for {repo1_path}" in warnings + assert f"Linter warnings for {repo2_path}" not in warnings + + assert ( + context.render("bronze.a").sql() + == '''SELECT 1 AS "col_a", 'b' AS "col_b", 1 AS "one", 'repo_1' AS "dup"''' + ) + assert ( + context.render("silver.d").sql() + == '''SELECT "c"."col_a" AS "col_a", 2 AS "two", 'repo_2' AS "dup" FROM "memory"."silver"."c" AS "c"''' + ) + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 5 + context.apply(plan) + + # Ensure before_all, after_all statements for multiple repos have executed + environment_statements = context.state_reader.get_environment_statements(c.PROD) + assert len(environment_statements) == 2 + assert context.fetchdf("select * from before_1").to_dict()["1"][0] == 1 + assert context.fetchdf("select * from before_2").to_dict()["2"][0] == 2 + assert context.fetchdf("select * from after_1").to_dict()["repo_1"][0] == "repo_1" + assert context.fetchdf("select * from after_2").to_dict()["repo_2"][0] == "repo_2" + + old_context = context + context = Context( + paths=["examples/multi/repo_1"], + state_sync=old_context.state_sync, + gateway="memory", + ) + context._engine_adapter = old_context.engine_adapter 
+ del context.engine_adapters + + model = context.get_model("bronze.a") + assert model.project == "repo_1" + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql(sql=model.query.select("'c' AS c").sql(dialect=model.dialect)) + } + ) + ) + plan = context.plan_builder().build() + + assert set(snapshot.name for snapshot in plan.directly_modified) == { + '"memory"."bronze"."a"', + '"memory"."bronze"."b"', + '"memory"."silver"."e"', + } + assert sorted([x.name for x in list(plan.indirectly_modified.values())[0]]) == [ + '"memory"."silver"."c"', + '"memory"."silver"."d"', + ] + assert len(plan.missing_intervals) == 3 + context.apply(plan) + validate_apply_basics(context, c.PROD, plan.snapshots.values()) + + # Ensure that before_all and after_all statements of both repos are there despite planning with repo_1 + environment_statements = context.state_reader.get_environment_statements(c.PROD) + assert len(environment_statements) == 2 + + # Ensure that environment statements have the project field set correctly + sorted_env_statements = sorted(environment_statements, key=lambda es: es.project) + assert sorted_env_statements[0].project == "repo_1" + assert sorted_env_statements[1].project == "repo_2" + + # Assert before_all and after_all for each project + assert sorted_env_statements[0].before_all == [ + "CREATE TABLE IF NOT EXISTS before_1 AS select @one()" + ] + assert sorted_env_statements[0].after_all == [ + "CREATE TABLE IF NOT EXISTS after_1 AS select @dup()" + ] + assert sorted_env_statements[1].before_all == [ + "CREATE TABLE IF NOT EXISTS before_2 AS select @two()" + ] + assert sorted_env_statements[1].after_all == [ + "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" + ] + + +@use_terminal_console +def test_multi_repo_single_project_environment_statements_update(copy_to_temp_path): + paths = copy_to_temp_path("examples/multi") + repo_1_path = f"{paths[0]}/repo_1" + repo_2_path = f"{paths[0]}/repo_2" + + context = Context(paths=[repo_1_path, 
repo_2_path], gateway="memory") + context._new_state_sync().reset(default_catalog=context.default_catalog) + + initial_plan = context.plan_builder().build() + context.apply(initial_plan) + + # Get initial statements + initial_statements = context.state_reader.get_environment_statements(c.PROD) + assert len(initial_statements) == 2 + + # Modify repo_1's config to add a new before_all statement + repo_1_config_path = f"{repo_1_path}/config.yaml" + with open(repo_1_config_path, "r") as f: + config_content = f.read() + + # Add a new before_all statement to repo_1 only + modified_config = config_content.replace( + "CREATE TABLE IF NOT EXISTS before_1 AS select @one()", + "CREATE TABLE IF NOT EXISTS before_1 AS select @one()\n - CREATE TABLE IF NOT EXISTS before_1_modified AS select 999", + ) + + with open(repo_1_config_path, "w") as f: + f.write(modified_config) + + # Create new context with modified config but only for repo_1 + context_repo_1_only = Context( + paths=[repo_1_path], state_sync=context.state_sync, gateway="memory" + ) + + # Plan with only repo_1, this should preserve repo_2's statements from state + repo_1_plan = context_repo_1_only.plan_builder(environment="dev").build() + context_repo_1_only.apply(repo_1_plan) + updated_statements = context_repo_1_only.state_reader.get_environment_statements("dev") + + # Should still have statements from both projects + assert len(updated_statements) == 2 + + # Sort by project + sorted_updated = sorted(updated_statements, key=lambda es: es.project or "") + + # Verify repo_1 has the new statement + repo_1_updated = sorted_updated[0] + assert repo_1_updated.project == "repo_1" + assert len(repo_1_updated.before_all) == 2 + assert "CREATE TABLE IF NOT EXISTS before_1_modified" in repo_1_updated.before_all[1] + + # Verify repo_2 statements are preserved from state + repo_2_preserved = sorted_updated[1] + assert repo_2_preserved.project == "repo_2" + assert len(repo_2_preserved.before_all) == 1 + assert "CREATE TABLE IF NOT 
EXISTS before_2" in repo_2_preserved.before_all[0] + assert "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" in repo_2_preserved.after_all[0] + + +@use_terminal_console +def test_multi_virtual_layer(copy_to_temp_path): + paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer") + path = Path(paths[0]) + first_db_path = str(path / "db_1.db") + second_db_path = str(path / "db_2.db") + + config = Config( + gateways={ + "first": GatewayConfig( + connection=DuckDBConnectionConfig(database=first_db_path), + variables={"overriden_var": "gateway_1"}, + ), + "second": GatewayConfig( + connection=DuckDBConnectionConfig(database=second_db_path), + variables={"overriden_var": "gateway_2"}, + ), + }, + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + model_naming=NameInferenceConfig(infer_names=True), + default_gateway="first", + gateway_managed_virtual_layer=True, + variables={"overriden_var": "global", "global_one": 88}, + ) + + context = Context(paths=paths, config=config) + assert context.default_catalog_per_gateway == {"first": "db_1", "second": "db_2"} + assert len(context.engine_adapters) == 2 + + # For the model without gateway the default should be used and the gateway variable should overide the global + assert ( + context.render("first_schema.model_one").sql() + == 'SELECT \'gateway_1\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' + ) + + # For model with gateway specified the appropriate variable should be used to overide + assert ( + context.render("db_2.second_schema.model_one").sql() + == 'SELECT \'gateway_2\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' + ) + + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 4 + context.apply(plan) + + # Validate the tables that source from the first tables are correct as well with evaluate + assert ( + context.evaluate( + "first_schema.model_two", start=now(), end=now(), execution_time=now() + ).to_string() + == " item_id global_one\n0 gateway_1 88" + ) + assert ( + 
context.evaluate( + "db_2.second_schema.model_two", start=now(), end=now(), execution_time=now() + ).to_string() + == " item_id global_one\n0 gateway_2 88" + ) + + assert sorted(set(snapshot.name for snapshot in plan.directly_modified)) == [ + '"db_1"."first_schema"."model_one"', + '"db_1"."first_schema"."model_two"', + '"db_2"."second_schema"."model_one"', + '"db_2"."second_schema"."model_two"', + ] + + model = context.get_model("db_1.first_schema.model_one") + + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'c' AS extra").sql(dialect=model.dialect) + ) + } + ) + ) + plan = context.plan_builder().build() + context.apply(plan) + + state_environments = context.state_reader.get_environments() + state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) + + assert state_environments[0].gateway_managed + assert len(state_snapshots) == len(state_environments[0].snapshots) + assert [snapshot.name for snapshot in plan.directly_modified] == [ + '"db_1"."first_schema"."model_one"' + ] + assert [x.name for x in list(plan.indirectly_modified.values())[0]] == [ + '"db_1"."first_schema"."model_two"' + ] + + assert len(plan.missing_intervals) == 1 + assert ( + context.evaluate( + "db_1.first_schema.model_one", start=now(), end=now(), execution_time=now() + ).to_string() + == " item_id global_one macro_one extra\n0 gateway_1 88 1 c" + ) + + # Create dev environment with changed models + model = context.get_model("db_2.second_schema.model_one") + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'d' AS extra").sql(dialect=model.dialect) + ) + } + ) + ) + model = context.get_model("first_schema.model_two") + context.upsert_model( + model.copy( + update={ + "query_": ParsableSql( + sql=model.query.select("'d2' AS col").sql(dialect=model.dialect) + ) + } + ) + ) + plan = context.plan_builder("dev").build() + context.apply(plan) + + dev_environment = 
context.state_sync.get_environment("dev") + assert dev_environment is not None + + metadata_engine_1 = DuckDBMetadata.from_context(context) + start_schemas_1 = set(metadata_engine_1.schemas) + assert sorted(start_schemas_1) == sorted( + {"first_schema__dev", "sqlmesh", "first_schema", "sqlmesh__first_schema"} + ) + + metadata_engine_2 = DuckDBMetadata(context._get_engine_adapter("second")) + start_schemas_2 = set(metadata_engine_2.schemas) + assert sorted(start_schemas_2) == sorted( + {"sqlmesh__second_schema", "second_schema", "second_schema__dev"} + ) + + # Invalidate dev environment + context.invalidate_environment("dev") + invalidate_environment = context.state_sync.get_environment("dev") + assert invalidate_environment is not None + assert invalidate_environment.expiration_ts < dev_environment.expiration_ts # type: ignore + assert sorted(start_schemas_1) == sorted(set(metadata_engine_1.schemas)) + assert sorted(start_schemas_2) == sorted(set(metadata_engine_2.schemas)) + + # Run janitor + context._run_janitor() + assert context.state_sync.get_environment("dev") is None + removed_schemas = start_schemas_1 - set(metadata_engine_1.schemas) + assert removed_schemas == {"first_schema__dev"} + removed_schemas = start_schemas_2 - set(metadata_engine_2.schemas) + assert removed_schemas == {"second_schema__dev"} + prod_environment = context.state_sync.get_environment("prod") + + # Remove the second gateway's second model and apply plan + second_model = path / "models/second_schema/model_two.sql" + os.remove(second_model) + assert not second_model.exists() + context = Context(paths=paths, config=config) + plan = context.plan_builder().build() + context.apply(plan) + prod_environment = context.state_sync.get_environment("prod") + assert len(prod_environment.snapshots_) == 3 + + # Changing the flag should show a diff + context.config.gateway_managed_virtual_layer = False + plan = context.plan_builder().build() + assert not plan.requires_backfill + assert ( + 
plan.context_diff.previous_gateway_managed_virtual_layer + != plan.context_diff.gateway_managed_virtual_layer + ) + assert plan.context_diff.has_changes + + # This should error since the default_gateway won't have access to create the view on a non-shared catalog + with pytest.raises(NodeExecutionFailedError, match=r"Execution failed for node SnapshotId*"): + context.apply(plan) + + +def test_multi_dbt(mocker): + context = Context(paths=["examples/multi_dbt/bronze", "examples/multi_dbt/silver"]) + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 4 + context.apply(plan) + validate_apply_basics(context, c.PROD, plan.snapshots.values()) + + environment_statements = context.state_sync.get_environment_statements(c.PROD) + assert len(environment_statements) == 2 + bronze_statements = environment_statements[0] + assert bronze_statements.before_all == [ + "JINJA_STATEMENT_BEGIN;\nCREATE TABLE IF NOT EXISTS analytic_stats (physical_table VARCHAR, evaluation_time VARCHAR);\nJINJA_END;" + ] + assert not bronze_statements.after_all + silver_statements = environment_statements[1] + assert not silver_statements.before_all + assert silver_statements.after_all == [ + "JINJA_STATEMENT_BEGIN;\n{{ store_schemas(schemas) }}\nJINJA_END;" + ] + assert "store_schemas" in silver_statements.jinja_macros.root_macros + analytics_table = context.fetchdf("select * from analytic_stats;") + assert sorted(analytics_table.columns) == sorted(["physical_table", "evaluation_time"]) + schema_table = context.fetchdf("select * from schema_table;") + assert sorted(schema_table.all_schemas[0]) == sorted(["bronze", "silver"]) + + +def test_multi_hybrid(mocker): + context = Context( + paths=["examples/multi_hybrid/dbt_repo", "examples/multi_hybrid/sqlmesh_repo"] + ) + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + + assert 
len(plan.new_snapshots) == 5 + assert context.dag.roots == {'"memory"."dbt_repo"."e"'} + assert context.dag.graph['"memory"."dbt_repo"."c"'] == {'"memory"."sqlmesh_repo"."b"'} + assert context.dag.graph['"memory"."sqlmesh_repo"."b"'] == {'"memory"."sqlmesh_repo"."a"'} + assert context.dag.graph['"memory"."sqlmesh_repo"."a"'] == {'"memory"."dbt_repo"."e"'} + assert context.dag.downstream('"memory"."dbt_repo"."e"') == [ + '"memory"."sqlmesh_repo"."a"', + '"memory"."sqlmesh_repo"."b"', + '"memory"."dbt_repo"."c"', + '"memory"."dbt_repo"."d"', + ] + + sqlmesh_model_a = context.get_model("sqlmesh_repo.a") + dbt_model_c = context.get_model("dbt_repo.c") + assert sqlmesh_model_a.project == "sqlmesh_repo" + + sqlmesh_rendered = ( + 'SELECT "e"."col_a" AS "col_a", "e"."col_b" AS "col_b" FROM "memory"."dbt_repo"."e" AS "e"' + ) + dbt_rendered = 'SELECT DISTINCT ROUND(CAST(("b"."col_a" / NULLIF(100, 0)) AS DECIMAL(16, 2)), 2) AS "rounded_col_a" FROM "memory"."sqlmesh_repo"."b" AS "b"' + assert sqlmesh_model_a.render_query().sql() == sqlmesh_rendered + assert dbt_model_c.render_query().sql() == dbt_rendered + + context.apply(plan) + validate_apply_basics(context, c.PROD, plan.snapshots.values()) + + +def test_multi_repo_no_project_to_project(copy_to_temp_path): + paths = copy_to_temp_path("examples/multi") + repo_1_path = f"{paths[0]}/repo_1" + repo_1_config_path = f"{repo_1_path}/config.yaml" + with open(repo_1_config_path, "r") as f: + config_content = f.read() + with open(repo_1_config_path, "w") as f: + f.write(config_content.replace("project: repo_1\n", "")) + + context = Context(paths=[repo_1_path], gateway="memory") + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + context.apply(plan) + + # initially models in prod have no project + prod_snapshots = context.state_reader.get_snapshots( + context.state_reader.get_environment(c.PROD).snapshots + ) + for snapshot in prod_snapshots.values(): + assert 
snapshot.node.project == "" + + # we now adopt multi project by adding a project name + with open(repo_1_config_path, "r") as f: + config_content = f.read() + with open(repo_1_config_path, "w") as f: + f.write("project: repo_1\n" + config_content) + + context_with_project = Context( + paths=[repo_1_path], + state_sync=context.state_sync, + gateway="memory", + ) + context_with_project._engine_adapter = context.engine_adapter + del context_with_project.engine_adapters + + # local models should take precedence to pick up the new project name + local_model_a = context_with_project.get_model("bronze.a") + assert local_model_a.project == "repo_1" + local_model_b = context_with_project.get_model("bronze.b") + assert local_model_b.project == "repo_1" + + # also verify the plan works + plan = context_with_project.plan_builder().build() + context_with_project.apply(plan) + validate_apply_basics(context_with_project, c.PROD, plan.snapshots.values()) + + +def test_multi_repo_local_model_overrides_prod_from_other_project(copy_to_temp_path): + paths = copy_to_temp_path("examples/multi") + repo_1_path = f"{paths[0]}/repo_1" + repo_2_path = f"{paths[0]}/repo_2" + + context = Context(paths=[repo_1_path, repo_2_path], gateway="memory") + context._new_state_sync().reset(default_catalog=context.default_catalog) + plan = context.plan_builder().build() + assert len(plan.new_snapshots) == 5 + context.apply(plan) + + prod_model_c = context.get_model("silver.c") + assert prod_model_c.project == "repo_2" + + with open(f"{repo_1_path}/models/c.sql", "w") as f: + f.write( + dedent("""\ + MODEL ( + name silver.c, + kind FULL + ); + + SELECT DISTINCT col_a, col_b + FROM bronze.a + """) + ) + + # silver.c exists locally in repo 1 now AND in prod under repo_2 + context_repo1 = Context( + paths=[repo_1_path], + state_sync=context.state_sync, + gateway="memory", + ) + context_repo1._engine_adapter = context.engine_adapter + del context_repo1.engine_adapters + + # local model should take precedence 
and its project should reflect the new project name + local_model_c = context_repo1.get_model("silver.c") + assert local_model_c.project == "repo_1" + + rendered = context_repo1.render("silver.c").sql() + assert "col_b" in rendered + + # its downstream dependencies though should still be picked up + plan = context_repo1.plan_builder().build() + directly_modified_names = {snapshot.name for snapshot in plan.directly_modified} + assert '"memory"."silver"."c"' in directly_modified_names + assert '"memory"."silver"."d"' in directly_modified_names + missing_interval_names = {s.snapshot_id.name for s in plan.missing_intervals} + assert '"memory"."silver"."c"' in missing_interval_names + assert '"memory"."silver"."d"' in missing_interval_names + + context_repo1.apply(plan) + validate_apply_basics(context_repo1, c.PROD, plan.snapshots.values()) + result = context_repo1.fetchdf("SELECT * FROM memory.silver.c") + assert "col_b" in result.columns + + +def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path): + paths = copy_to_temp_path("examples/multi") + repo_1_path = paths[0] / "repo_1" + repo_2_path = paths[0] / "repo_2" + + # Add an extra gateway to repo_2's config + repo_2_config_path = repo_2_path / "config.yaml" + config_content = repo_2_config_path.read_text() + + modified_config = config_content.replace( + "default_gateway: local", + dedent(""" + extra: + connection: + type: duckdb + database: extra.duckdb + + default_gateway: local + """), + ) + + repo_2_config_path.write_text(modified_config) + + # Create context with both repos but using the repo_1 path first + context = Context( + paths=(repo_1_path, repo_2_path), + gateway="memory", + ) + + # Verify all gateways from both repos are present + gathered_gateways = context.engine_adapters.keys() + expected_gateways = {"local", "memory", "extra"} + assert gathered_gateways == expected_gateways diff --git a/tests/core/integration/test_plan_options.py b/tests/core/integration/test_plan_options.py new 
file mode 100644 index 0000000000..a50dc145cd --- /dev/null +++ b/tests/core/integration/test_plan_options.py @@ -0,0 +1,529 @@ +from __future__ import annotations + +import typing as t +import pytest +from sqlmesh.core.console import ( + set_console, + get_console, + TerminalConsole, +) +import time_machine + +from sqlmesh.core import dialect as d +from sqlmesh.core.console import get_console +from sqlmesh.core.model import ( + SqlModel, + load_sql_based_model, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.utils.date import to_datetime, to_timestamp +from sqlmesh.utils.errors import ( + NoChangesPlanError, +) +from tests.core.integration.utils import ( + add_projection_to_model, +) + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_empty_backfill(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + plan = context.plan_builder("prod", skip_tests=True, empty_backfill=True).build() + assert plan.missing_intervals + assert plan.empty_backfill + assert not plan.requires_backfill + + context.apply(plan) + + for model in context.models.values(): + if model.is_seed or model.kind.is_symbolic: + continue + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM {model.name}")[0] + assert row_num == 0 + + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.requires_backfill + assert not plan.has_changes + assert not plan.missing_intervals + + snapshots = plan.snapshots + for snapshot in snapshots.values(): + if not snapshot.intervals: + continue + assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_empty_backfill_new_model(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + new_model = load_sql_based_model( + d.parse( + """ + 
MODEL ( + name memory.sushi.new_model, + kind FULL, + cron '0 8 * * *', + start '2023-01-01', + ); + + SELECT 1 AS one; + """ + ) + ) + new_model_name = context.upsert_model(new_model).fqn + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + plan = context.plan_builder("dev", skip_tests=True, empty_backfill=True).build() + assert plan.end == to_datetime("2023-01-09") + assert plan.missing_intervals + assert plan.empty_backfill + assert not plan.requires_backfill + + context.apply(plan) + + for model in context.models.values(): + if model.is_seed or model.kind.is_symbolic: + continue + row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.new_model")[ + 0 + ] + assert row_num == 0 + + plan = context.plan_builder("prod", skip_tests=True).build() + assert not plan.requires_backfill + assert not plan.missing_intervals + + snapshots = plan.snapshots + for snapshot in snapshots.values(): + if not snapshot.intervals: + continue + elif snapshot.name == new_model_name: + assert snapshot.intervals[-1][1] == to_timestamp("2023-01-09") + else: + assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08") + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_explain(init_and_plan_context: t.Callable): + old_console = get_console() + set_console(TerminalConsole()) + + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + waiter_revenue_by_day_model = context.get_model("sushi.waiter_revenue_by_day") + waiter_revenue_by_day_model = add_projection_to_model( + t.cast(SqlModel, waiter_revenue_by_day_model) + ) + context.upsert_model(waiter_revenue_by_day_model) + + waiter_revenue_by_day_snapshot = context.get_snapshot(waiter_revenue_by_day_model.name) + top_waiters_snapshot = context.get_snapshot("sushi.top_waiters") + + common_kwargs = dict(skip_tests=True, no_prompts=True, explain=True) + + # For now just making sure the plan doesn't error + context.plan("dev", **common_kwargs) + context.plan("dev", 
**common_kwargs, skip_backfill=True) + context.plan("dev", **common_kwargs, empty_backfill=True) + context.plan("dev", **common_kwargs, forward_only=True, enable_preview=True) + context.plan("prod", **common_kwargs) + context.plan("prod", **common_kwargs, forward_only=True) + context.plan("prod", **common_kwargs, restate_models=[waiter_revenue_by_day_model.name]) + + set_console(old_console) + + # Make sure that the now changes were actually applied + for target_env in ("dev", "prod"): + plan = context.plan_builder(target_env, skip_tests=True).build() + assert plan.has_changes + assert plan.missing_intervals + assert plan.directly_modified == {waiter_revenue_by_day_snapshot.snapshot_id} + assert len(plan.new_snapshots) == 2 + assert {s.snapshot_id for s in plan.new_snapshots} == { + waiter_revenue_by_day_snapshot.snapshot_id, + top_waiters_snapshot.snapshot_id, + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_ignore_cron( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + expressions = d.parse( + f""" + MODEL ( + name memory.sushi.test_allow_partials, + kind INCREMENTAL_UNMANAGED, + allow_partials true, + start '2023-01-01', + ); + + SELECT @end_ts AS end_ts + """ + ) + model = load_sql_based_model(expressions) + + context.upsert_model(model) + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + + assert ( + context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[ + 0 + ] + == "2023-01-07 23:59:59.999999" + ) + + plan_no_ignore_cron = context.plan_builder( + "prod", run=True, ignore_cron=False, skip_tests=True + ).build() + assert not plan_no_ignore_cron.missing_intervals + + plan = context.plan_builder("prod", run=True, ignore_cron=True, skip_tests=True).build() + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot(model, raise_if_missing=True).snapshot_id, + intervals=[ + 
(to_timestamp("2023-01-08"), to_timestamp("2023-01-08 15:00:00")), + ], + ) + ] + context.apply(plan) + + assert ( + context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[ + 0 + ] + == "2023-01-08 14:59:59.999999" + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_plan_with_run( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + plan = context.plan(run=True) + assert plan.has_changes + assert plan.missing_intervals + + context.apply(plan) + + snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) + assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { + '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."orders"': to_timestamp("2023-01-09"), + '"memory"."sushi"."items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."latest_order"': to_timestamp("2023-01-09"), + '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), + '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-09"), + '"memory"."sushi"."marketing"': to_timestamp("2023-01-09"), + '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-09"), + '"memory"."raw"."demographics"': to_timestamp("2023-01-09"), + "assert_item_price_above_zero": to_timestamp("2023-01-09"), + '"memory"."sushi"."active_customers"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customers"': to_timestamp("2023-01-09"), + 
'"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-09"), + '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-09"), + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_select_models(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Modify 2 models. + model = context.get_model("sushi.waiter_revenue_by_day") + kwargs = { + **model.dict(), + # Make a breaking change. + "query": model.query.order_by("waiter_id"), # type: ignore + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + + model = context.get_model("sushi.customer_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + expected_intervals = [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ] + + waiter_revenue_by_day_snapshot_id = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id + + # Select one of the modified models. 
+ plan_builder = context.plan_builder( + "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True + ) + snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] + plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) + plan = plan_builder.build() + + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert len(dev_df) == 7 + + # Make sure that we only create a view for the selected model. + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert len(schema_objects) == 1 + assert schema_objects[0].name == "waiter_revenue_by_day" + + # Validate the other modified model. + assert not context.get_snapshot("sushi.customer_revenue_by_day").change_category + assert not context.get_snapshot("sushi.customer_revenue_by_day").version + + # Validate the downstream model. + assert not context.engine_adapter.table_exists( + context.get_snapshot("sushi.top_waiters").table_name() + ) + assert not context.engine_adapter.table_exists( + context.get_snapshot("sushi.top_waiters").table_name(False) + ) + + # Make sure that tables are created when deploying to prod. 
+ plan = context.plan("prod", skip_tests=True) + context.apply(plan) + assert context.engine_adapter.table_exists( + context.get_snapshot("sushi.top_waiters").table_name() + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_select_models_for_backfill(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + expected_intervals = [ + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ] + + plan = context.plan_builder( + "dev", backfill_models=["+*waiter_revenue_by_day"], skip_tests=True + ).build() + + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.items", raise_if_missing=True).snapshot_id, + intervals=expected_intervals, + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.order_items", raise_if_missing=True + ).snapshot_id, + intervals=expected_intervals, + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot("sushi.orders", raise_if_missing=True).snapshot_id, + intervals=expected_intervals, + ), + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + dev_df = context.engine_adapter.fetchdf( + "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" + ) + assert len(dev_df) == 1 + + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == { + "items", + "order_items", + "orders", + "waiter_revenue_by_day", + } + + assert not context.engine_adapter.table_exists( + context.get_snapshot("sushi.customer_revenue_by_day").table_name() + ) + + # Make sure that tables are created when deploying to prod. 
+ plan = context.plan("prod") + context.apply(plan) + assert context.engine_adapter.table_exists( + context.get_snapshot("sushi.customer_revenue_by_day").table_name() + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_select_unchanged_model_for_backfill(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # Modify 2 models. + model = context.get_model("sushi.waiter_revenue_by_day") + kwargs = { + **model.dict(), + # Make a breaking change. + "query": d.parse_one( + f"{model.query.sql(dialect='duckdb')} ORDER BY waiter_id", dialect="duckdb" + ), + } + context.upsert_model(SqlModel.parse_obj(kwargs)) + + model = context.get_model("sushi.customer_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + expected_intervals = [ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ] + + waiter_revenue_by_day_snapshot_id = context.get_snapshot( + "sushi.waiter_revenue_by_day", raise_if_missing=True + ).snapshot_id + + # Select one of the modified models. + plan_builder = context.plan_builder( + "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True + ) + snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] + plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) + plan = plan_builder.build() + + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=waiter_revenue_by_day_snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + # Make sure that we only create a view for the selected model. 
+ schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == {"waiter_revenue_by_day"} + + # Now select a model downstream from the previously modified one in order to backfill it. + plan = context.plan_builder("dev", select_models=["*top_waiters"], skip_tests=True).build() + + assert not plan.has_changes + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=context.get_snapshot( + "sushi.top_waiters", raise_if_missing=True + ).snapshot_id, + intervals=expected_intervals, + ), + ] + + context.apply(plan) + + # Make sure that a view has been created for the downstream selected model. + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == {"waiter_revenue_by_day", "top_waiters"} + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_create_environment_no_changes_with_selector(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + with pytest.raises(NoChangesPlanError): + context.plan_builder("dev").build() + + plan = context.plan_builder("dev", select_models=["*top_waiters"]).build() + assert not plan.missing_intervals + context.apply(plan) + + schema_objects = context.engine_adapter.get_data_objects("sushi__dev") + assert {o.name for o in schema_objects} == {"top_waiters"} + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_include_unmodified(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + plan = context.plan_builder( + "dev", + include_unmodified=True, + skip_tests=True, + ).build() + + all_snapshots = context.snapshots + + assert len(plan.environment.snapshots) == len(all_snapshots) + assert plan.environment.promoted_snapshot_ids is None + + context.apply(plan) + + data_objs = context.engine_adapter.get_data_objects("sushi__dev") + assert len(data_objs) == len( + [s for s in 
all_snapshots.values() if s.is_model and not s.is_symbolic] + ) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_select_models_with_include_unmodified(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + plan = context.plan_builder( + "dev", + select_models=["*top_waiters", "*customer_revenue_by_day"], + include_unmodified=True, + skip_tests=True, + ).build() + + assert len(plan.environment.snapshots) == len(context.snapshots) + + promoted_set = {s_id.name for s_id in plan.environment.promoted_snapshot_ids} + assert promoted_set == { + '"memory"."sushi"."customer_revenue_by_day"', + '"memory"."sushi"."top_waiters"', + } + + context.apply(plan) + + data_objs = context.engine_adapter.get_data_objects("sushi__dev") + assert len(data_objs) == 2 + assert {o.name for o in data_objs} == {"customer_revenue_by_day", "top_waiters"} diff --git a/tests/core/integration/test_restatement.py b/tests/core/integration/test_restatement.py new file mode 100644 index 0000000000..3694efce31 --- /dev/null +++ b/tests/core/integration/test_restatement.py @@ -0,0 +1,1935 @@ +from __future__ import annotations + +import typing as t +import pandas as pd # noqa: TID253 +import pytest +from pathlib import Path +from sqlmesh.core.console import ( + MarkdownConsole, + set_console, + get_console, + CaptureTerminalConsole, +) +import time_machine +from sqlglot import exp +import re +from concurrent.futures import ThreadPoolExecutor, TimeoutError +import time +import queue + +from sqlmesh.core import constants as c +from sqlmesh.core.config import ( + Config, + GatewayConfig, + ModelDefaultsConfig, + DuckDBConnectionConfig, +) +from sqlmesh.core.context import Context +from sqlmesh.core.model import ( + IncrementalByTimeRangeKind, + IncrementalUnmanagedKind, + SqlModel, +) +from sqlmesh.core.plan import SnapshotIntervals +from sqlmesh.core.snapshot import ( + Snapshot, + SnapshotId, +) +from sqlmesh.utils.date import 
to_timestamp +from sqlmesh.utils.errors import ( + ConflictingPlanError, +) +from tests.core.integration.utils import add_projection_to_model + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_plan_ignores_changes(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + restated_snapshot = context.get_snapshot("sushi.top_waiters") + + # Simulate a change. + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + plan = context.plan_builder(restate_models=["sushi.top_waiters"]).build() + assert plan.snapshots != context.snapshots + + assert not plan.directly_modified + assert not plan.has_changes + assert not plan.new_snapshots + assert plan.requires_backfill + assert plan.restatements == { + restated_snapshot.snapshot_id: (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) + } + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=restated_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ) + ] + + context.apply(plan) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_plan_across_environments_snapshot_with_shared_version( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + # Change kind to incremental unmanaged + model = context.get_model("sushi.waiter_revenue_by_day") + previous_kind = model.kind.copy(update={"forward_only": True}) + assert isinstance(previous_kind, 
IncrementalByTimeRangeKind) + + model = model.copy( + update={ + "kind": IncrementalUnmanagedKind(), + "physical_version": "pinned_version_12345", + "partitioned_by_": [exp.column("event_date")], + } + ) + context.upsert_model(model) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Make some change and deploy it to both dev and prod environments + model = add_projection_to_model(t.cast(SqlModel, model)) + context.upsert_model(model) + context.plan("dev_a", auto_apply=True, no_prompts=True) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Change the kind back to incremental by time range and deploy to prod + model = model.copy(update={"kind": previous_kind}) + context.upsert_model(model) + context.plan("prod", auto_apply=True, no_prompts=True) + + # Restate the model and verify that the interval hasn't been expanded because of the old snapshot + # with the same version + context.plan( + restate_models=["sushi.waiter_revenue_by_day"], + start="2023-01-06", + end="2023-01-08", + auto_apply=True, + no_prompts=True, + ) + + assert ( + context.fetchdf( + "SELECT COUNT(*) AS cnt FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL AND event_date < '2023-01-06'" + )["cnt"][0] + == 0 + ) + plan = context.plan_builder("prod").build() + assert not plan.missing_intervals + + +def test_restatement_plan_hourly_with_downstream_daily_restates_correct_intervals(tmp_path: Path): + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@hourly' + ); + + select account_id, ts from test.external_table; + """ + + model_b = """ + MODEL ( + name test.b, + kind FULL, + cron '@daily' + ); + + select account_id, ts from test.a; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a, "b.sql": model_b}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + config = 
Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply + ctx.plan(auto_apply=True, no_prompts=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + # verify initial state + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # restate A + engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") + ctx.plan( + restate_models=["test.a"], + start="2024-01-01 01:00:00", + end="2024-01-01 02:00:00", + auto_apply=True, + no_prompts=True, + ) + + # verify result + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], f"Table {tbl} wasnt cleared" + + # Put some data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 01:30:00", + "2024-01-01 23:30:00", + "2024-01-02 03:30:00", + "2024-01-03 12:30:00", + ], + } + ) + engine_adapter.replace_query( + table_name=external_table, query_or_df=df, 
target_columns_to_types=columns_to_types + ) + + # Restate A across a day boundary with the expectation that two day intervals in B are affected + ctx.plan( + restate_models=["test.a"], + start="2024-01-01 02:00:00", + end="2024-01-02 04:00:00", + auto_apply=True, + no_prompts=True, + ) + + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", # present already + # "2024-01-01 02:30:00", #removed in last restatement + "2024-01-01 23:30:00", # added in last restatement + "2024-01-02 03:30:00", # added in last restatement + ], f"Table {tbl} wasnt cleared" + + +def test_restatement_plan_respects_disable_restatements(tmp_path: Path): + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01', + cron '@daily' + ); + + select account_id, ts from test.external_table; + """ + + model_b = """ + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts", + disable_restatement true, + ), + start '2024-01-01', + cron '@daily' + ); + + select account_id, ts from test.a; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a, "b.sql": model_b}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + 
engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply + ctx.plan(auto_apply=True, no_prompts=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + def get_snapshot_intervals(snapshot_id): + return list(ctx.state_sync.get_snapshots([snapshot_id]).values())[0].intervals + + # verify initial state + for tbl in ["test.a", "test.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # restate A and expect b to be ignored + starting_b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id) + engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") + ctx.plan( + restate_models=["test.a"], + start="2024-01-01", + end="2024-01-02", + auto_apply=True, + no_prompts=True, + ) + + # verify A was changed and not b + assert _dates_in_table("test.a") == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + assert _dates_in_table("test.b") == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # Verify B intervals were not touched + b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id) + assert starting_b_intervals == b_intervals + + +def test_restatement_plan_clears_correct_intervals_across_environments(tmp_path: Path): + model1 = """ + MODEL ( + name test.incremental_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "date" + ), + start '2024-01-01', + cron '@daily' + ); + + select account_id, date from test.external_table; + """ + + model2 = """ + MODEL ( + name test.downstream_of_incremental, + kind FULL + ); + + select account_id, date from test.incremental_model; + """ + + models_dir = tmp_path / 
"models" + models_dir.mkdir() + + with open(models_dir / "model1.sql", "w") as f: + f.write(model1) + + with open(models_dir / "model2.sql", "w") as f: + f.write(model2) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004, 1005], + "name": ["foo", "bar", "baz", "bing", "bong"], + "date": ["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05"], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "name": exp.DataType.build("varchar"), + "date": exp.DataType.build("date"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # first, create the prod models + ctx.plan(auto_apply=True, no_prompts=True) + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) + assert engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (5,) + assert not engine_adapter.table_exists("test__dev.incremental_model") + + # then, make a dev version + model1 = """ + MODEL ( + name test.incremental_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "date" + ), + start '2024-01-01', + cron '@daily' + ); + + select 1 as account_id, date from test.external_table; + """ + with open(models_dir / "model1.sql", "w") as f: + f.write(model1) + ctx.load() + + ctx.plan(environment="dev", auto_apply=True, no_prompts=True) + assert engine_adapter.table_exists("test__dev.incremental_model") + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (5,) + + # drop some source data so when we restate 
the interval it essentially clears it which is easy to verify + engine_adapter.execute("delete from test.external_table where date = '2024-01-01'") + assert engine_adapter.fetchone("select count(*) from test.external_table") == (4,) + + # now, restate intervals in dev and verify prod is NOT affected + ctx.plan( + environment="dev", + start="2024-01-01", + end="2024-01-02", + restate_models=["test.incremental_model"], + auto_apply=True, + no_prompts=True, + ) + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-01'" + ) == (1,) + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-01'" + ) == (0,) + + # prod still should not be affected by a run because the restatement only happened in dev + ctx.run() + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-01'" + ) == (1,) + + # drop another interval from the source data + engine_adapter.execute("delete from test.external_table where date = '2024-01-02'") + + # now, restate intervals in prod and verify that dev IS affected + ctx.plan( + start="2024-01-01", + end="2024-01-03", + restate_models=["test.incremental_model"], + auto_apply=True, + no_prompts=True, + ) + assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (3,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-01'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-02'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test.incremental_model where date = '2024-01-03'" + ) == (1,) + + 
# dev not affected yet until `sqlmesh run` is run + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-01'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-02'" + ) == (1,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-03'" + ) == (1,) + + # the restatement plan for prod should have cleared dev intervals too, which means this `sqlmesh run` re-runs 2024-01-01 and 2024-01-02 + ctx.run(environment="dev") + assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (3,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-01'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-02'" + ) == (0,) + assert engine_adapter.fetchone( + "select count(*) from test__dev.incremental_model where date = '2024-01-03'" + ) == (1,) + + # the downstream full model should always reflect whatever the incremental model is showing + assert engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (3,) + assert engine_adapter.fetchone("select count(*) from test__dev.downstream_of_incremental") == ( + 3, + ) + + +def test_prod_restatement_plan_clears_correct_intervals_in_derived_dev_tables(tmp_path: Path): + """ + Scenario: + I have models A[hourly] <- B[daily] <- C in prod + I create dev and add 2 new models D and E so that my dev DAG looks like A <- B <- C <- D[daily] <- E + I prod, I restate *one hour* of A + Outcome: + D and E should be restated in dev despite not being a part of prod + since B and D are daily, the whole day should be restated even though only 1hr of the upstream model was restated + """ + + model_a = """ + MODEL ( + name 
test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@hourly' + ); + + select account_id, ts from test.external_table; + """ + + def _derived_full_model_def(name: str, upstream: str) -> str: + return f""" + MODEL ( + name test.{name}, + kind FULL + ); + + select account_id, ts from test.{upstream}; + """ + + def _derived_incremental_model_def(name: str, upstream: str) -> str: + return f""" + MODEL ( + name test.{name}, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ts + ), + cron '@daily' + ); + + select account_id, ts from test.{upstream} where ts between @start_ts and @end_ts; + """ + + model_b = _derived_incremental_model_def("b", upstream="a") + model_c = _derived_full_model_def("c", upstream="b") + + models_dir = tmp_path / "models" + models_dir.mkdir() + + for path, defn in {"a.sql": model_a, "b.sql": model_b, "c.sql": model_c}.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply A, B, C in prod + ctx.plan(auto_apply=True, no_prompts=True) + + # add D[daily], E in dev + model_d = _derived_incremental_model_def("d", upstream="c") + model_e = _derived_full_model_def("e", 
upstream="d") + + for path, defn in { + "d.sql": model_d, + "e.sql": model_e, + }.items(): + with open(models_dir / path, "w") as f: + f.write(defn) + + # plan + apply dev + ctx.load() + ctx.plan(environment="dev", auto_apply=True, no_prompts=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + # verify initial state + for tbl in ["test.a", "test.b", "test.c", "test__dev.d", "test__dev.e"]: + assert engine_adapter.table_exists(tbl) + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + for tbl in ["test.d", "test.e"]: + assert not engine_adapter.table_exists(tbl) + + # restate A in prod + engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") + ctx.plan( + restate_models=["test.a"], + start="2024-01-01 01:00:00", + end="2024-01-01 02:00:00", + auto_apply=True, + no_prompts=True, + ) + + # verify result + for tbl in ["test.a", "test.b", "test.c"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], f"Table {tbl} wasnt cleared" + + # dev shouldnt have been affected yet + for tbl in ["test__dev.d", "test__dev.e"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], f"Table {tbl} was prematurely cleared" + + # run dev to trigger the processing of the prod restatement + ctx.run(environment="dev") + + # data should now be cleared from dev + # note that D is a daily model, so clearing an hour interval from A should have triggered the full day in D + for tbl in ["test__dev.d", "test__dev.e"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], f"Table {tbl} wasnt cleared" + + +def 
test_prod_restatement_plan_clears_unaligned_intervals_in_derived_dev_tables(tmp_path: Path): + """ + Scenario: + I have a model A[hourly] in prod + I create dev and add a model B[daily] + I prod, I restate *one hour* of A + + Outcome: + The whole day for B should be restated. The restatement plan for prod has no hints about B's cadence because + B only exists in dev and there are no other downstream models in prod that would cause the restatement intervals + to be widened. + + Therefore, this test checks that SQLMesh does the right thing when an interval is partially cleared + """ + + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@hourly' + ); + + select account_id, ts from test.external_table; + """ + + model_b = """ + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ts + ), + cron '@daily' + ); + + select account_id, ts from test.a where ts between @start_ts and @end_ts; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + with open(models_dir / "a.sql", "w") as f: + f.write(model_a) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply A[hourly] in prod + 
ctx.plan(auto_apply=True, no_prompts=True) + + # add B[daily] in dev + with open(models_dir / "b.sql", "w") as f: + f.write(model_b) + + # plan + apply dev + ctx.load() + ctx.plan(environment="dev", auto_apply=True, no_prompts=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + # verify initial state + for tbl in ["test.a", "test__dev.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # restate A in prod + engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") + ctx.plan( + restate_models=["test.a"], + start="2024-01-01 01:00:00", + end="2024-01-01 02:00:00", + auto_apply=True, + no_prompts=True, + ) + + # verify result + assert _dates_in_table("test.a") == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # dev shouldnt have been affected yet + assert _dates_in_table("test__dev.b") == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # mess with A independently of SQLMesh to prove a whole day gets restated for B instead of just 1hr + snapshot_table_name = ctx.table_name("test.a", "dev") + engine_adapter.execute( + f"delete from {snapshot_table_name} where cast(ts as date) == '2024-01-01'" + ) + engine_adapter.execute( + f"insert into {snapshot_table_name} (account_id, ts) values (1007, '2024-01-02 01:30:00')" + ) + + assert _dates_in_table("test.a") == ["2024-01-02 00:30:00", "2024-01-02 01:30:00"] + + # run dev to trigger the processing of the prod restatement + ctx.run(environment="dev") + + # B should now have no data for 2024-01-01 + # To prove a single day was restated vs the whole model, it also shouldnt have the '2024-01-02 01:30:00' record + assert _dates_in_table("test__dev.b") == ["2024-01-02 00:30:00"] + + +def 
test_prod_restatement_plan_causes_dev_intervals_to_be_processed_in_next_dev_plan( + tmp_path: Path, +): + """ + Scenario: + I have a model A[hourly] in prod + I create dev and add a model B[daily] + I prod, I restate *one hour* of A + In dev, I run a normal plan instead of a cadence run + + Outcome: + The whole day for B should be restated as part of a normal plan + """ + + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@hourly' + ); + + select account_id, ts from test.external_table; + """ + + model_b = """ + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ts + ), + cron '@daily' + ); + + select account_id, ts from test.a where ts between @start_ts and @end_ts; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + with open(models_dir / "a.sql", "w") as f: + f.write(model_a) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply A[hourly] in prod + ctx.plan(auto_apply=True, no_prompts=True) + + # add B[daily] in dev + with open(models_dir / "b.sql", "w") as f: + f.write(model_b) + + # plan + apply dev + ctx.load() + ctx.plan(environment="dev", auto_apply=True, 
no_prompts=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + # verify initial state + for tbl in ["test.a", "test__dev.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # restate A in prod + engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") + ctx.plan( + restate_models=["test.a"], + start="2024-01-01 01:00:00", + end="2024-01-01 02:00:00", + auto_apply=True, + no_prompts=True, + ) + + # verify result + assert _dates_in_table("test.a") == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # dev shouldnt have been affected yet + assert _dates_in_table("test__dev.b") == [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + # plan dev which should trigger the missing intervals to get repopulated + ctx.plan(environment="dev", auto_apply=True, no_prompts=True) + + # dev should have the restated data + for tbl in ["test.a", "test__dev.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ] + + +def test_prod_restatement_plan_causes_dev_intervals_to_be_widened_on_full_restatement_only_model( + tmp_path, +): + """ + Scenario: + I have am INCREMENTAL_BY_TIME_RANGE model A[daily] in prod + I create dev and add a INCREMENTAL_BY_UNIQUE_KEY model B (which supports full restatement only) + I prod, I restate one day of A which should cause intervals in dev to be cleared (but not processed) + In dev, I run a plan + + Outcome: + In the dev plan, the entire model for B should be rebuilt because it does not support partial restatement + """ + + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron 
'@daily' + ); + + select account_id, ts from test.external_table where ts between @start_ts and @end_ts; + """ + + model_b = """ + MODEL ( + name test.b, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key (account_id, ts) + ), + cron '@daily' + ); + + select account_id, ts from test.a where ts between @start_ts and @end_ts; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + with open(models_dir / "a.sql", "w") as f: + f.write(model_a) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-02 01:30:00", + "2024-01-03 02:30:00", + "2024-01-04 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply A[daily] in prod + ctx.plan(auto_apply=True) + + # add B[daily] in dev + with open(models_dir / "b.sql", "w") as f: + f.write(model_b) + + # plan + apply dev + ctx.load() + ctx.plan(environment="dev", auto_apply=True) + + def _dates_in_table(table_name: str) -> t.List[str]: + return [ + str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") + ] + + # verify initial state + for tbl in ["test.a", "test__dev.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-02 01:30:00", + "2024-01-03 02:30:00", + "2024-01-04 00:30:00", + ] + + # restate A in prod + engine_adapter.execute("delete from test.external_table where ts = '2024-01-02 01:30:00'") + 
ctx.plan( + restate_models=["test.a"], + start="2024-01-02 00:00:00", + end="2024-01-03 00:00:00", + auto_apply=True, + no_prompts=True, + ) + + # verify result + assert _dates_in_table("test.a") == [ + "2024-01-01 00:30:00", + "2024-01-03 02:30:00", + "2024-01-04 00:30:00", + ] + + # dev shouldnt have been affected yet + assert _dates_in_table("test__dev.b") == [ + "2024-01-01 00:30:00", + "2024-01-02 01:30:00", + "2024-01-03 02:30:00", + "2024-01-04 00:30:00", + ] + + # plan dev which should trigger the missing intervals to get repopulated + ctx.plan(environment="dev", auto_apply=True) + + # dev should have fully refreshed + # this is proven by the fact that INCREMENTAL_BY_UNIQUE_KEY cant propagate deletes, so if the + # model was not fully rebuilt, the deleted record would still be present + for tbl in ["test.a", "test__dev.b"]: + assert _dates_in_table(tbl) == [ + "2024-01-01 00:30:00", + "2024-01-03 02:30:00", + "2024-01-04 00:30:00", + ] + + +def test_prod_restatement_plan_missing_model_in_dev( + tmp_path: Path, +): + """ + Scenario: + I have a model B in prod but only model A in dev + I restate B in prod + + Outcome: + The A model should be ignore and the plan shouldn't fail + """ + + model_a = """ + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@hourly' + ); + + select account_id, ts from test.external_table; + """ + + model_b = """ + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ts + ), + cron '@daily' + ); + + select account_id, ts from test.external_table where ts between @start_ts and @end_ts; + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + with open(models_dir / "a.sql", "w") as f: + f.write(model_a) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + ctx = Context(paths=[tmp_path], config=config) + + engine_adapter = ctx.engine_adapter + engine_adapter.create_schema("test") + + # source data + df = 
pd.DataFrame( + { + "account_id": [1001, 1002, 1003, 1004], + "ts": [ + "2024-01-01 00:30:00", + "2024-01-01 01:30:00", + "2024-01-01 02:30:00", + "2024-01-02 00:30:00", + ], + } + ) + columns_to_types = { + "account_id": exp.DataType.build("int"), + "ts": exp.DataType.build("timestamp"), + } + external_table = exp.table_(table="external_table", db="test", quoted=True) + engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) + engine_adapter.insert_append( + table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types + ) + + # plan + apply A[hourly] in dev + ctx.plan("dev", auto_apply=True, no_prompts=True) + + # add B[daily] in prod and remove A + with open(models_dir / "b.sql", "w") as f: + f.write(model_b) + Path(models_dir / "a.sql").unlink() + + # plan + apply dev + ctx.load() + ctx.plan(auto_apply=True, no_prompts=True) + + # restate B in prod + ctx.plan( + restate_models=["test.b"], + start="2024-01-01", + end="2024-01-02", + auto_apply=True, + no_prompts=True, + ) + + +def test_prod_restatement_plan_includes_related_unpromoted_snapshots(tmp_path: Path): + """ + Scenario: + - I have models A <- B in prod + - I have models A <- B <- C in dev + - Both B and C have gone through a few iterations in dev so multiple snapshot versions exist + for them but not all of them are promoted / active + - I restate A in prod + + Outcome: + - Intervals should be cleared for all of the versions of B and C, regardless + of if they are active in any particular environment, in case they ever get made + active + """ + + models_dir = tmp_path / "models" + models_dir.mkdir() + + (models_dir / "a.sql").write_text(""" + MODEL ( + name test.a, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@daily' + ); + + select 1 as a, now() as ts; + """) + + (models_dir / "b.sql").write_text(""" + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + 
start '2024-01-01 00:00:00', + cron '@daily' + ); + + select a, ts from test.a + """) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01")) + ctx = Context(paths=[tmp_path], config=config) + + def _all_snapshots() -> t.Dict[SnapshotId, Snapshot]: + all_snapshot_ids = [ + SnapshotId(name=name, identifier=identifier) + for (name, identifier) in ctx.state_sync.state_sync.engine_adapter.fetchall( # type: ignore + "select name, identifier from sqlmesh._snapshots" + ) + ] + return ctx.state_sync.get_snapshots(all_snapshot_ids) + + # plan + apply prod + ctx.plan(environment="prod", auto_apply=True) + assert len(_all_snapshots()) == 2 + + # create dev with new version of B + (models_dir / "b.sql").write_text(""" + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@daily' + ); + + select a, ts, 'b dev 1' as change from test.a + """) + + ctx.load() + ctx.plan(environment="dev", auto_apply=True) + assert len(_all_snapshots()) == 3 + + # update B (new version) and create C + (models_dir / "b.sql").write_text(""" + MODEL ( + name test.b, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column "ts" + ), + start '2024-01-01 00:00:00', + cron '@daily' + ); + + select a, ts, 'b dev 2' as change from test.a + """) + + (models_dir / "c.sql").write_text(""" + MODEL ( + name test.c, + kind FULL, + cron '@daily' + ); + + select *, 'c initial' as val from test.b + """) + + ctx.load() + ctx.plan(environment="dev", auto_apply=True) + assert len(_all_snapshots()) == 5 + + # update C (new version), create D (unrelated) + (models_dir / "c.sql").write_text(""" + MODEL ( + name test.c, + kind FULL, + cron '@daily' + ); + + select *, 'c updated' as val from test.b + """) + + (models_dir / "d.sql").write_text(""" + MODEL ( + name test.d, + cron '@daily' + ); + + select 1 as unrelated + """) + + ctx.load() + ctx.plan(environment="dev", auto_apply=True) + all_snapshots_prior_to_restatement = 
_all_snapshots() + assert len(all_snapshots_prior_to_restatement) == 7 + + def _snapshot_instances(lst: t.Dict[SnapshotId, Snapshot], name_match: str) -> t.List[Snapshot]: + return [s for s_id, s in lst.items() if name_match in s_id.name] + + # verify initial state + + # 1 instance of A (prod) + assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"a"')) == 1 + + # 3 instances of B (original in prod + 2 updates in dev) + assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"b"')) == 3 + + # 2 instances of C (initial + update in dev) + assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"c"')) == 2 + + # 1 instance of D (initial - dev) + assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"d"')) == 1 + + # restate A in prod + ctx.plan(environment="prod", restate_models=['"memory"."test"."a"'], auto_apply=True) + + all_snapshots_after_restatement = _all_snapshots() + + # All versions of B and C in dev should have had intervals cleared + # D in dev should not be touched and A + B in prod shoud also not be touched + a = _snapshot_instances(all_snapshots_after_restatement, '"a"') + assert len(a) == 1 + + b = _snapshot_instances(all_snapshots_after_restatement, '"b"') + # the 1 B instance in prod should be populated and 2 in dev (1 active) should be cleared + assert len(b) == 3 + assert len([s for s in b if not s.intervals]) == 2 + + c = _snapshot_instances(all_snapshots_after_restatement, '"c"') + # the 2 instances of C in dev (1 active) should be cleared + assert len(c) == 2 + assert len([s for s in c if not s.intervals]) == 2 + + d = _snapshot_instances(all_snapshots_after_restatement, '"d"') + # D should not be touched since it's in no way downstream of A in prod + assert len(d) == 1 + assert d[0].intervals + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_dev_restatement_of_prod_model(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + 
context.apply(plan) + + model = context.get_model("sushi.waiter_revenue_by_day") + context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) + + context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True) + + restatement_plan = context.plan_builder("dev", restate_models=["*"]).build() + assert set(restatement_plan.restatements) == { + context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id, + context.get_snapshot("sushi.top_waiters").snapshot_id, + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_of_full_model_with_start(init_and_plan_context: t.Callable): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + restatement_plan = context.plan( + restate_models=["sushi.customers"], + start="2023-01-07", + auto_apply=True, + no_prompts=True, + ) + + sushi_customer_interval = restatement_plan.restatements[ + context.get_snapshot("sushi.customers").snapshot_id + ] + assert sushi_customer_interval == (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) + waiter_by_day_interval = restatement_plan.restatements[ + context.get_snapshot("sushi.waiter_as_customer_by_day").snapshot_id + ] + assert waiter_by_day_interval == (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_should_not_override_environment_statements(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + context.config.before_all = ["SELECT 'test_before_all';", *context.config.before_all] + context.load() + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + prod_env_statements = context.state_reader.get_environment_statements(c.PROD) + assert prod_env_statements[0].before_all[0] == "SELECT 'test_before_all';" + + context.plan( + restate_models=["sushi.waiter_revenue_by_day"], + start="2023-01-07", + auto_apply=True, + no_prompts=True, + ) + + prod_env_statements = 
context.state_reader.get_environment_statements(c.PROD) + assert prod_env_statements[0].before_all[0] == "SELECT 'test_before_all';" + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_shouldnt_backfill_beyond_prod_intervals(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.top_waiters") + context.upsert_model(SqlModel.parse_obj({**model.dict(), "cron": "@hourly"})) + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + context.run() + + with time_machine.travel("2023-01-09 02:00:00 UTC"): + # It's time to backfill the waiter_revenue_by_day model but it hasn't run yet + restatement_plan = context.plan( + restate_models=["sushi.waiter_revenue_by_day"], + no_prompts=True, + skip_tests=True, + ) + intervals_by_id = {i.snapshot_id: i for i in restatement_plan.missing_intervals} + # Make sure the intervals don't go beyond the prod intervals + assert intervals_by_id[context.get_snapshot("sushi.top_waiters").snapshot_id].intervals[-1][ + 1 + ] == to_timestamp("2023-01-08 15:00:00 UTC") + assert intervals_by_id[ + context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id + ].intervals[-1][1] == to_timestamp("2023-01-08 00:00:00 UTC") + + +def test_restatement_plan_interval_external_visibility(tmp_path: Path): + """ + Scenario: + - `prod` environment exists, models A <- B + - `dev` environment created, models A <- B(dev) <- C (dev) + - Restatement plan is triggered against `prod` for model A + - During restatement, a new dev environment `dev_2` is created with a new version of B(dev_2) + + Outcome: + - At no point are the prod_intervals considered "missing" from state for A + - The intervals for B(dev) and C(dev) are cleared + - The intervals for B(dev_2) are also cleared even though the environment didnt exist at the time the plan was started, + because they are based on the data from a partially restated version of A + """ + + models_dir = 
tmp_path / "models" + models_dir.mkdir() + + lock_file_path = tmp_path / "test.lock" # python model blocks while this file is present + + evaluation_lock_file_path = ( + tmp_path / "evaluation.lock" + ) # python model creates this file if it's in the wait loop and deletes it once done + + # Note: to make execution block so we can test stuff, we use a Python model that blocks until it no longer detects the presence of a file + (models_dir / "model_a.py").write_text(f""" +from sqlmesh.core.model import model +from sqlmesh.core.macros import MacroEvaluator + +@model( + "test.model_a", + is_sql=True, + kind="FULL" +) +def entrypoint(evaluator: MacroEvaluator) -> str: + from pathlib import Path + import time + + if evaluator.runtime_stage == 'evaluating': + while True: + if Path("{str(lock_file_path)}").exists(): + Path("{str(evaluation_lock_file_path)}").touch() + print("lock exists; sleeping") + time.sleep(2) + else: + Path("{str(evaluation_lock_file_path)}").unlink(missing_ok=True) + break + + return "select 'model_a' as m" +""") + + (models_dir / "model_b.sql").write_text(""" + MODEL ( + name test.model_b, + kind FULL + ); + + select a.m as m, 'model_b' as mb from test.model_a as a + """) + + config = Config( + gateways={ + "": GatewayConfig( + connection=DuckDBConnectionConfig(database=str(tmp_path / "db.db")), + state_connection=DuckDBConnectionConfig(database=str(tmp_path / "state.db")), + ) + }, + model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"), + ) + ctx = Context(paths=[tmp_path], config=config) + + ctx.plan(environment="prod", auto_apply=True) + + assert len(ctx.snapshots) == 2 + assert all(s.intervals for s in ctx.snapshots.values()) + + prod_model_a_snapshot_id = ctx.snapshots['"db"."test"."model_a"'].snapshot_id + prod_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id + + # dev models + # new version of B + (models_dir / "model_b.sql").write_text(""" + MODEL ( + name test.model_b, + kind FULL + ); + + select 
a.m as m, 'model_b' as mb, 'dev' as dev_version from test.model_a as a + """) + + # add C + (models_dir / "model_c.sql").write_text(""" + MODEL ( + name test.model_c, + kind FULL + ); + + select b.*, 'model_c' as mc from test.model_b as b + """) + + ctx.load() + ctx.plan(environment="dev", auto_apply=True) + + dev_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id + dev_model_c_snapshot_id = ctx.snapshots['"db"."test"."model_c"'].snapshot_id + + assert dev_model_b_snapshot_id != prod_model_b_snapshot_id + + # now, we restate A in prod but touch the lockfile so it hangs during evaluation + # we also have to do it in its own thread due to the hang + lock_file_path.touch() + + def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue): + q.put("thread_started") + + # give this thread its own Context object to prevent segfaulting the Python interpreter + restatement_ctx = Context(paths=[tmp_path], config=config) + + # dev2 not present before the restatement plan starts + assert restatement_ctx.state_sync.get_environment("dev2") is None + + q.put("plan_started") + plan = restatement_ctx.plan( + environment="prod", restate_models=['"db"."test"."model_a"'], auto_apply=True + ) + q.put("plan_completed") + + # dev2 was created during the restatement plan + assert restatement_ctx.state_sync.get_environment("dev2") is not None + + return plan + + executor = ThreadPoolExecutor() + q: queue.Queue = queue.Queue() + restatement_plan_future = executor.submit(_run_restatement_plan, tmp_path, config, q) + assert q.get() == "thread_started" + + try: + if e := restatement_plan_future.exception(timeout=1): + # abort early if the plan thread threw an exception + raise e + except TimeoutError: + # that's ok, we dont actually expect the plan to have finished in 1 second + pass + + # while that restatement is running, we can simulate another process and check that it sees no empty intervals + assert q.get() == "plan_started" + + # dont check for 
potentially missing intervals until the plan is in the evaluation loop + attempts = 0 + while not evaluation_lock_file_path.exists(): + time.sleep(2) + attempts += 1 + if attempts > 10: + raise ValueError("Gave up waiting for evaluation loop") + + ctx.clear_caches() # get rid of the file cache so that data is re-fetched from state + prod_models_from_state = ctx.state_sync.get_snapshots( + snapshot_ids=[prod_model_a_snapshot_id, prod_model_b_snapshot_id] + ) + + # prod intervals should be present still + assert all(m.intervals for m in prod_models_from_state.values()) + + # so should dev intervals since prod restatement is still running + assert all(m.intervals for m in ctx.snapshots.values()) + + # now, lets create a new dev environment "dev2", while the prod restatement plan is still running, + # that changes model_b while still being based on the original version of model_a + (models_dir / "model_b.sql").write_text(""" + MODEL ( + name test.model_b, + kind FULL + ); + + select a.m as m, 'model_b' as mb, 'dev2' as dev_version from test.model_a as a + """) + ctx.load() + ctx.plan(environment="dev2", auto_apply=True) + + dev2_model_b_snapshot_id = ctx.snapshots['"db"."test"."model_b"'].snapshot_id + assert dev2_model_b_snapshot_id != dev_model_b_snapshot_id + assert dev2_model_b_snapshot_id != prod_model_b_snapshot_id + + # as at this point, everything still has intervals + ctx.clear_caches() + assert all( + s.intervals + for s in ctx.state_sync.get_snapshots( + snapshot_ids=[ + prod_model_a_snapshot_id, + prod_model_b_snapshot_id, + dev_model_b_snapshot_id, + dev_model_c_snapshot_id, + dev2_model_b_snapshot_id, + ] + ).values() + ) + + # now, we finally let that restatement plan complete + # first, verify it's still blocked where it should be + assert not restatement_plan_future.done() + + lock_file_path.unlink() # remove lock file, plan should be able to proceed now + + if e := restatement_plan_future.exception(): # blocks until future complete + raise e + + 
assert restatement_plan_future.result() + assert q.get() == "plan_completed" + + ctx.clear_caches() + + # check that intervals in prod are present + assert all( + s.intervals + for s in ctx.state_sync.get_snapshots( + snapshot_ids=[ + prod_model_a_snapshot_id, + prod_model_b_snapshot_id, + ] + ).values() + ) + + # check that intervals in dev have been cleared, including the dev2 env that + # was created after the restatement plan started + assert all( + not s.intervals + for s in ctx.state_sync.get_snapshots( + snapshot_ids=[ + dev_model_b_snapshot_id, + dev_model_c_snapshot_id, + dev2_model_b_snapshot_id, + ] + ).values() + ) + + executor.shutdown() + + +def test_restatement_plan_detects_prod_deployment_during_restatement(tmp_path: Path): + """ + Scenario: + - `prod` environment exists, model A + - `dev` environment created, model A(dev) + - Restatement plan is triggered against `prod` for model A + - During restatement, someone else deploys A(dev) to prod, replacing the model that is currently being restated. + + Outcome: + - The deployment plan for dev -> prod should succeed in deploying the new version of A + - The prod restatement plan should fail with a ConflictingPlanError and warn about the model that got updated while undergoing restatement + - The new version of A should have no intervals cleared. 
The user needs to rerun the restatement if the intervals should still be cleared + """ + orig_console = get_console() + console = CaptureTerminalConsole() + set_console(console) + + models_dir = tmp_path / "models" + models_dir.mkdir() + + lock_file_path = tmp_path / "test.lock" # python model blocks while this file is present + + evaluation_lock_file_path = ( + tmp_path / "evaluation.lock" + ) # python model creates this file if it's in the wait loop and deletes it once done + + # Note: to make execution block so we can test stuff, we use a Python model that blocks until it no longer detects the presence of a file + (models_dir / "model_a.py").write_text(f""" +from sqlmesh.core.model import model +from sqlmesh.core.macros import MacroEvaluator + +@model( + "test.model_a", + is_sql=True, + kind="FULL" +) +def entrypoint(evaluator: MacroEvaluator) -> str: + from pathlib import Path + import time + + if evaluator.runtime_stage == 'evaluating': + while True: + if Path("{str(lock_file_path)}").exists(): + Path("{str(evaluation_lock_file_path)}").touch() + print("lock exists; sleeping") + time.sleep(2) + else: + Path("{str(evaluation_lock_file_path)}").unlink(missing_ok=True) + break + + return "select 'model_a' as m" +""") + + config = Config( + gateways={ + "": GatewayConfig( + connection=DuckDBConnectionConfig(database=str(tmp_path / "db.db")), + state_connection=DuckDBConnectionConfig(database=str(tmp_path / "state.db")), + ) + }, + model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"), + ) + ctx = Context(paths=[tmp_path], config=config) + + # create prod + ctx.plan(environment="prod", auto_apply=True) + original_prod = ctx.state_sync.get_environment("prod") + assert original_prod + + # update model_a for dev + (models_dir / "model_a.py").unlink() + (models_dir / "model_a.sql").write_text(""" + MODEL ( + name test.model_a, + kind FULL + ); + + select 1 as changed + """) + + # create dev + ctx.load() + plan = ctx.plan(environment="dev", 
auto_apply=True) + assert len(plan.modified_snapshots) == 1 + new_model_a_snapshot_id = list(plan.modified_snapshots)[0] + + # now, trigger a prod restatement plan in a different thread and block it to simulate a long restatement + thread_console = None + + def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue): + nonlocal thread_console + q.put("thread_started") + + # Give this thread its own markdown console to avoid Rich LiveError + thread_console = MarkdownConsole() + set_console(thread_console) + + # give this thread its own Context object to prevent segfaulting the Python interpreter + restatement_ctx = Context(paths=[tmp_path], config=config) + + # ensure dev is present before the restatement plan starts + assert restatement_ctx.state_sync.get_environment("dev") is not None + + q.put("plan_started") + expected_error = None + try: + restatement_ctx.plan( + environment="prod", restate_models=['"db"."test"."model_a"'], auto_apply=True + ) + except ConflictingPlanError as e: + expected_error = e + + q.put("plan_completed") + return expected_error + + executor = ThreadPoolExecutor() + q: queue.Queue = queue.Queue() + lock_file_path.touch() + + restatement_plan_future = executor.submit(_run_restatement_plan, tmp_path, config, q) + restatement_plan_future.add_done_callback(lambda _: executor.shutdown()) + + assert q.get() == "thread_started" + + try: + if e := restatement_plan_future.exception(timeout=1): + # abort early if the plan thread threw an exception + raise e + except TimeoutError: + # that's ok, we dont actually expect the plan to have finished in 1 second + pass + + assert q.get() == "plan_started" + + # ok, now the prod restatement plan is running, let's deploy dev to prod + ctx.plan(environment="prod", auto_apply=True) + + new_prod = ctx.state_sync.get_environment("prod") + assert new_prod + assert new_prod.plan_id != original_prod.plan_id + assert new_prod.previous_plan_id == original_prod.plan_id + + # new prod is deployed but 
restatement plan is still running + assert not restatement_plan_future.done() + + # allow restatement plan to complete + lock_file_path.unlink() + + plan_error = restatement_plan_future.result() + assert isinstance(plan_error, ConflictingPlanError) + assert "please re-apply your plan" in repr(plan_error).lower() + + output = " ".join(re.split("\\s+", thread_console.captured_output, flags=re.UNICODE)) # type: ignore + assert ( + f"The following models had new versions deployed while data was being restated: └── test.model_a" + in output + ) + + # check that no intervals have been cleared from the model_a currently in prod + model_a = ctx.state_sync.get_snapshots(snapshot_ids=[new_model_a_snapshot_id])[ + new_model_a_snapshot_id + ] + assert isinstance(model_a.node, SqlModel) + assert model_a.node.render_query_or_raise().sql() == 'SELECT 1 AS "changed"' + assert len(model_a.intervals) + + set_console(orig_console) + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_restatement_plan_outside_parent_date_range(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + context.upsert_model("sushi.items", start="2023-01-06") + context.upsert_model("sushi.orders", start="2023-01-06") + # One of the parents should derive the start from its own parents for the issue + # to reproduce + context.upsert_model("sushi.order_items", start=None) + context.upsert_model("sushi.waiter_revenue_by_day", start="2023-01-01", audits=[]) + + context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) + + restated_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") + downstream_snapshot = context.get_snapshot("sushi.top_waiters") + + plan = context.plan_builder( + restate_models=["sushi.waiter_revenue_by_day"], + start="2023-01-01", + end="2023-01-01", + min_intervals=0, + ).build() + assert plan.snapshots != context.snapshots + + assert plan.requires_backfill + assert plan.restatements == { + restated_snapshot.snapshot_id: 
(to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + downstream_snapshot.snapshot_id: (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")), + } + assert plan.missing_intervals == [ + SnapshotIntervals( + snapshot_id=downstream_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), + (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), + (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), + (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), + (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), + (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), + ], + ), + SnapshotIntervals( + snapshot_id=restated_snapshot.snapshot_id, + intervals=[ + (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + ], + ), + ] + + context.apply(plan) diff --git a/tests/core/integration/test_run.py b/tests/core/integration/test_run.py new file mode 100644 index 0000000000..c3e6626ad0 --- /dev/null +++ b/tests/core/integration/test_run.py @@ -0,0 +1,248 @@ +from __future__ import annotations + +import typing as t +import pytest +import time_machine +from pytest_mock.plugin import MockerFixture + +from sqlmesh.core import constants as c +from sqlmesh.core import dialect as d +from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.model import ( + SqlModel, + PythonModel, + load_sql_based_model, +) +from sqlmesh.utils.date import to_timestamp + +if t.TYPE_CHECKING: + pass + +pytestmark = pytest.mark.slow + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_run_with_select_models( + init_and_plan_context: t.Callable, +): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + assert context.run(select_models=["*waiter_revenue_by_day"]) + + snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) + # Only 
waiter_revenue_by_day and its parents should be backfilled up to 2023-01-09. + assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { + '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."orders"': to_timestamp("2023-01-09"), + '"memory"."sushi"."items"': to_timestamp("2023-01-09"), + '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"), + '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"), + '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"), + '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), + '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"), + '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"), + '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"), + '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"), + '"memory"."raw"."demographics"': to_timestamp("2023-01-08"), + "assert_item_price_above_zero": to_timestamp("2023-01-08"), + '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"), + '"memory"."sushi"."customers"': to_timestamp("2023-01-08"), + '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"), + '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"), + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_run_with_select_models_no_auto_upstream( + init_and_plan_context: t.Callable, +): + context, _ = init_and_plan_context("examples/sushi") + + model = context.get_model("sushi.waiter_revenue_by_day") + model = SqlModel.parse_obj({**model.dict(), "audits": []}) + context.upsert_model(model) + + context.plan("prod", no_prompts=True, skip_tests=True, auto_apply=True) + + with time_machine.travel("2023-01-09 00:00:00 UTC"): + assert context.run(select_models=["*waiter_revenue_by_day"], no_auto_upstream=True) + + snapshots = 
context.state_sync.state_sync.get_snapshots(context.snapshots.values()) + # Only waiter_revenue_by_day should be backfilled up to 2023-01-09. + assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { + '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), + '"memory"."sushi"."order_items"': to_timestamp("2023-01-08"), + '"memory"."sushi"."orders"': to_timestamp("2023-01-08"), + '"memory"."sushi"."items"': to_timestamp("2023-01-08"), + '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"), + '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"), + '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"), + '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), + '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"), + '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"), + '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"), + '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"), + '"memory"."raw"."demographics"': to_timestamp("2023-01-08"), + "assert_item_price_above_zero": to_timestamp("2023-01-08"), + '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"), + '"memory"."sushi"."customers"': to_timestamp("2023-01-08"), + '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"), + '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"), + } + + +@time_machine.travel("2023-01-08 15:00:00 UTC") +def test_run_respects_excluded_transitive_dependencies(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context("examples/sushi") + + # Graph: C <- B <- A + # B is a transitive dependency linking A and C + # Note that the alphabetical ordering of the model names is intentional and helps + # surface the problem + expressions_a = d.parse( + f""" + MODEL ( + name memory.sushi.test_model_c, + kind FULL, + allow_partials true, + cron '@hourly', + start '2023-01-01', + ); + + SELECT 
@execution_ts AS execution_ts + """ + ) + model_c = load_sql_based_model(expressions_a) + context.upsert_model(model_c) + + # A VIEW model with no partials allowed and a daily cron instead of hourly. + expressions_b = d.parse( + f""" + MODEL ( + name memory.sushi.test_model_b, + kind VIEW, + allow_partials false, + cron '@daily', + ); + + SELECT * FROM memory.sushi.test_model_c + """ + ) + model_b = load_sql_based_model(expressions_b) + context.upsert_model(model_b) + + expressions_a = d.parse( + f""" + MODEL ( + name memory.sushi.test_model_a, + kind FULL, + allow_partials true, + cron '@hourly', + ); + + SELECT * FROM memory.sushi.test_model_b + """ + ) + model_a = load_sql_based_model(expressions_a) + context.upsert_model(model_a) + + context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) + assert ( + context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_c")["execution_ts"].iloc[ + 0 + ] + == "2023-01-08 15:00:00" + ) + + with time_machine.travel("2023-01-08 17:00:00 UTC", tick=False): + context.run( + "prod", + select_models=["*test_model_c", "*test_model_a"], + no_auto_upstream=True, + ignore_cron=True, + ) + assert ( + context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_a")[ + "execution_ts" + ].iloc[0] + == "2023-01-08 17:00:00" + ) + + +@time_machine.travel("2023-01-08 00:00:00 UTC") +def test_snapshot_triggers(init_and_plan_context: t.Callable, mocker: MockerFixture): + context, plan = init_and_plan_context("examples/sushi") + context.apply(plan) + + # auto-restatement triggers + orders = context.get_model("sushi.orders") + orders_kind = { + **orders.kind.dict(), + "auto_restatement_cron": "@hourly", + } + orders_kwargs = { + **orders.dict(), + "kind": orders_kind, + } + context.upsert_model(PythonModel.parse_obj(orders_kwargs)) + + order_items = context.get_model("sushi.order_items") + order_items_kind = { + **order_items.kind.dict(), + "auto_restatement_cron": "@hourly", + } + order_items_kwargs = { + 
**order_items.dict(), + "kind": order_items_kind, + } + context.upsert_model(PythonModel.parse_obj(order_items_kwargs)) + + waiter_revenue_by_day = context.get_model("sushi.waiter_revenue_by_day") + waiter_revenue_by_day_kind = { + **waiter_revenue_by_day.kind.dict(), + "auto_restatement_cron": "@hourly", + } + waiter_revenue_by_day_kwargs = { + **waiter_revenue_by_day.dict(), + "kind": waiter_revenue_by_day_kind, + } + context.upsert_model(SqlModel.parse_obj(waiter_revenue_by_day_kwargs)) + + context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) + + scheduler = context.scheduler() + + import sqlmesh + + spy = mocker.spy(sqlmesh.core.scheduler.Scheduler, "run_merged_intervals") + + with time_machine.travel("2023-01-09 00:00:01 UTC"): + scheduler.run( + environment=c.PROD, + start="2023-01-01", + auto_restatement_enabled=True, + ) + + assert spy.called + + actual_triggers = spy.call_args.kwargs["auto_restatement_triggers"] + actual_triggers = {k: v for k, v in actual_triggers.items() if v} + assert len(actual_triggers) == 12 + + for id, trigger in actual_triggers.items(): + model_name = id.name.replace('"memory"."sushi".', "").replace('"', "") + auto_restatement_triggers = [ + t.name.replace('"memory"."sushi".', "").replace('"', "") for t in trigger + ] + + if model_name in ("orders", "order_items", "waiter_revenue_by_day"): + assert auto_restatement_triggers == [model_name] + elif model_name in ("customer_revenue_lifetime", "customer_revenue_by_day"): + assert sorted(auto_restatement_triggers) == sorted(["orders", "order_items"]) + elif model_name == "top_waiters": + assert auto_restatement_triggers == ["waiter_revenue_by_day"] + else: + assert auto_restatement_triggers == ["orders"] diff --git a/tests/core/integration/utils.py b/tests/core/integration/utils.py new file mode 100644 index 0000000000..ba233080b5 --- /dev/null +++ b/tests/core/integration/utils.py @@ -0,0 +1,353 @@ +from __future__ import annotations + +import 
typing as t +from sqlmesh.core.model.common import ParsableSql +from sqlglot import exp +from sqlglot.expressions import DataType + +from sqlmesh.core import constants as c +from sqlmesh.core.context import Context +from sqlmesh.core.engine_adapter import EngineAdapter +from sqlmesh.core.environment import EnvironmentNamingInfo +from sqlmesh.core.model import ( + IncrementalByTimeRangeKind, + IncrementalByUniqueKeyKind, + ModelKind, + ModelKindName, + SqlModel, + TimeColumn, +) +from sqlmesh.core.model.kind import model_kind_type_from_name +from sqlmesh.core.plan import Plan, PlanBuilder +from sqlmesh.core.snapshot import ( + DeployabilityIndex, + Snapshot, + SnapshotChangeCategory, + SnapshotId, + SnapshotInfoLike, + SnapshotTableInfo, +) +from sqlmesh.utils.date import TimeLike + + +def select_all(table: str, adapter: EngineAdapter) -> t.Iterable: + return adapter.fetchall(f"select * from {table} order by 1") + + +def snapshots_to_versions(snapshots: t.Iterable[Snapshot]) -> t.Dict[str, str]: + return {snapshot.name: snapshot.version or "" for snapshot in snapshots} + + +def to_snapshot_info(snapshot: SnapshotInfoLike) -> SnapshotTableInfo: + return snapshot.table_info + + +def start(context: Context) -> TimeLike: + env = context.state_sync.get_environment("prod") + assert env + return env.start_at + + +def add_projection_to_model(model: SqlModel, literal: bool = True) -> SqlModel: + one_expr = exp.Literal.number(1).as_("one") if literal else exp.column("one") + kwargs = { + **model.dict(), + "query": model.query.select(one_expr), # type: ignore + } + return SqlModel.parse_obj(kwargs) + + +def plan_choice(plan_builder: PlanBuilder, choice: SnapshotChangeCategory) -> None: + for snapshot in plan_builder.build().snapshots.values(): + if not snapshot.version: + plan_builder.set_choice(snapshot, choice) + + +def apply_to_environment( + context: Context, + environment: str, + choice: t.Optional[SnapshotChangeCategory] = None, + plan_validators: 
t.Optional[t.Iterable[t.Callable]] = None, + apply_validators: t.Optional[t.Iterable[t.Callable]] = None, + plan_start: t.Optional[TimeLike] = None, + allow_destructive_models: t.Optional[t.List[str]] = None, + enable_preview: bool = False, +): + plan_validators = plan_validators or [] + apply_validators = apply_validators or [] + + plan_builder = context.plan_builder( + environment, + start=plan_start or start(context) if environment != c.PROD else None, + forward_only=choice == SnapshotChangeCategory.FORWARD_ONLY, + include_unmodified=True, + allow_destructive_models=allow_destructive_models if allow_destructive_models else [], + enable_preview=enable_preview, + ) + if environment != c.PROD: + plan_builder.set_start(plan_start or start(context)) + + if choice: + if choice == SnapshotChangeCategory.FORWARD_ONLY: + # FORWARD_ONLY is deprecated, fallback to NON_BREAKING to keep the existing tests + choice = SnapshotChangeCategory.NON_BREAKING + plan_choice(plan_builder, choice) + for validator in plan_validators: + validator(context, plan_builder.build()) + + plan = plan_builder.build() + context.apply(plan) + + validate_apply_basics(context, environment, plan.snapshots.values(), plan.deployability_index) + for validator in apply_validators: + validator(context) + return plan + + +def change_data_type( + context: Context, + model_name: str, + old_type: exp.DType, + new_type: exp.DType, +) -> None: + model = context.get_model(model_name) + assert model is not None + + if isinstance(model, SqlModel): + query = model.query.copy() + data_types = query.find_all(DataType) + for data_type in data_types: + if data_type.this == old_type: + data_type.set("this", new_type) + context.upsert_model(model_name, query_=ParsableSql(sql=query.sql(dialect=model.dialect))) + elif model.columns_to_types_ is not None: + for k, v in model.columns_to_types_.items(): + if v.this == old_type: + model.columns_to_types_[k] = DataType.build(new_type) + context.upsert_model(model_name, 
columns=model.columns_to_types_) + + +def validate_snapshots_in_state_sync(snapshots: t.Iterable[Snapshot], context: Context) -> None: + snapshot_infos = map(to_snapshot_info, snapshots) + state_sync_table_infos = map( + to_snapshot_info, context.state_reader.get_snapshots(snapshots).values() + ) + assert set(snapshot_infos) == set(state_sync_table_infos) + + +def validate_state_sync_environment( + snapshots: t.Iterable[Snapshot], env: str, context: Context +) -> None: + environment = context.state_reader.get_environment(env) + assert environment + snapshot_infos = map(to_snapshot_info, snapshots) + environment_table_infos = map(to_snapshot_info, environment.snapshots) + assert set(snapshot_infos) == set(environment_table_infos) + + +def validate_tables( + snapshots: t.Iterable[Snapshot], + context: Context, + deployability_index: t.Optional[DeployabilityIndex] = None, +) -> None: + adapter = context.engine_adapter + deployability_index = deployability_index or DeployabilityIndex.all_deployable() + for snapshot in snapshots: + is_deployable = deployability_index.is_representative(snapshot) + if not snapshot.is_model or snapshot.is_external: + continue + table_should_exist = not snapshot.is_embedded + assert adapter.table_exists(snapshot.table_name(is_deployable)) == table_should_exist + if table_should_exist: + assert select_all(snapshot.table_name(is_deployable), adapter) + + +def validate_environment_views( + snapshots: t.Iterable[Snapshot], + environment: str, + context: Context, + deployability_index: t.Optional[DeployabilityIndex] = None, +) -> None: + adapter = context.engine_adapter + deployability_index = deployability_index or DeployabilityIndex.all_deployable() + for snapshot in snapshots: + is_deployable = deployability_index.is_representative(snapshot) + if not snapshot.is_model or snapshot.is_symbolic: + continue + view_name = snapshot.qualified_view_name.for_environment( + EnvironmentNamingInfo.from_environment_catalog_mapping( + 
context.config.environment_catalog_mapping, + name=environment, + suffix_target=context.config.environment_suffix_target, + ) + ) + + assert adapter.table_exists(view_name) + assert select_all(snapshot.table_name(is_deployable), adapter) == select_all( + view_name, adapter + ) + + +def validate_apply_basics( + context: Context, + environment: str, + snapshots: t.Iterable[Snapshot], + deployability_index: t.Optional[DeployabilityIndex] = None, +) -> None: + validate_snapshots_in_state_sync(snapshots, context) + validate_state_sync_environment(snapshots, environment, context) + validate_tables(snapshots, context, deployability_index) + validate_environment_views(snapshots, environment, context, deployability_index) + + +def validate_plan_changes( + plan: Plan, + *, + added: t.Optional[t.Iterable[SnapshotId]] = None, + modified: t.Optional[t.Iterable[str]] = None, + removed: t.Optional[t.Iterable[SnapshotId]] = None, +) -> None: + added = added or [] + modified = modified or [] + removed = removed or [] + assert set(added) == plan.context_diff.added + assert set(modified) == set(plan.context_diff.modified_snapshots) + assert set(removed) == set(plan.context_diff.removed_snapshots) + + +def validate_versions_same( + model_names: t.List[str], + versions: t.Dict[str, str], + other_versions: t.Dict[str, str], +) -> None: + for name in model_names: + assert versions[name] == other_versions[name] + + +def validate_versions_different( + model_names: t.List[str], + versions: t.Dict[str, str], + other_versions: t.Dict[str, str], +) -> None: + for name in model_names: + assert versions[name] != other_versions[name] + + +def validate_query_change( + context: Context, + environment: str, + change_category: SnapshotChangeCategory, + logical: bool, +): + versions = snapshots_to_versions(context.snapshots.values()) + + change_data_type( + context, + "sushi.items", + DataType.Type.DOUBLE, + DataType.Type.FLOAT, + ) + + directly_modified = ['"memory"."sushi"."items"'] + 
indirectly_modified = [ + '"memory"."sushi"."order_items"', + '"memory"."sushi"."waiter_revenue_by_day"', + '"memory"."sushi"."customer_revenue_by_day"', + '"memory"."sushi"."customer_revenue_lifetime"', + '"memory"."sushi"."top_waiters"', + "assert_item_price_above_zero", + ] + not_modified = [ + snapshot.name + for snapshot in context.snapshots.values() + if snapshot.name not in directly_modified and snapshot.name not in indirectly_modified + ] + + if change_category == SnapshotChangeCategory.BREAKING and not logical: + models_same = not_modified + models_different = directly_modified + indirectly_modified + elif change_category == SnapshotChangeCategory.FORWARD_ONLY: + models_same = not_modified + directly_modified + indirectly_modified + models_different = [] + else: + models_same = not_modified + indirectly_modified + models_different = directly_modified + + def _validate_plan(context, plan): + validate_plan_changes(plan, modified=directly_modified + indirectly_modified) + assert bool(plan.missing_intervals) != logical + + def _validate_apply(context): + current_versions = snapshots_to_versions(context.snapshots.values()) + validate_versions_same(models_same, versions, current_versions) + validate_versions_different(models_different, versions, current_versions) + + apply_to_environment( + context, + environment, + change_category, + plan_validators=[_validate_plan], + apply_validators=[_validate_apply], + ) + + +def initial_add(context: Context, environment: str): + assert not context.state_reader.get_environment(environment) + + plan = context.plan(environment, start=start(context), create_from="nonexistent_env") + validate_plan_changes(plan, added={x.snapshot_id for x in context.snapshots.values()}) + + context.apply(plan) + validate_apply_basics(context, environment, plan.snapshots.values()) + + +def change_model_kind(context: Context, kind: ModelKindName): + if kind in (ModelKindName.VIEW, ModelKindName.EMBEDDED, ModelKindName.FULL): + 
context.upsert_model( + "sushi.items", + partitioned_by=[], + ) + context.upsert_model("sushi.items", kind=model_kind_type_from_name(kind)()) # type: ignore + + +def validate_model_kind_change( + kind_name: ModelKindName, + context: Context, + environment: str, + *, + logical: bool, +): + directly_modified = ['"memory"."sushi"."items"'] + indirectly_modified = [ + '"memory"."sushi"."order_items"', + '"memory"."sushi"."waiter_revenue_by_day"', + '"memory"."sushi"."customer_revenue_by_day"', + '"memory"."sushi"."customer_revenue_lifetime"', + '"memory"."sushi"."top_waiters"', + "assert_item_price_above_zero", + ] + if kind_name == ModelKindName.INCREMENTAL_BY_TIME_RANGE: + kind: ModelKind = IncrementalByTimeRangeKind(time_column=TimeColumn(column="event_date")) + elif kind_name == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY: + kind = IncrementalByUniqueKeyKind(unique_key="id") + else: + kind = model_kind_type_from_name(kind_name)() # type: ignore + + def _validate_plan(context, plan): + validate_plan_changes(plan, modified=directly_modified + indirectly_modified) + assert ( + next( + snapshot + for snapshot in plan.snapshots.values() + if snapshot.name == '"memory"."sushi"."items"' + ).model.kind.name + == kind.name + ) + assert bool(plan.missing_intervals) != logical + + apply_to_environment( + context, + environment, + SnapshotChangeCategory.NON_BREAKING, + plan_validators=[_validate_plan], + ) diff --git a/tests/core/linter/test_builtin.py b/tests/core/linter/test_builtin.py index 1a19d036b5..0ff91470ff 100644 --- a/tests/core/linter/test_builtin.py +++ b/tests/core/linter/test_builtin.py @@ -172,3 +172,63 @@ def test_no_missing_external_models_with_existing_file_not_ending_in_newline( ) fix_path = sushi_path / "external_models.yaml" assert edit.path == fix_path + + +def test_no_missing_unit_tests(tmp_path, copy_to_temp_path): + """ + Tests that the NoMissingUnitTest linter rule correctly identifies models + without corresponding unit tests in the tests/ directory + + 
This test checks the sushi example project, enables the linter, + and verifies that the linter raises a rule violation for the models + that do not have a unit test + """ + sushi_paths = copy_to_temp_path("examples/sushi") + sushi_path = sushi_paths[0] + + # Override the config.py to turn on lint + with open(sushi_path / "config.py", "r") as f: + read_file = f.read() + + before = """ linter=LinterConfig( + enabled=False, + rules=[ + "ambiguousorinvalidcolumn", + "invalidselectstarexpansion", + "noselectstar", + "nomissingaudits", + "nomissingowner", + "nomissingexternalmodels", + ], + ),""" + after = """linter=LinterConfig(enabled=True, rules=["nomissingunittest"]),""" + read_file = read_file.replace(before, after) + assert after in read_file + with open(sushi_path / "config.py", "w") as f: + f.writelines(read_file) + + # Load the context with the temporary sushi path + context = Context(paths=[sushi_path]) + + # Lint the models + lints = context.lint_models(raise_on_error=False) + + # Should have violations for models without tests (most models except customers) + assert len(lints) >= 1 + + # Check that we get violations for models without tests + violation_messages = [lint.violation_msg for lint in lints] + assert any("is missing unit test(s)" in msg for msg in violation_messages) + + # Check that models with existing tests don't have violations + models_with_tests = ["customer_revenue_by_day", "customer_revenue_lifetime", "order_items"] + + for model_name in models_with_tests: + model_violations = [ + lint + for lint in lints + if model_name in lint.violation_msg and "is missing unit test(s)" in lint.violation_msg + ] + assert len(model_violations) == 0, ( + f"Model {model_name} should not have a violation since it has a test" + ) diff --git a/tests/core/state_sync/test_state_sync.py b/tests/core/state_sync/test_state_sync.py index 51a646ce5d..88e168c216 100644 --- a/tests/core/state_sync/test_state_sync.py +++ b/tests/core/state_sync/test_state_sync.py @@ 
-13,19 +13,17 @@ from sqlmesh.core import constants as c from sqlmesh.core.config import EnvironmentSuffixTarget -from sqlmesh.core.dialect import parse_one, schema_ +from sqlmesh.core.dialect import parse_one from sqlmesh.core.engine_adapter import create_engine_adapter from sqlmesh.core.environment import Environment, EnvironmentStatements from sqlmesh.core.model import ( FullKind, IncrementalByTimeRangeKind, - ModelKindName, Seed, SeedKind, SeedModel, SqlModel, ) -from sqlmesh.core.model.definition import ExternalModel from sqlmesh.core.snapshot import ( Snapshot, SnapshotChangeCategory, @@ -38,20 +36,37 @@ from sqlmesh.core.state_sync import ( CachingStateSync, EngineAdapterStateSync, - cleanup_expired_views, ) from sqlmesh.core.state_sync.base import ( SCHEMA_VERSION, SQLGLOT_VERSION, - PromotionResult, Versions, ) +from sqlmesh.core.state_sync.common import ( + ExpiredBatchRange, + LimitBoundary, + PromotionResult, + RowBoundary, +) from sqlmesh.utils.date import now_timestamp, to_datetime, to_timestamp from sqlmesh.utils.errors import SQLMeshError, StateMigrationError pytestmark = pytest.mark.slow +def _get_cleanup_tasks( + state_sync: EngineAdapterStateSync, + *, + limit: int = 1000, + ignore_ttl: bool = False, +) -> t.List[SnapshotTableCleanupTask]: + batch = state_sync.get_expired_snapshots( + ignore_ttl=ignore_ttl, + batch_range=ExpiredBatchRange.init_batch_range(batch_size=limit), + ) + return [] if batch is None else batch.cleanup_tasks + + @pytest.fixture def state_sync(duck_conn, tmp_path): state_sync = EngineAdapterStateSync( @@ -1156,15 +1171,356 @@ def test_delete_expired_snapshots(state_sync: EngineAdapterStateSync, make_snaps new_snapshot.snapshot_id, } - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True), SnapshotTableCleanupTask(snapshot=new_snapshot.table_info, dev_table_only=False), ] - state_sync.delete_expired_snapshots() + 
state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not state_sync.get_snapshots(all_snapshots) +def test_get_expired_snapshot_batch(state_sync: EngineAdapterStateSync, make_snapshot: t.Callable): + now_ts = now_timestamp() + + snapshots = [] + for idx in range(3): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx}", + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot.updated_ts = now_ts - (20000 + idx * 1000) + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + batch = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange.init_batch_range(batch_size=2), + ) + assert batch is not None + assert len(batch.expired_snapshot_ids) == 2 + assert len(batch.cleanup_tasks) == 2 + + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=RowBoundary.lowest_boundary(), + end=batch.batch_range.end, + ), + ) + + next_batch = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert next_batch is not None + assert len(next_batch.expired_snapshot_ids) == 1 + + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange( + start=next_batch.batch_range.start, + end=next_batch.batch_range.end, + ), + ) + + assert ( + state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=next_batch.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + is None + ) + + +def test_get_expired_snapshot_batch_same_timestamp( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable +): + """Test that pagination works correctly when multiple snapshots have the same updated_ts.""" + now_ts = now_timestamp() + same_timestamp = now_ts - 20000 + + snapshots = [] + for idx in range(5): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx:02d}", # Zero-padded to ensure 
deterministic name ordering + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + # All snapshots have the same updated_ts + snapshot.updated_ts = same_timestamp + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + # Fetch first batch of 2 + batch1 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange.init_batch_range(batch_size=2), + ) + assert batch1 is not None + assert len(batch1.expired_snapshot_ids) == 2 + assert sorted([x.name for x in batch1.expired_snapshot_ids]) == [ + '"model_00"', + '"model_01"', + ] + + # Fetch second batch of 2 using cursor from batch1 + batch2 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch1.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert batch2 is not None + assert len(batch2.expired_snapshot_ids) == 2 + assert sorted([x.name for x in batch2.expired_snapshot_ids]) == [ + '"model_02"', + '"model_03"', + ] + + # Fetch third batch of 2 using cursor from batch2 + batch3 = state_sync.get_expired_snapshots( + batch_range=ExpiredBatchRange( + start=batch2.batch_range.end, + end=LimitBoundary(batch_size=2), + ), + ) + assert batch3 is not None + assert sorted([x.name for x in batch3.expired_snapshot_ids]) == [ + '"model_04"', + ] + + +def test_delete_expired_snapshots_batching_with_deletion( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable +): + """Test that delete_expired_snapshots properly deletes batches as it pages through them.""" + now_ts = now_timestamp() + + # Create 5 expired snapshots with different timestamps + snapshots = [] + for idx in range(5): + snapshot = make_snapshot( + SqlModel( + name=f"model_{idx}", + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot.updated_ts = now_ts - (20000 + idx * 1000) + snapshots.append(snapshot) + + 
state_sync.push_snapshots(snapshots)
+
+    # Verify all 5 snapshots exist
+    assert len(state_sync.get_snapshots(snapshots)) == 5
+
+    # Get first batch of 2
+    batch1 = state_sync.get_expired_snapshots(
+        batch_range=ExpiredBatchRange.init_batch_range(batch_size=2),
+    )
+    assert batch1 is not None
+    assert len(batch1.expired_snapshot_ids) == 2
+
+    # Delete the first batch using batch_range
+    state_sync.delete_expired_snapshots(
+        batch_range=ExpiredBatchRange(
+            start=batch1.batch_range.start,
+            end=batch1.batch_range.end,
+        ),
+    )
+
+    # Verify the oldest 2 snapshots (model_3 and model_4) are deleted and the newest 3 remain
+    remaining = state_sync.get_snapshots(snapshots)
+    assert len(remaining) == 3
+    assert snapshots[0].snapshot_id in remaining  # model_0 (newest)
+    assert snapshots[1].snapshot_id in remaining  # model_1
+    assert snapshots[2].snapshot_id in remaining  # model_2
+    assert snapshots[3].snapshot_id not in remaining  # model_3
+    assert snapshots[4].snapshot_id not in remaining  # model_4 (oldest)
+
+    # Get next batch of 2 (should start after batch1's boundary)
+    batch2 = state_sync.get_expired_snapshots(
+        batch_range=ExpiredBatchRange(
+            start=batch1.batch_range.end,
+            end=LimitBoundary(batch_size=2),
+        ),
+    )
+    assert batch2 is not None
+    assert len(batch2.expired_snapshot_ids) == 2
+
+    # Delete the second batch
+    state_sync.delete_expired_snapshots(
+        batch_range=ExpiredBatchRange(
+            start=batch2.batch_range.start,
+            end=batch2.batch_range.end,
+        ),
+    )
+
+    # Verify only the newest snapshot (model_0) remains
+    remaining = state_sync.get_snapshots(snapshots)
+    assert len(remaining) == 1
+    assert snapshots[0].snapshot_id in remaining  # model_0 (newest)
+    assert snapshots[1].snapshot_id not in remaining  # model_1
+    assert snapshots[2].snapshot_id not in remaining  # model_2
+    assert snapshots[3].snapshot_id not in remaining  # model_3
+    assert snapshots[4].snapshot_id not in remaining  # model_4 (oldest)
+
+    # Get final batch
+    batch3 = state_sync.get_expired_snapshots(
+        
batch_range=ExpiredBatchRange(
+            start=batch2.batch_range.end,
+            end=LimitBoundary(batch_size=2),
+        ),
+    )
+    assert batch3 is not None
+    assert len(batch3.expired_snapshot_ids) == 1
+
+    # Delete the final batch
+    state_sync.delete_expired_snapshots(
+        batch_range=ExpiredBatchRange(
+            start=batch3.batch_range.start,
+            end=batch3.batch_range.end,
+        ),
+    )
+
+    # Verify all snapshots are deleted
+    assert len(state_sync.get_snapshots(snapshots)) == 0
+
+    # Verify no more expired snapshots exist
+    assert (
+        state_sync.get_expired_snapshots(
+            batch_range=ExpiredBatchRange(
+                start=batch3.batch_range.end,
+                end=LimitBoundary(batch_size=2),
+            ),
+        )
+        is None
+    )
+
+
+def test_iterator_expired_snapshot_batch(
+    state_sync: EngineAdapterStateSync, make_snapshot: t.Callable
+):
+    """Test the iter_expired_snapshot_batches helper function."""
+    from sqlmesh.core.state_sync.common import iter_expired_snapshot_batches
+
+    now_ts = now_timestamp()
+
+    snapshots = []
+    for idx in range(5):
+        snapshot = make_snapshot(
+            SqlModel(
+                name=f"model_{idx}",
+                query=parse_one("select 1 as a, ds"),
+            ),
+        )
+        snapshot.ttl = "in 10 seconds"
+        snapshot.categorize_as(SnapshotChangeCategory.BREAKING)
+        snapshot.updated_ts = now_ts - (20000 + idx * 1000)
+        snapshots.append(snapshot)
+
+    state_sync.push_snapshots(snapshots)
+
+    # Track all batches processed
+    batches_processed = []
+
+    # Process with batch size of 2
+    for batch in iter_expired_snapshot_batches(
+        state_sync,
+        current_ts=now_ts,
+        ignore_ttl=False,
+        batch_size=2,
+    ):
+        batches_processed.append(batch)
+
+    # Should have processed 3 batches (2 + 2 + 1)
+    assert len(batches_processed) == 3
+    assert len(batches_processed[0].expired_snapshot_ids) == 2
+    assert len(batches_processed[1].expired_snapshot_ids) == 2
+    assert len(batches_processed[2].expired_snapshot_ids) == 1
+
+    # Verify all snapshots were processed
+    all_processed_ids = set()
+    for batch in batches_processed:
+        all_processed_ids.update(batch.expired_snapshot_ids)
+
+ expected_ids = {s.snapshot_id for s in snapshots} + assert all_processed_ids == expected_ids + + +@pytest.mark.parametrize( + "start_boundary,end_boundary,expected_sql", + [ + # Test with GT only (when end is LimitBoundary) + ( + RowBoundary(updated_ts=0, name="", identifier=""), + LimitBoundary(batch_size=100), + "updated_ts > 0 OR (updated_ts = 0 AND name > '') OR (updated_ts = 0 AND name = '' AND identifier > '')", + ), + # Test with GT and LTE (when both are RowBoundary) + ( + RowBoundary(updated_ts=1000, name="model_a", identifier="abc"), + RowBoundary(updated_ts=2000, name="model_z", identifier="xyz"), + "(updated_ts > 1000 OR (updated_ts = 1000 AND name > 'model_a') OR (updated_ts = 1000 AND name = 'model_a' AND identifier > 'abc')) AND (updated_ts < 2000 OR (updated_ts = 2000 AND name < 'model_z') OR (updated_ts = 2000 AND name = 'model_z' AND identifier <= 'xyz'))", + ), + # Test with zero timestamp + ( + RowBoundary(updated_ts=0, name="", identifier=""), + RowBoundary(updated_ts=1234567890, name="model_x", identifier="id_123"), + "(updated_ts > 0 OR (updated_ts = 0 AND name > '') OR (updated_ts = 0 AND name = '' AND identifier > '')) AND (updated_ts < 1234567890 OR (updated_ts = 1234567890 AND name < 'model_x') OR (updated_ts = 1234567890 AND name = 'model_x' AND identifier <= 'id_123'))", + ), + # Test with same timestamp, different names + ( + RowBoundary(updated_ts=5000, name="model_a", identifier="id_1"), + RowBoundary(updated_ts=5000, name="model_b", identifier="id_2"), + "(updated_ts > 5000 OR (updated_ts = 5000 AND name > 'model_a') OR (updated_ts = 5000 AND name = 'model_a' AND identifier > 'id_1')) AND (updated_ts < 5000 OR (updated_ts = 5000 AND name < 'model_b') OR (updated_ts = 5000 AND name = 'model_b' AND identifier <= 'id_2'))", + ), + # Test with same timestamp and name, different identifiers + ( + RowBoundary(updated_ts=7000, name="model_x", identifier="id_a"), + RowBoundary(updated_ts=7000, name="model_x", identifier="id_b"), + 
"(updated_ts > 7000 OR (updated_ts = 7000 AND name > 'model_x') OR (updated_ts = 7000 AND name = 'model_x' AND identifier > 'id_a')) AND (updated_ts < 7000 OR (updated_ts = 7000 AND name < 'model_x') OR (updated_ts = 7000 AND name = 'model_x' AND identifier <= 'id_b'))", + ), + # Test all_batch_range use case + ( + RowBoundary(updated_ts=0, name="", identifier=""), + RowBoundary(updated_ts=253_402_300_799_999, name="", identifier=""), + "(updated_ts > 0 OR (updated_ts = 0 AND name > '') OR (updated_ts = 0 AND name = '' AND identifier > '')) AND (updated_ts < 253402300799999 OR (updated_ts = 253402300799999 AND name < '') OR (updated_ts = 253402300799999 AND name = '' AND identifier <= ''))", + ), + ], +) +def test_expired_batch_range_where_filter(start_boundary, end_boundary, expected_sql): + """Test ExpiredBatchRange.where_filter generates correct SQL for various boundary combinations.""" + batch_range = ExpiredBatchRange(start=start_boundary, end=end_boundary) + result = batch_range.where_filter + assert result.sql() == expected_sql + + +def test_expired_batch_range_where_filter_with_limit(): + """Test that where_filter correctly handles LimitBoundary (only start condition, no end condition).""" + batch_range = ExpiredBatchRange( + start=RowBoundary(updated_ts=1000, name="model_a", identifier="abc"), + end=LimitBoundary(batch_size=50), + ) + result = batch_range.where_filter + # When end is LimitBoundary, should only have the start (GT) condition + assert ( + result.sql() + == "updated_ts > 1000 OR (updated_ts = 1000 AND name > 'model_a') OR (updated_ts = 1000 AND name = 'model_a' AND identifier > 'abc')" + ) + + def test_delete_expired_snapshots_seed( state_sync: EngineAdapterStateSync, make_snapshot: t.Callable ): @@ -1187,10 +1543,10 @@ def test_delete_expired_snapshots_seed( state_sync.push_snapshots(all_snapshots) assert set(state_sync.get_snapshots(all_snapshots)) == {snapshot.snapshot_id} - assert state_sync.get_expired_snapshots() == [ + assert 
_get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=False), ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not state_sync.get_snapshots(all_snapshots) @@ -1228,11 +1584,11 @@ def test_delete_expired_snapshots_batching( snapshot_b.snapshot_id, } - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot_a.table_info, dev_table_only=False), SnapshotTableCleanupTask(snapshot=snapshot_b.table_info, dev_table_only=False), ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not state_sync.get_snapshots(all_snapshots) @@ -1265,8 +1621,8 @@ def test_delete_expired_snapshots_promoted( state_sync.promote(env) all_snapshots = [snapshot] - assert not state_sync.get_expired_snapshots() - state_sync.delete_expired_snapshots() + assert not _get_cleanup_tasks(state_sync) + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert set(state_sync.get_snapshots(all_snapshots)) == {snapshot.snapshot_id} env.snapshots_ = [] @@ -1275,13 +1631,90 @@ def test_delete_expired_snapshots_promoted( now_timestamp_mock = mocker.patch("sqlmesh.core.state_sync.db.facade.now_timestamp") now_timestamp_mock.return_value = now_timestamp() + 11000 - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=False) ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not state_sync.get_snapshots(all_snapshots) +def test_delete_expired_snapshots_previous_finalized_snapshots( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable +): + """Test that expired snapshots are protected 
if they are part of previous finalized snapshots + in a non-finalized environment.""" + now_ts = now_timestamp() + + # Create an old snapshot that will be expired + old_snapshot = make_snapshot( + SqlModel( + name="a", + query=parse_one("select a, ds"), + ), + ) + old_snapshot.ttl = "in 10 seconds" + old_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + # Create a new snapshot + new_snapshot = make_snapshot( + SqlModel( + name="a", + query=parse_one("select a, b, ds"), + ), + ) + new_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + state_sync.push_snapshots([old_snapshot, new_snapshot]) + + # Promote the old snapshot to an environment and finalize it + env = Environment( + name="test_environment", + snapshots=[old_snapshot.table_info], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="test_plan_id", + previous_plan_id="test_plan_id", + ) + state_sync.promote(env) + state_sync.finalize(env) + + # Verify old snapshot is not cleaned up because it's in a finalized environment + assert not _get_cleanup_tasks(state_sync) + + # Now promote the new snapshot to the same environment (this simulates a new plan) + # The environment will have previous_finalized_snapshots set to the old snapshot + # and will not be finalized yet + env = Environment( + name="test_environment", + snapshots=[new_snapshot.table_info], + previous_finalized_snapshots=[old_snapshot.table_info], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="new_plan_id", + previous_plan_id="test_plan_id", + ) + state_sync.promote(env) + + # Manually update the snapshots updated_ts to simulate expiration + state_sync.engine_adapter.execute( + f"UPDATE sqlmesh._snapshots SET updated_ts = {now_ts - 15000} WHERE name = '{old_snapshot.name}' AND identifier = '{old_snapshot.identifier}'" + ) + + # The old snapshot should still not be cleaned up because it's part of + # previous_finalized_snapshots in a non-finalized environment + assert not _get_cleanup_tasks(state_sync) + 
state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) + assert state_sync.snapshots_exist([old_snapshot.snapshot_id]) == {old_snapshot.snapshot_id} + + # Once the environment is finalized, the expired snapshot should be removed successfully + state_sync.finalize(env) + assert _get_cleanup_tasks(state_sync) == [ + SnapshotTableCleanupTask(snapshot=old_snapshot.table_info, dev_table_only=False), + ] + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) + assert not state_sync.snapshots_exist([old_snapshot.snapshot_id]) + + def test_delete_expired_snapshots_dev_table_cleanup_only( state_sync: EngineAdapterStateSync, make_snapshot: t.Callable ): @@ -1315,10 +1748,10 @@ def test_delete_expired_snapshots_dev_table_cleanup_only( new_snapshot.snapshot_id, } - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True) ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert set(state_sync.get_snapshots(all_snapshots)) == {new_snapshot.snapshot_id} @@ -1357,8 +1790,8 @@ def test_delete_expired_snapshots_shared_dev_table( new_snapshot.snapshot_id, } - assert not state_sync.get_expired_snapshots() # No dev table cleanup - state_sync.delete_expired_snapshots() + assert not _get_cleanup_tasks(state_sync) # No dev table cleanup + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert set(state_sync.get_snapshots(all_snapshots)) == {new_snapshot.snapshot_id} @@ -1403,16 +1836,18 @@ def test_delete_expired_snapshots_ignore_ttl( state_sync.promote(env) # default TTL = 1 week, nothing to clean up yet if we take TTL into account - assert not state_sync.get_expired_snapshots() - state_sync.delete_expired_snapshots() + assert not _get_cleanup_tasks(state_sync) + 
state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert state_sync.snapshots_exist([snapshot_c.snapshot_id]) == {snapshot_c.snapshot_id} # If we ignore TTL, only snapshot_c should get cleaned up because snapshot_a and snapshot_b are part of an environment assert snapshot_a.table_info != snapshot_b.table_info != snapshot_c.table_info - assert state_sync.get_expired_snapshots(ignore_ttl=True) == [ + assert _get_cleanup_tasks(state_sync, ignore_ttl=True) == [ SnapshotTableCleanupTask(snapshot=snapshot_c.table_info, dev_table_only=False) ] - state_sync.delete_expired_snapshots(ignore_ttl=True) + state_sync.delete_expired_snapshots( + batch_range=ExpiredBatchRange.all_batch_range(), ignore_ttl=True + ) assert not state_sync.snapshots_exist([snapshot_c.snapshot_id]) @@ -1476,11 +1911,11 @@ def test_delete_expired_snapshots_cleanup_intervals( ] assert not stored_new_snapshot.dev_intervals - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True), SnapshotTableCleanupTask(snapshot=new_snapshot.table_info, dev_table_only=False), ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not get_snapshot_intervals(snapshot) @@ -1564,10 +1999,10 @@ def test_delete_expired_snapshots_cleanup_intervals_shared_version( ) # Delete the expired snapshot - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=True), ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not state_sync.get_snapshots([snapshot]) # Check new snapshot's intervals @@ -1684,8 +2119,8 @@ def test_delete_expired_snapshots_cleanup_intervals_shared_dev_version( ) # Delete the expired snapshot - assert 
state_sync.get_expired_snapshots() == [] - state_sync.delete_expired_snapshots() + assert not _get_cleanup_tasks(state_sync) + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert not state_sync.get_snapshots([snapshot]) # Check new snapshot's intervals @@ -1778,10 +2213,10 @@ def test_compact_intervals_after_cleanup( state_sync.add_interval(snapshot_c, "2023-01-07", "2023-01-09", is_dev=True) # Only the dev table of the original snapshot should be deleted - assert state_sync.get_expired_snapshots() == [ + assert _get_cleanup_tasks(state_sync) == [ SnapshotTableCleanupTask(snapshot=snapshot_a.table_info, dev_table_only=True), ] - state_sync.delete_expired_snapshots() + state_sync.delete_expired_snapshots(batch_range=ExpiredBatchRange.all_batch_range()) assert state_sync.engine_adapter.fetchone("SELECT COUNT(*) FROM sqlmesh._intervals")[0] == 5 # type: ignore @@ -2580,105 +3015,6 @@ def test_cache(state_sync, make_snapshot, mocker): mock.assert_called() -def test_cleanup_expired_views( - mocker: MockerFixture, state_sync: EngineAdapterStateSync, make_snapshot: t.Callable -): - adapter = mocker.MagicMock() - adapter.dialect = None - snapshot_a = make_snapshot(SqlModel(name="catalog.schema.a", query=parse_one("select 1, ds"))) - snapshot_a.categorize_as(SnapshotChangeCategory.BREAKING) - snapshot_b = make_snapshot(SqlModel(name="catalog.schema.b", query=parse_one("select 1, ds"))) - snapshot_b.categorize_as(SnapshotChangeCategory.BREAKING) - # Make sure that we don't drop schemas from external models - snapshot_external_model = make_snapshot( - ExternalModel(name="catalog.external_schema.external_table", kind=ModelKindName.EXTERNAL) - ) - snapshot_external_model.categorize_as(SnapshotChangeCategory.BREAKING) - schema_environment = Environment( - name="test_environment", - suffix_target=EnvironmentSuffixTarget.SCHEMA, - snapshots=[ - snapshot_a.table_info, - snapshot_b.table_info, - snapshot_external_model.table_info, - ], - 
start_at="2022-01-01", - end_at="2022-01-01", - plan_id="test_plan_id", - previous_plan_id="test_plan_id", - catalog_name_override="catalog_override", - ) - snapshot_c = make_snapshot(SqlModel(name="catalog.schema.c", query=parse_one("select 1, ds"))) - snapshot_c.categorize_as(SnapshotChangeCategory.BREAKING) - snapshot_d = make_snapshot(SqlModel(name="catalog.schema.d", query=parse_one("select 1, ds"))) - snapshot_d.categorize_as(SnapshotChangeCategory.BREAKING) - table_environment = Environment( - name="test_environment", - suffix_target=EnvironmentSuffixTarget.TABLE, - snapshots=[ - snapshot_c.table_info, - snapshot_d.table_info, - snapshot_external_model.table_info, - ], - start_at="2022-01-01", - end_at="2022-01-01", - plan_id="test_plan_id", - previous_plan_id="test_plan_id", - catalog_name_override="catalog_override", - ) - cleanup_expired_views(adapter, {}, [schema_environment, table_environment]) - assert adapter.drop_schema.called - assert adapter.drop_view.called - assert adapter.drop_schema.call_args_list == [ - call( - schema_("schema__test_environment", "catalog_override"), - ignore_if_not_exists=True, - cascade=True, - ) - ] - assert sorted(adapter.drop_view.call_args_list) == [ - call("catalog_override.schema.c__test_environment", ignore_if_not_exists=True), - call("catalog_override.schema.d__test_environment", ignore_if_not_exists=True), - ] - - -@pytest.mark.parametrize( - "suffix_target", [EnvironmentSuffixTarget.SCHEMA, EnvironmentSuffixTarget.TABLE] -) -def test_cleanup_expired_environment_schema_warn_on_delete_failure( - mocker: MockerFixture, make_snapshot: t.Callable, suffix_target: EnvironmentSuffixTarget -): - adapter = mocker.MagicMock() - adapter.dialect = None - adapter.drop_schema.side_effect = Exception("Failed to drop the schema") - adapter.drop_view.side_effect = Exception("Failed to drop the view") - - snapshot = make_snapshot( - SqlModel(name="test_catalog.test_schema.test_model", query=parse_one("select 1, ds")) - ) - 
snapshot.categorize_as(SnapshotChangeCategory.BREAKING) - schema_environment = Environment( - name="test_environment", - suffix_target=suffix_target, - snapshots=[snapshot.table_info], - start_at="2022-01-01", - end_at="2022-01-01", - plan_id="test_plan_id", - previous_plan_id="test_plan_id", - catalog_name_override="catalog_override", - ) - - with pytest.raises(SQLMeshError, match="Failed to drop the expired environment .*"): - cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=False) - - cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=True) - - if suffix_target == EnvironmentSuffixTarget.SCHEMA: - assert adapter.drop_schema.called - else: - assert adapter.drop_view.called - - def test_max_interval_end_per_model( state_sync: EngineAdapterStateSync, make_snapshot: t.Callable ) -> None: diff --git a/tests/core/test_audit.py b/tests/core/test_audit.py index ed67975e9e..90ac655cc6 100644 --- a/tests/core/test_audit.py +++ b/tests/core/test_audit.py @@ -5,6 +5,7 @@ from sqlmesh.core import constants as c from sqlmesh.core.config.model import ModelDefaultsConfig from sqlmesh.core.context import Context +from sqlmesh.core.node import DbtNodeInfo from sqlmesh.core.audit import ( ModelAudit, StandaloneAudit, @@ -12,7 +13,7 @@ load_audit, load_multiple_audits, ) -from sqlmesh.core.dialect import parse +from sqlmesh.core.dialect import parse, jinja_query from sqlmesh.core.model import ( FullKind, IncrementalByTimeRangeKind, @@ -328,7 +329,7 @@ def test_load_with_dictionary_defaults(): audit = load_audit(expressions, dialect="spark") assert audit.defaults.keys() == {"field1", "field2"} for value in audit.defaults.values(): - assert isinstance(value, exp.Expression) + assert isinstance(value, exp.Expr) def test_load_with_single_defaults(): @@ -349,7 +350,7 @@ def test_load_with_single_defaults(): audit = load_audit(expressions, dialect="duckdb") assert audit.defaults.keys() == {"field1"} for value in 
audit.defaults.values(): - assert isinstance(value, exp.Expression) + assert isinstance(value, exp.Expr) def test_no_audit_statement(): @@ -396,7 +397,7 @@ def test_no_query(): def test_macro(model: Model): - expected_query = """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE "a" IS NULL""" + expected_query = """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE "a" IS NULL""" audit = ModelAudit( name="test_audit", @@ -455,7 +456,7 @@ def test_not_null_audit(model: Model): ) assert ( rendered_query_a.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE "a" IS NULL AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE "a" IS NULL AND TRUE""" ) rendered_query_a_and_b = model.render_audit_query( @@ -464,7 +465,7 @@ def test_not_null_audit(model: Model): ) assert ( rendered_query_a_and_b.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE ("a" IS NULL OR "b" IS NULL) AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE ("a" IS NULL OR "b" IS NULL) AND TRUE""" ) @@ -475,7 +476,7 @@ def test_not_null_audit_default_catalog(model_default_catalog: Model): ) assert ( rendered_query_a.sql() - == """SELECT * FROM (SELECT * FROM "test_catalog"."db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE "a" IS NULL AND TRUE""" + == """SELECT * FROM (SELECT * FROM "test_catalog"."db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE "a" IS NULL AND TRUE""" ) rendered_query_a_and_b = 
model_default_catalog.render_audit_query( @@ -484,7 +485,7 @@ def test_not_null_audit_default_catalog(model_default_catalog: Model): ) assert ( rendered_query_a_and_b.sql() - == """SELECT * FROM (SELECT * FROM "test_catalog"."db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE ("a" IS NULL OR "b" IS NULL) AND TRUE""" + == """SELECT * FROM (SELECT * FROM "test_catalog"."db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE ("a" IS NULL OR "b" IS NULL) AND TRUE""" ) @@ -494,7 +495,7 @@ def test_unique_values_audit(model: Model): ) assert ( rendered_query_a.sql() - == 'SELECT * FROM (SELECT ROW_NUMBER() OVER (PARTITION BY "a" ORDER BY "a") AS "rank_a" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE "b" IS NULL) AS "_q_1" WHERE "rank_a" > 1' + == 'SELECT * FROM (SELECT ROW_NUMBER() OVER (PARTITION BY "a" ORDER BY "a") AS "rank_a" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE "b" IS NULL) AS "_1" WHERE "rank_a" > 1' ) rendered_query_a_and_b = model.render_audit_query( @@ -502,7 +503,7 @@ def test_unique_values_audit(model: Model): ) assert ( rendered_query_a_and_b.sql() - == 'SELECT * FROM (SELECT ROW_NUMBER() OVER (PARTITION BY "a" ORDER BY "a") AS "rank_a", ROW_NUMBER() OVER (PARTITION BY "b" ORDER BY "b") AS "rank_b" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE TRUE) AS "_q_1" WHERE "rank_a" > 1 OR "rank_b" > 1' + == 'SELECT * FROM (SELECT ROW_NUMBER() OVER (PARTITION BY "a" ORDER BY "a") AS "rank_a", ROW_NUMBER() OVER (PARTITION BY "b" ORDER BY "b") AS "rank_b" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE TRUE) AS "_1" WHERE "rank_a" > 1 OR "rank_b" > 1' ) @@ -514,7 +515,7 @@ 
def test_accepted_values_audit(model: Model): ) assert ( rendered_query.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE NOT "a" IN ('value_a', 'value_b') AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE NOT "a" IN ('value_a', 'value_b') AND TRUE""" ) @@ -525,7 +526,7 @@ def test_number_of_rows_audit(model: Model): ) assert ( rendered_query.sql() - == """SELECT COUNT(*) FROM (SELECT 1 FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE TRUE LIMIT 0 + 1) AS "_q_1" HAVING COUNT(*) <= 0""" + == """SELECT COUNT(*) FROM (SELECT 1 FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE TRUE LIMIT 0 + 1) AS "_1" HAVING COUNT(*) <= 0""" ) @@ -536,7 +537,7 @@ def test_forall_audit(model: Model): ) assert ( rendered_query_a.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE NOT ("a" >= "b") AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE NOT ("a" >= "b") AND TRUE""" ) rendered_query_a = model.render_audit_query( @@ -545,7 +546,7 @@ def test_forall_audit(model: Model): ) assert ( rendered_query_a.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE (NOT ("a" >= "b") OR NOT ("c" + "d" - "e" < 1.0)) AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE (NOT ("a" >= "b") OR NOT ("c" + "d" - "e" < 1.0)) AND TRUE""" ) rendered_query_a = model.render_audit_query( @@ -555,7 +556,7 @@ def 
test_forall_audit(model: Model): ) assert ( rendered_query_a.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE (NOT ("a" >= "b") OR NOT ("c" + "d" - "e" < 1.0)) AND "f" = 42""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE (NOT ("a" >= "b") OR NOT ("c" + "d" - "e" < 1.0)) AND "f" = 42""" ) @@ -565,21 +566,21 @@ def test_accepted_range_audit(model: Model): ) assert ( rendered_query.sql() - == 'SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE "a" < 0 AND TRUE' + == 'SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE "a" < 0 AND TRUE' ) rendered_query = model.render_audit_query( builtin.accepted_range_audit, column=exp.to_column("a"), max_v=100, inclusive=exp.false() ) assert ( rendered_query.sql() - == 'SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE "a" >= 100 AND TRUE' + == 'SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE "a" >= 100 AND TRUE' ) rendered_query = model.render_audit_query( builtin.accepted_range_audit, column=exp.to_column("a"), min_v=100, max_v=100 ) assert ( rendered_query.sql() - == 'SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE ("a" < 100 OR "a" > 100) AND TRUE' + == 'SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE ("a" < 100 OR "a" > 100) AND TRUE' ) @@ -590,7 +591,7 @@ def test_at_least_one_audit(model: Model): ) assert ( rendered_query.sql() - == 'SELECT 1 AS "1" FROM 
(SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE TRUE GROUP BY 1 HAVING COUNT("a") = 0' + == 'SELECT 1 AS "1" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE TRUE GROUP BY 1 HAVING COUNT("a") = 0' ) @@ -602,7 +603,7 @@ def test_mutually_exclusive_ranges_audit(model: Model): ) assert ( rendered_query.sql() - == '''WITH "window_functions" AS (SELECT "a" AS "lower_bound", "a" AS "upper_bound", LEAD("a") OVER (ORDER BY "a", "a") AS "next_lower_bound", ROW_NUMBER() OVER (ORDER BY "a" DESC, "a" DESC) = 1 AS "is_last_record" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE TRUE), "calc" AS (SELECT *, COALESCE("lower_bound" <= "upper_bound", FALSE) AS "lower_bound_lte_upper_bound", COALESCE("upper_bound" <= "next_lower_bound", "is_last_record", FALSE) AS "upper_bound_lte_next_lower_bound" FROM "window_functions" AS "window_functions"), "validation_errors" AS (SELECT * FROM "calc" AS "calc" WHERE NOT ("lower_bound_lte_upper_bound" AND "upper_bound_lte_next_lower_bound")) SELECT * FROM "validation_errors" AS "validation_errors"''' + == '''WITH "window_functions" AS (SELECT "a" AS "lower_bound", "a" AS "upper_bound", LEAD("a") OVER (ORDER BY "a", "a") AS "next_lower_bound", ROW_NUMBER() OVER (ORDER BY "a" DESC, "a" DESC) = 1 AS "is_last_record" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE TRUE), "calc" AS (SELECT *, COALESCE("lower_bound" <= "upper_bound", FALSE) AS "lower_bound_lte_upper_bound", COALESCE("upper_bound" <= "next_lower_bound", "is_last_record", FALSE) AS "upper_bound_lte_next_lower_bound" FROM "window_functions" AS "window_functions"), "validation_errors" AS (SELECT * FROM "calc" AS "calc" WHERE NOT ("lower_bound_lte_upper_bound" AND "upper_bound_lte_next_lower_bound")) SELECT * 
FROM "validation_errors" AS "validation_errors"''' ) @@ -613,7 +614,7 @@ def test_sequential_values_audit(model: Model): ) assert ( rendered_query.sql() - == '''WITH "windowed" AS (SELECT "a", LAG("a") OVER (ORDER BY "a") AS "prv" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE TRUE), "validation_errors" AS (SELECT * FROM "windowed" AS "windowed" WHERE NOT ("a" = "prv" + 1)) SELECT * FROM "validation_errors" AS "validation_errors"''' + == '''WITH "windowed" AS (SELECT "a", LAG("a") OVER (ORDER BY "a") AS "prv" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE TRUE), "validation_errors" AS (SELECT * FROM "windowed" AS "windowed" WHERE NOT ("a" = "prv" + 1)) SELECT * FROM "validation_errors" AS "validation_errors"''' ) @@ -626,7 +627,7 @@ def test_chi_square_audit(model: Model): ) assert ( rendered_query.sql() - == """WITH "samples" AS (SELECT "a" AS "x_a", "b" AS "x_b" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE (NOT "a" IS NULL AND NOT "b" IS NULL) AND TRUE), "contingency_table" AS (SELECT "x_a", "x_b", COUNT(*) AS "observed", (SELECT COUNT(*) FROM "samples" AS "t" WHERE "r"."x_a" = "t"."x_a") AS "tot_a", (SELECT COUNT(*) FROM "samples" AS "t" WHERE "r"."x_b" = "t"."x_b") AS "tot_b", (SELECT COUNT(*) FROM "samples" AS "samples") AS "g_t" /* g_t is the grand total */ FROM "samples" AS "r" GROUP BY "x_a", "x_b") SELECT ((SELECT COUNT(DISTINCT "x_a") FROM "contingency_table" AS "contingency_table") - 1) * ((SELECT COUNT(DISTINCT "x_b") FROM "contingency_table" AS "contingency_table") - 1) AS "degrees_of_freedom", SUM(("observed" - ("tot_a" * "tot_b" / "g_t")) * ("observed" - ("tot_a" * "tot_b" / "g_t")) / ("tot_a" * "tot_b" / "g_t")) AS "chi_square" FROM "contingency_table" AS "contingency_table" /* H0: the two variables are independent */ /* H1: the two 
variables are dependent */ /* if chi_square > critical_value, reject H0 */ /* if chi_square <= critical_value, fail to reject H0 */ HAVING NOT "chi_square" > 9.48773""" + == """WITH "samples" AS (SELECT "a" AS "x_a", "b" AS "x_b" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE (NOT "a" IS NULL AND NOT "b" IS NULL) AND TRUE), "contingency_table" AS (SELECT "x_a", "x_b", COUNT(*) AS "observed", (SELECT COUNT(*) FROM "samples" AS "t" WHERE "r"."x_a" = "t"."x_a") AS "tot_a", (SELECT COUNT(*) FROM "samples" AS "t" WHERE "r"."x_b" = "t"."x_b") AS "tot_b", (SELECT COUNT(*) FROM "samples" AS "samples") AS "g_t" /* g_t is the grand total */ FROM "samples" AS "r" GROUP BY "x_a", "x_b") SELECT ((SELECT COUNT(DISTINCT "x_a") FROM "contingency_table" AS "contingency_table") - 1) * ((SELECT COUNT(DISTINCT "x_b") FROM "contingency_table" AS "contingency_table") - 1) AS "degrees_of_freedom", SUM(("observed" - ("tot_a" * "tot_b" / "g_t")) * ("observed" - ("tot_a" * "tot_b" / "g_t")) / ("tot_a" * "tot_b" / "g_t")) AS "chi_square" FROM "contingency_table" AS "contingency_table" /* H0: the two variables are independent */ /* H1: the two variables are dependent */ /* if chi_square > critical_value, reject H0 */ /* if chi_square <= critical_value, fail to reject H0 */ HAVING NOT "chi_square" > 9.48773""" ) @@ -638,7 +639,7 @@ def test_pattern_audits(model: Model): ) assert ( rendered_query.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE (NOT REGEXP_LIKE("a", \'^\\d.*\') AND NOT REGEXP_LIKE("a", \'.*!$\')) AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE (NOT REGEXP_LIKE("a", \'^\\d.*\') AND NOT REGEXP_LIKE("a", \'.*!$\')) AND TRUE""" ) rendered_query = model.render_audit_query( @@ -648,7 +649,7 @@ def 
test_pattern_audits(model: Model): ) assert ( rendered_query.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE (REGEXP_LIKE("a", \'^\\d.*\') OR REGEXP_LIKE("a", \'.*!$\')) AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE (REGEXP_LIKE("a", \'^\\d.*\') OR REGEXP_LIKE("a", \'.*!$\')) AND TRUE""" ) rendered_query = model.render_audit_query( @@ -658,7 +659,7 @@ def test_pattern_audits(model: Model): ) assert ( rendered_query.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE (NOT "a" LIKE \'jim%\' AND NOT "a" LIKE \'pam%\') AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE (NOT "a" LIKE \'jim%\' AND NOT "a" LIKE \'pam%\') AND TRUE""" ) rendered_query = model.render_audit_query( @@ -668,7 +669,7 @@ def test_pattern_audits(model: Model): ) assert ( rendered_query.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_q_0" WHERE ("a" LIKE \'jim%\' OR "a" LIKE \'pam%\') AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN \'1970-01-01\' AND \'1970-01-01\') AS "_0" WHERE ("a" LIKE \'jim%\' OR "a" LIKE \'pam%\') AND TRUE""" ) @@ -730,6 +731,27 @@ def test_render_definition(): assert "def test_macro(evaluator, v):" in format_model_expressions(audit.render_definition()) +def test_render_definition_dbt_node_info(): + node_info = DbtNodeInfo( + unique_id="test.project.my_audit", name="my_audit", fqn="project.my_audit" + ) + + audit = StandaloneAudit(name="my_audit", dbt_node_info=node_info, query=jinja_query("select 1")) + + assert ( + 
audit.render_definition()[0].sql(pretty=True) + == """AUDIT ( + name my_audit, + dbt_node_info ( + fqn := 'project.my_audit', + name := 'my_audit', + unique_id := 'test.project.my_audit' + ), + standalone TRUE +)""" + ) + + def test_text_diff(): expressions = parse( """ @@ -792,7 +814,7 @@ def test_string_length_between_audit(model: Model): ) assert ( rendered_query.sql() - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE (LENGTH("x") < 1 OR LENGTH("x") > 5) AND TRUE""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE (LENGTH("x") < 1 OR LENGTH("x") > 5) AND TRUE""" ) @@ -802,7 +824,7 @@ def test_not_constant_audit(model: Model): ) assert ( rendered_query.sql() - == """SELECT 1 AS "1" FROM (SELECT COUNT(DISTINCT "x") AS "t_cardinality" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE "x" > 1) AS "r" WHERE "r"."t_cardinality" <= 1""" + == """SELECT 1 AS "1" FROM (SELECT COUNT(DISTINCT "x") AS "t_cardinality" FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE "x" > 1) AS "r" WHERE "r"."t_cardinality" <= 1""" ) @@ -814,7 +836,7 @@ def test_condition_with_macro_var(model: Model): ) assert ( rendered_query.sql(dialect="duckdb") - == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_q_0" WHERE "x" IS NULL AND "dt" BETWEEN CAST('1970-01-01 00:00:00+00:00' AS TIMESTAMPTZ) AND CAST('1970-01-01 23:59:59.999999+00:00' AS TIMESTAMPTZ)""" + == """SELECT * FROM (SELECT * FROM "db"."test_model" AS "test_model" WHERE "ds" BETWEEN '1970-01-01' AND '1970-01-01') AS "_0" WHERE "x" IS NULL AND "dt" BETWEEN CAST('1970-01-01 00:00:00+00:00' AS TIMESTAMPTZ) AND CAST('1970-01-01 23:59:59.999999+00:00' AS 
TIMESTAMPTZ)""" ) @@ -885,7 +907,7 @@ def test_load_inline_audits(assert_exp_eq): def test_model_inline_audits(sushi_context: Context): model_name = "sushi.waiter_names" - expected_query = 'SELECT * FROM (SELECT * FROM "memory"."sushi"."waiter_names" AS "waiter_names") AS "_q_0" WHERE "id" < 0' + expected_query = 'SELECT * FROM (SELECT * FROM "memory"."sushi"."waiter_names" AS "waiter_names") AS "_0" WHERE "id" < 0' model = sushi_context.get_snapshot(model_name, raise_if_missing=True).node assert isinstance(model, SeedModel) diff --git a/tests/core/test_config.py b/tests/core/test_config.py index d0fad16e76..8c81a90b8d 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -570,7 +570,8 @@ def test_variables(): assert config.get_gateway("local").variables == {"uppercase_var": 2} with pytest.raises( - ConfigError, match="Unsupported variable value type: " + ConfigError, + match=r"Unsupported variable value type: ", ): Config(variables={"invalid_var": exp.column("sqlglot_expr")}) @@ -862,6 +863,39 @@ def test_trino_schema_location_mapping_syntax(tmp_path): assert len(conn.schema_location_mapping) == 2 +def test_trino_source_option(tmp_path): + config_path = tmp_path / "config_trino_source.yaml" + with open(config_path, "w", encoding="utf-8") as fd: + fd.write( + """ + gateways: + trino: + connection: + type: trino + user: trino + host: trino + catalog: trino + source: my_sqlmesh_source + + default_gateway: trino + + model_defaults: + dialect: trino + """ + ) + + config = load_config_from_paths( + Config, + project_paths=[config_path], + ) + + from sqlmesh.core.config.connection import TrinoConnectionConfig + + conn = config.gateways["trino"].connection + assert isinstance(conn, TrinoConnectionConfig) + assert conn.source == "my_sqlmesh_source" + + def test_gcp_postgres_ip_and_scopes(tmp_path): config_path = tmp_path / "config_gcp_postgres.yaml" with open(config_path, "w", encoding="utf-8") as fd: @@ -1017,7 +1051,7 @@ def 
test_environment_statements_config(tmp_path): ] -# https://github.com/TobikoData/sqlmesh/pull/4049 +# https://github.com/SQLMesh/sqlmesh/pull/4049 def test_pydantic_import_error() -> None: class TestConfig(DuckDBConnectionConfig): pass diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 907d1b70cc..2ff95525f7 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -4,6 +4,7 @@ import pytest from _pytest.fixtures import FixtureRequest +from sqlglot import exp from unittest.mock import patch, MagicMock from sqlmesh.core.config.connection import ( @@ -425,6 +426,82 @@ def test_trino_schema_location_mapping(make_config): assert all((isinstance(v, str) for v in config.schema_location_mapping.values())) +def test_trino_catalog_type_override(make_config): + required_kwargs = dict( + type="trino", + user="user", + host="host", + catalog="catalog", + ) + + config = make_config( + **required_kwargs, + catalog_type_overrides={"my_catalog": "iceberg"}, + ) + + assert config.catalog_type_overrides is not None + assert len(config.catalog_type_overrides) == 1 + + assert config.catalog_type_overrides == {"my_catalog": "iceberg"} + + +def test_trino_timestamp_mapping(make_config): + required_kwargs = dict( + type="trino", + user="user", + host="host", + catalog="catalog", + ) + + # Test config without timestamp_mapping + config = make_config(**required_kwargs) + assert config.timestamp_mapping is None + + # Test config with timestamp_mapping + config = make_config( + **required_kwargs, + timestamp_mapping={ + "TIMESTAMP": "TIMESTAMP(6)", + "TIMESTAMP(3)": "TIMESTAMP WITH TIME ZONE", + }, + ) + + assert config.timestamp_mapping is not None + assert config.timestamp_mapping[exp.DataType.build("TIMESTAMP")] == exp.DataType.build( + "TIMESTAMP(6)" + ) + + # Test with invalid source type + with pytest.raises(ConfigError) as exc_info: + make_config( + **required_kwargs, + timestamp_mapping={ + 
"INVALID_TYPE": "TIMESTAMP", + }, + ) + assert "Invalid SQL type string" in str(exc_info.value) + assert "INVALID_TYPE" in str(exc_info.value) + + # Test with invalid target type (not a valid SQL type) + with pytest.raises(ConfigError) as exc_info: + make_config( + **required_kwargs, + timestamp_mapping={ + "TIMESTAMP": "INVALID_TARGET_TYPE", + }, + ) + assert "Invalid SQL type string" in str(exc_info.value) + assert "INVALID_TARGET_TYPE" in str(exc_info.value) + + # Test with empty mapping + config = make_config( + **required_kwargs, + timestamp_mapping={}, + ) + assert config.timestamp_mapping is not None + assert config.timestamp_mapping == {} + + def test_duckdb(make_config): config = make_config( type="duckdb", @@ -803,6 +880,37 @@ def test_ducklake_attach_add_ducklake_prefix(): ) +def test_ducklake_metadata_schema(): + # Test that metadata_schema parameter is included when specified + options = DuckDBAttachOptions( + type="ducklake", path="catalog.ducklake", metadata_schema="custom_schema" + ) + assert ( + options.to_sql(alias="my_ducklake") + == "ATTACH IF NOT EXISTS 'ducklake:catalog.ducklake' AS my_ducklake (METADATA_SCHEMA 'custom_schema')" + ) + + # Test that metadata_schema is not included when not specified (default behavior) + options = DuckDBAttachOptions(type="ducklake", path="catalog.ducklake") + assert ( + options.to_sql(alias="my_ducklake") + == "ATTACH IF NOT EXISTS 'ducklake:catalog.ducklake' AS my_ducklake" + ) + + # Test metadata_schema with other ducklake options + options = DuckDBAttachOptions( + type="ducklake", + path="catalog.ducklake", + data_path="/path/to/data", + encrypted=True, + metadata_schema="workspace_schema", + ) + assert ( + options.to_sql(alias="my_ducklake") + == "ATTACH IF NOT EXISTS 'ducklake:catalog.ducklake' AS my_ducklake (DATA_PATH '/path/to/data', ENCRYPTED, METADATA_SCHEMA 'workspace_schema')" + ) + + def test_duckdb_config_json_strings(make_config): config = make_config( type="duckdb", @@ -925,42 +1033,86 @@ def 
test_motherduck_token_mask(make_config): assert isinstance(config_1, MotherDuckConnectionConfig) assert isinstance(config_2, MotherDuckConnectionConfig) assert isinstance(config_3, MotherDuckConnectionConfig) - assert config_1._mask_motherduck_token(config_1.database) == "whodunnit" + + # motherduck format + assert config_1._mask_sensitive_data(config_1.database) == "whodunnit" assert ( - config_1._mask_motherduck_token(f"md:{config_1.database}?motherduck_token={config_1.token}") - == "md:whodunnit?motherduck_token=*****" + config_1._mask_sensitive_data(f"md:{config_1.database}?motherduck_token={config_1.token}") + == "md:whodunnit?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token( + config_1._mask_sensitive_data( f"md:{config_1.database}?attach_mode=single&motherduck_token={config_1.token}" ) - == "md:whodunnit?attach_mode=single&motherduck_token=*****" + == "md:whodunnit?attach_mode=single&motherduck_token=********" ) assert ( - config_2._mask_motherduck_token(f"md:{config_2.database}?motherduck_token={config_2.token}") - == "md:whodunnit?motherduck_token=******************" + config_2._mask_sensitive_data(f"md:{config_2.database}?motherduck_token={config_2.token}") + == "md:whodunnit?motherduck_token=********" ) assert ( - config_3._mask_motherduck_token(f"md:?motherduck_token={config_3.token}") - == "md:?motherduck_token=**********" + config_3._mask_sensitive_data(f"md:?motherduck_token={config_3.token}") + == "md:?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token("?motherduck_token=secret1235") - == "?motherduck_token=**********" + config_1._mask_sensitive_data("?motherduck_token=secret1235") + == "?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=short") - == "md:whodunnit?motherduck_token=*****" + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=short") + == "md:whodunnit?motherduck_token=********" ) assert ( - 
config_1._mask_motherduck_token("md:whodunnit?motherduck_token=longtoken123456789") - == "md:whodunnit?motherduck_token=******************" + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=longtoken123456789") + == "md:whodunnit?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=") + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=") == "md:whodunnit?motherduck_token=" ) - assert config_1._mask_motherduck_token(":memory:") == ":memory:" + assert config_1._mask_sensitive_data(":memory:") == ":memory:" + + # postgres format + assert ( + config_1._mask_sensitive_data( + "postgres:dbname=mydb user=myuser password=secret123 host=localhost" + ) + == "postgres:dbname=mydb user=myuser password=******** host=localhost" + ) + + assert ( + config_1._mask_sensitive_data( + "dbname=postgres user=postgres password=pg_secret host=127.0.0.1" + ) + == "dbname=postgres user=postgres password=******** host=127.0.0.1" + ) + assert ( + config_1._mask_sensitive_data( + "postgres:dbname=testdb password=verylongpassword123 user=admin" + ) + == "postgres:dbname=testdb password=******** user=admin" + ) + assert config_1._mask_sensitive_data("postgres:password=short") == "postgres:password=********" + assert ( + config_1._mask_sensitive_data("postgres:host=localhost password=p@ssw0rd! 
dbname=db") + == "postgres:host=localhost password=******** dbname=db" + ) + + assert ( + config_1._mask_sensitive_data("postgres:dbname=mydb user=myuser host=localhost") + == "postgres:dbname=mydb user=myuser host=localhost" + ) + + assert ( + config_1._mask_sensitive_data("md:db?motherduck_token=token123 postgres:password=secret") + == "md:db?motherduck_token=******** postgres:password=********" + ) + + # MySQL format + assert ( + config_1._mask_sensitive_data("host=localhost user=root password=mysql123 database=mydb") + == "host=localhost user=root password=******** database=mydb" + ) def test_bigquery(make_config): @@ -979,6 +1131,27 @@ def test_bigquery(make_config): assert config.get_catalog() == "project" assert config.is_recommended_for_state_sync is False + # Test reservation + config_with_reservation = make_config( + type="bigquery", + project="project", + reservation="projects/my-project/locations/us-central1/reservations/my-reservation", + check_import=False, + ) + assert isinstance(config_with_reservation, BigQueryConnectionConfig) + assert ( + config_with_reservation.reservation + == "projects/my-project/locations/us-central1/reservations/my-reservation" + ) + + # Test that reservation is included in _extra_engine_config + extra_config = config_with_reservation._extra_engine_config + assert "reservation" in extra_config + assert ( + extra_config["reservation"] + == "projects/my-project/locations/us-central1/reservations/my-reservation" + ) + with pytest.raises(ConfigError, match="you must also specify the `project` field"): make_config(type="bigquery", execution_project="execution_project", check_import=False) diff --git a/tests/core/test_context.py b/tests/core/test_context.py index a9d6f7967f..c3d88e205e 100644 --- a/tests/core/test_context.py +++ b/tests/core/test_context.py @@ -62,6 +62,7 @@ NoChangesPlanError, ) from sqlmesh.utils.metaprogramming import Executable +from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path from 
tests.utils.test_helpers import use_terminal_console from tests.utils.test_filesystem import create_temp_file @@ -700,6 +701,45 @@ def test_clear_caches(tmp_path: pathlib.Path): assert not cache_dir.exists() +def test_clear_caches_with_long_base_path(tmp_path: pathlib.Path): + base_path = tmp_path / ("abcde" * 50) + assert ( + len(str(base_path.absolute())) > 260 + ) # Paths longer than 260 chars trigger problems on Windows + + default_cache_dir = base_path / c.CACHE + custom_cache_dir = base_path / ".test_cache" + + # note: we create the Context here so it doesnt get passed any "fixed" paths + ctx = Context(config=Config(cache_dir=str(custom_cache_dir)), paths=base_path) + + if IS_WINDOWS: + # fix these so we can use them in this test + default_cache_dir = fix_windows_path(default_cache_dir) + custom_cache_dir = fix_windows_path(custom_cache_dir) + + default_cache_dir.mkdir(parents=True) + custom_cache_dir.mkdir(parents=True) + + default_cache_file = default_cache_dir / "cache.txt" + custom_cache_file = custom_cache_dir / "cache.txt" + + default_cache_file.write_text("test") + custom_cache_file.write_text("test") + + assert default_cache_file.exists() + assert custom_cache_file.exists() + assert default_cache_dir.exists() + assert custom_cache_dir.exists() + + ctx.clear_caches() + + assert not default_cache_file.exists() + assert not custom_cache_file.exists() + assert not default_cache_dir.exists() + assert not custom_cache_dir.exists() + + def test_cache_path_configurations(tmp_path: pathlib.Path): project_dir = tmp_path / "project" project_dir.mkdir(parents=True) @@ -1030,7 +1070,7 @@ def test_janitor(sushi_context, mocker: MockerFixture) -> None: sushi_context._engine_adapter = adapter_mock sushi_context.engine_adapters = {sushi_context.config.default_gateway: adapter_mock} sushi_context._state_sync = state_sync_mock - state_sync_mock.get_expired_snapshots.return_value = [] + state_sync_mock.get_expired_snapshots.return_value = None 
sushi_context._run_janitor() # Assert that the schemas are dropped just twice for the schema based environment @@ -1117,6 +1157,72 @@ def test_plan_start_ahead_of_end(copy_to_temp_path): context.close() +@pytest.mark.slow +def test_plan_seed_model_excluded_from_default_end(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("examples/sushi") + with time_machine.travel("2024-06-01 00:00:00 UTC"): + context = Context(paths=path, gateway="duckdb_persistent") + context.plan("prod", no_prompts=True, auto_apply=True) + max_ends = context.state_sync.max_interval_end_per_model("prod") + seed_fqns = [k for k in max_ends if "waiter_names" in k] + assert len(seed_fqns) == 1 + assert max_ends[seed_fqns[0]] == to_timestamp("2024-06-01") + context.close() + + with time_machine.travel("2026-03-01 00:00:00 UTC"): + context = Context(paths=path, gateway="duckdb_persistent") + + # a model that depends on this seed but has no interval in prod yet so only the seed would contribute to max_interval_end_per_model + context.upsert_model( + load_sql_based_model( + parse( + """ + MODEL( + name sushi.waiter_summary, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds + ), + start '2025-01-01', + cron '@daily' + ); + + SELECT + id, + name, + @start_ds AS ds + FROM + sushi.waiter_names + WHERE + @start_ds BETWEEN @start_ds AND @end_ds + """ + ), + default_catalog=context.default_catalog, + ) + ) + + # the seed's interval end would still be 2024-06-01 + max_ends = context.state_sync.max_interval_end_per_model("prod") + seed_fqns = [k for k in max_ends if "waiter_names" in k] + assert len(seed_fqns) == 1 + assert max_ends[seed_fqns[0]] == to_timestamp("2024-06-01") + + # the plan start date 2025-01-01 is after the seeds end date but shouldnt cause the plan to fail + plan = context.plan( + "dev", start="2025-01-01", no_prompts=True, select_models=["*waiter_summary"] + ) + + # the end should fall back to execution_time rather than seeds end + assert plan.models_to_backfill == { + 
'"duckdb"."sushi"."waiter_names"', + '"duckdb"."sushi"."waiter_summary"', + } + assert plan.provided_end is None + assert plan.provided_start == "2025-01-01" + assert to_timestamp(plan.end) == to_timestamp("2026-03-01") + assert to_timestamp(plan.start) == to_timestamp("2025-01-01") + context.close() + + @pytest.mark.slow def test_schema_error_no_default(sushi_context_pre_scheduling) -> None: context = sushi_context_pre_scheduling @@ -1466,6 +1572,8 @@ def test_requirements(copy_to_temp_path: t.Callable): "dev", no_prompts=True, skip_tests=True, skip_backfill=True, auto_apply=True ).environment requirements = {"ipywidgets", "numpy", "pandas", "test_package"} + if IS_WINDOWS: + requirements.add("pendulum") assert environment.requirements["pandas"] == "2.2.2" assert set(environment.requirements) == requirements @@ -1473,7 +1581,10 @@ def test_requirements(copy_to_temp_path: t.Callable): context._excluded_requirements = {"ipywidgets", "ruamel.yaml", "ruamel.yaml.clib"} diff = context.plan_builder("dev", skip_tests=True, skip_backfill=True).build().context_diff assert set(diff.previous_requirements) == requirements - assert set(diff.requirements) == {"numpy", "pandas"} + reqs = {"numpy", "pandas"} + if IS_WINDOWS: + reqs.add("pendulum") + assert set(diff.requirements) == reqs def test_deactivate_automatic_requirement_inference(copy_to_temp_path: t.Callable): @@ -1593,7 +1704,7 @@ def test_raw_code_handling(sushi_test_dbt_context: Context): hook = model.render_pre_statements()[0] assert ( hook.sql() - == f'''CREATE TABLE "t" AS SELECT 'Length is {raw_code_length}' AS "length_col"''' + == f'''CREATE TABLE IF NOT EXISTS "t" AS SELECT 'Length is {raw_code_length}' AS "length_col"''' ) @@ -1945,7 +2056,7 @@ def access_adapter(evaluator): assert ( model.pre_statements[0].sql() - == "@IF(@runtime_stage IN ('evaluating', 'creating'), SET VARIABLE stats_model_start = NOW())" + == "@IF(@runtime_stage IN ('evaluating', 'creating'), SET stats_model_start = NOW())" ) assert ( 
model.post_statements[0].sql() @@ -2297,13 +2408,13 @@ def test_plan_audit_intervals(tmp_path: pathlib.Path, caplog): # Case 1: The timestamp audit should be in the inclusive range ['2025-02-01 00:00:00', '2025-02-01 23:59:59.999999'] assert ( - f"""SELECT COUNT(*) FROM (SELECT "timestamp_id" AS "timestamp_id" FROM (SELECT * FROM "sqlmesh__sqlmesh_audit"."sqlmesh_audit__timestamp_example__{timestamp_snapshot.version}" AS "sqlmesh_audit__timestamp_example__{timestamp_snapshot.version}" WHERE "timestamp_id" BETWEEN CAST('2025-02-01 00:00:00' AS TIMESTAMP) AND CAST('2025-02-01 23:59:59.999999' AS TIMESTAMP)) AS "_q_0" WHERE TRUE GROUP BY "timestamp_id" HAVING COUNT(*) > 1) AS "audit\"""" + f"""SELECT COUNT(*) FROM (SELECT "timestamp_id" AS "timestamp_id" FROM (SELECT * FROM "sqlmesh__sqlmesh_audit"."sqlmesh_audit__timestamp_example__{timestamp_snapshot.version}" AS "sqlmesh_audit__timestamp_example__{timestamp_snapshot.version}" WHERE "timestamp_id" BETWEEN CAST('2025-02-01 00:00:00' AS TIMESTAMP) AND CAST('2025-02-01 23:59:59.999999' AS TIMESTAMP)) AS "_0" WHERE TRUE GROUP BY "timestamp_id" HAVING COUNT(*) > 1) AS "audit\"""" in caplog.text ) # Case 2: The date audit should be in the inclusive range ['2025-02-01', '2025-02-01'] assert ( - f"""SELECT COUNT(*) FROM (SELECT "date_id" AS "date_id" FROM (SELECT * FROM "sqlmesh__sqlmesh_audit"."sqlmesh_audit__date_example__{date_snapshot.version}" AS "sqlmesh_audit__date_example__{date_snapshot.version}" WHERE "date_id" BETWEEN CAST('2025-02-01' AS DATE) AND CAST('2025-02-01' AS DATE)) AS "_q_0" WHERE TRUE GROUP BY "date_id" HAVING COUNT(*) > 1) AS "audit\"""" + f"""SELECT COUNT(*) FROM (SELECT "date_id" AS "date_id" FROM (SELECT * FROM "sqlmesh__sqlmesh_audit"."sqlmesh_audit__date_example__{date_snapshot.version}" AS "sqlmesh_audit__date_example__{date_snapshot.version}" WHERE "date_id" BETWEEN CAST('2025-02-01' AS DATE) AND CAST('2025-02-01' AS DATE)) AS "_0" WHERE TRUE GROUP BY "date_id" HAVING COUNT(*) > 1) AS 
"audit\"""" in caplog.text ) @@ -3050,9 +3161,10 @@ def test_uppercase_gateway_external_models(tmp_path): # Check that the column types are properly loaded (not UNKNOWN) external_model = gateway_specific_models[0] column_types = {name: str(dtype) for name, dtype in external_model.columns_to_types.items()} - assert column_types == {"id": "INT", "name": "TEXT"}, ( - f"External model column types should not be UNKNOWN, got: {column_types}" - ) + assert column_types == { + "id": "INT", + "name": "TEXT", + }, f"External model column types should not be UNKNOWN, got: {column_types}" # Test that when using a different case for the gateway parameter, we get the same results context_mixed_case = Context( @@ -3177,3 +3289,55 @@ def test_lint_model_projections(tmp_path: Path): with pytest.raises(LinterError, match=config_err): prod_plan = context.plan(no_prompts=True, auto_apply=True) + + +def test_grants_through_plan_apply(sushi_context, mocker): + from sqlmesh.core.engine_adapter.duckdb import DuckDBEngineAdapter + from sqlmesh.core.model.meta import GrantsTargetLayer + + model = sushi_context.get_model("sushi.waiter_revenue_by_day") + + mocker.patch.object(DuckDBEngineAdapter, "SUPPORTS_GRANTS", True) + sync_grants_mock = mocker.patch.object(DuckDBEngineAdapter, "sync_grants_config") + + model_with_grants = model.copy( + update={ + "grants": {"select": ["analyst", "reporter"]}, + "grants_target_layer": GrantsTargetLayer.ALL, + } + ) + sushi_context.upsert_model(model_with_grants) + + sushi_context.plan("dev", no_prompts=True, auto_apply=True) + + # When planning for dev env w/ metadata only changes, + # only virtual layer is updated, so no physical grants are applied + assert sync_grants_mock.call_count == 1 + assert all( + call[0][1] == {"select": ["analyst", "reporter"]} + for call in sync_grants_mock.call_args_list + ) + + sync_grants_mock.reset_mock() + + new_grants = ({"select": ["analyst", "reporter", "manager"], "insert": ["etl_user"]},) + model_updated = 
model_with_grants.copy( + update={ + "query": parse_one(model.query.sql() + " LIMIT 1000"), + "grants": new_grants, + # force model update, hence new physical table creation + "stamp": "update model and grants", + } + ) + sushi_context.upsert_model(model_updated) + sushi_context.plan("dev", no_prompts=True, auto_apply=True) + + # Applies grants 2 times: 1 x physical, 1 x virtual + assert sync_grants_mock.call_count == 2 + assert all(call[0][1] == new_grants for call in sync_grants_mock.call_args_list) + + sync_grants_mock.reset_mock() + + # plan for prod + sushi_context.plan(no_prompts=True, auto_apply=True) + assert sync_grants_mock.call_count == 2 diff --git a/tests/core/test_dialect.py b/tests/core/test_dialect.py index 11ffec3720..02068b1c59 100644 --- a/tests/core/test_dialect.py +++ b/tests/core/test_dialect.py @@ -16,6 +16,8 @@ from sqlmesh.core.model import SqlModel, load_sql_based_model from sqlmesh.core.config.connection import DIALECT_TO_TYPE +pytestmark = pytest.mark.dialect_isolated + def test_format_model_expressions(): x = format_model_expressions( @@ -717,3 +719,16 @@ def test_sqlglot_extended_correctly(dialect: str) -> None: assert isinstance(value, exp.Table) assert value.sql() == "foo" assert ast.sql(dialect=dialect) == "MODEL (\nname foo\n)" + + +def test_connected_identifier(): + ast = d.parse_one("""SELECT ("x"at time zone 'utc')::timestamp as x""", "redshift") + assert ast.sql("redshift") == """SELECT CAST(("x" AT TIME ZONE 'utc') AS TIMESTAMP) AS x""" + + +def test_pipe_syntax(): + ast = d.parse_one("SELECT * FROM (FROM t2 |> SELECT id)", "bigquery") + assert ( + ast.sql("bigquery") + == "SELECT * FROM (WITH __tmp1 AS (SELECT id FROM t2) SELECT * FROM __tmp1)" + ) diff --git a/tests/core/test_format.py b/tests/core/test_format.py index 9b51220a9f..7d544eadf0 100644 --- a/tests/core/test_format.py +++ b/tests/core/test_format.py @@ -28,7 +28,7 @@ def test_format_files(tmp_path: pathlib.Path, mocker: MockerFixture): f3 = create_temp_file( 
tmp_path, pathlib.Path(audits_dir, "audit_1.sql"), - "AUDIT(name assert_positive_id, dialect 'duckdb'); SELECT * FROM @this_model WHERE \"CaseSensitive\"_item_id < 0;", + "AUDIT(name assert_positive_id, dialect 'duckdb'); SELECT * FROM @this_model WHERE \"CaseSensitive_item_id\" < 0;", ) f4 = create_temp_file( tmp_path, diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py deleted file mode 100644 index ef7c59ea7d..0000000000 --- a/tests/core/test_integration.py +++ /dev/null @@ -1,10183 +0,0 @@ -from __future__ import annotations - -import typing as t -import json -from collections import Counter -from datetime import timedelta -from unittest import mock -from unittest.mock import patch -import logging -from textwrap import dedent -import os -import numpy as np # noqa: TID253 -import pandas as pd # noqa: TID253 -import pytest -from pytest import MonkeyPatch -from pathlib import Path -from sqlmesh.core.console import set_console, get_console, TerminalConsole -from sqlmesh.core.config.naming import NameInferenceConfig -from sqlmesh.core.model.common import ParsableSql -from sqlmesh.utils.concurrency import NodeExecutionFailedError -import time_machine -from pytest_mock.plugin import MockerFixture -from sqlglot import exp -from sqlglot.expressions import DataType -import re -from IPython.utils.capture import capture_output - - -from sqlmesh import CustomMaterialization -from sqlmesh.cli.project_init import init_example_project -from sqlmesh.core import constants as c -from sqlmesh.core import dialect as d -from sqlmesh.core.config import ( - AutoCategorizationMode, - Config, - GatewayConfig, - ModelDefaultsConfig, - DuckDBConnectionConfig, - TableNamingConvention, -) -from sqlmesh.core.config.common import EnvironmentSuffixTarget, VirtualEnvironmentMode -from sqlmesh.core.console import Console, get_console -from sqlmesh.core.context import Context -from sqlmesh.core.config.categorizer import CategorizerConfig -from sqlmesh.core.config.plan 
import PlanConfig -from sqlmesh.core.engine_adapter import EngineAdapter, DuckDBEngineAdapter -from sqlmesh.core.environment import EnvironmentNamingInfo -from sqlmesh.core.macros import macro -from sqlmesh.core.model import ( - FullKind, - IncrementalByTimeRangeKind, - IncrementalByUniqueKeyKind, - IncrementalUnmanagedKind, - Model, - ModelKind, - ModelKindName, - SqlModel, - PythonModel, - ViewKind, - CustomKind, - TimeColumn, - load_sql_based_model, -) -from sqlmesh.core.model.kind import model_kind_type_from_name -from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals -from sqlmesh.core.snapshot import ( - DeployabilityIndex, - Snapshot, - SnapshotChangeCategory, - SnapshotId, - SnapshotInfoLike, - SnapshotTableInfo, -) -from sqlmesh.utils.date import TimeLike, now, to_date, to_datetime, to_timestamp -from sqlmesh.utils.errors import NoChangesPlanError, SQLMeshError, PlanError, ConfigError -from sqlmesh.utils.pydantic import validate_string -from tests.conftest import DuckDBMetadata, SushiDataValidator -from sqlmesh.utils import CorrelationId -from tests.utils.test_helpers import use_terminal_console -from tests.utils.test_filesystem import create_temp_file - -if t.TYPE_CHECKING: - from sqlmesh import QueryOrDF - -pytestmark = pytest.mark.slow - - -@pytest.fixture(autouse=True) -def mock_choices(mocker: MockerFixture): - mocker.patch("sqlmesh.core.console.TerminalConsole._get_snapshot_change_category") - mocker.patch("sqlmesh.core.console.TerminalConsole._prompt_backfill") - - -def plan_choice(plan_builder: PlanBuilder, choice: SnapshotChangeCategory) -> None: - for snapshot in plan_builder.build().snapshots.values(): - if not snapshot.version: - plan_builder.set_choice(snapshot, choice) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -@pytest.mark.parametrize( - "context_fixture", - ["sushi_context", "sushi_no_default_catalog"], -) -def test_forward_only_plan_with_effective_date(context_fixture: Context, request): - context = 
request.getfixturevalue(context_fixture) - model_name = "sushi.waiter_revenue_by_day" - model = context.get_model(model_name) - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model)), start="2023-01-01") - snapshot = context.get_snapshot(model, raise_if_missing=True) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan_builder = context.plan_builder("dev", skip_tests=True, forward_only=True) - plan = plan_builder.build() - assert len(plan.new_snapshots) == 2 - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only - assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only - - assert to_timestamp(plan.start) == to_timestamp("2023-01-07") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], - ), - ] - - plan = plan_builder.set_effective_from("2023-01-05").build() - # Default start should be set to effective_from - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - plan = plan_builder.set_start("2023-01-06").build() - # Start override should take precedence - assert 
plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - plan = plan_builder.set_effective_from("2023-01-04").build() - # Start should remain unchanged - assert plan.start == "2023-01-06" - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert dev_df["event_date"].tolist() == [ - pd.to_datetime("2023-01-06"), - pd.to_datetime("2023-01-07"), - ] - - prod_plan = context.plan_builder(skip_tests=True).build() - # Make sure that the previously set effective_from is respected - assert prod_plan.start == to_timestamp("2023-01-04") - assert prod_plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), 
- (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(prod_plan) - - prod_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" - ) - assert prod_df["event_date"].tolist() == [ - pd.to_datetime(x) for x in ["2023-01-04", "2023-01-05", "2023-01-06", "2023-01-07"] - ] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_forward_only_model_regular_plan(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model_name = "sushi.waiter_revenue_by_day" - - model = context.get_model(model_name) - model = add_projection_to_model(t.cast(SqlModel, model)) - forward_only_kind = model.kind.copy(update={"forward_only": True}) - model = model.copy(update={"kind": forward_only_kind}) - - context.upsert_model(model) - snapshot = context.get_snapshot(model, raise_if_missing=True) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() - assert len(plan.new_snapshots) == 2 - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only - assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only - - assert plan.start == to_datetime("2023-01-01") - assert not plan.missing_intervals - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert 
not dev_df["event_date"].tolist() - - # Run a restatement plan to preview changes - plan_builder = context.plan_builder( - "dev", skip_tests=True, restate_models=[model_name], enable_preview=False - ) - plan_builder.set_start("2023-01-06") - assert plan_builder.build().missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - # Make sure that changed start is reflected in missing intervals - plan_builder.set_start("2023-01-07") - assert plan_builder.build().missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan_builder.build()) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY 
event_date" - ) - assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] - - # Promote changes to prod - prod_plan = context.plan_builder(skip_tests=True).build() - assert not prod_plan.missing_intervals - - context.apply(prod_plan) - - # The change was applied in a forward-only manner so no values in the new column should be populated - prod_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" - ) - assert not prod_df["event_date"].tolist() - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_forward_only_model_regular_plan_preview_enabled(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model_name = "sushi.waiter_revenue_by_day" - - model = context.get_model(model_name) - model = add_projection_to_model(t.cast(SqlModel, model)) - forward_only_kind = model.kind.copy(update={"forward_only": True}) - model = model.copy(update={"kind": forward_only_kind}) - - context.upsert_model(model) - snapshot = context.get_snapshot(model, raise_if_missing=True) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() - assert len(plan.new_snapshots) == 2 - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only - assert plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].is_forward_only - - assert to_timestamp(plan.start) == to_timestamp("2023-01-07") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-07"), 
to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_forward_only_model_restate_full_history_in_dev(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - model_name = "memory.sushi.customer_max_revenue" - expressions = d.parse( - f""" - MODEL ( - name {model_name}, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key customer_id, - forward_only true, - ), - ); - - SELECT - customer_id, MAX(revenue) AS max_revenue - FROM memory.sushi.customer_revenue_lifetime - GROUP BY 1; - """ - ) - - model = load_sql_based_model(expressions) - assert model.forward_only - assert model.kind.full_history_restatement_only - context.upsert_model(model) - - context.plan("prod", skip_tests=True, auto_apply=True, enable_preview=False) - - model_kwargs = { - **model.dict(), - # Make a breaking change. 
- "query": model.query.order_by("customer_id"), # type: ignore - } - context.upsert_model(SqlModel.parse_obj(model_kwargs)) - - # Apply the model change in dev - plan = context.plan_builder( - "dev", - skip_tests=True, - enable_preview=False, - categorizer_config=CategorizerConfig.all_full(), - ).build() - assert not plan.missing_intervals - context.apply(plan) - - snapshot = context.get_snapshot(model, raise_if_missing=True) - snapshot_table_name = snapshot.table_name(False) - - # Manually insert a dummy value to check that the table is recreated during the restatement - context.engine_adapter.insert_append( - snapshot_table_name, - pd.DataFrame({"customer_id": [-1], "max_revenue": [100]}), - ) - df = context.engine_adapter.fetchdf( - "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1" - ) - assert df["cnt"][0] == 1 - - # Apply a restatement plan in dev - plan = context.plan("dev", restate_models=[model.name], auto_apply=True, enable_preview=False) - assert len(plan.missing_intervals) == 1 - - # Check that the dummy value is not present - df = context.engine_adapter.fetchdf( - "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue WHERE customer_id = -1" - ) - assert df["cnt"][0] == 0 - - # Check that the table is not empty - df = context.engine_adapter.fetchdf( - "SELECT COUNT(*) AS cnt FROM sushi__dev.customer_max_revenue" - ) - assert df["cnt"][0] > 0 - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_full_history_restatement_model_regular_plan_preview_enabled( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model_name = "sushi.marketing" # SCD2 model - - model = context.get_model(model_name) - model = add_projection_to_model(t.cast(SqlModel, model)) - - context.upsert_model(model) - snapshot = context.get_snapshot(model, raise_if_missing=True) - customers_snapshot = context.get_snapshot("sushi.customers", raise_if_missing=True) - 
active_customers_snapshot = context.get_snapshot( - "sushi.active_customers", raise_if_missing=True - ) - waiter_as_customer_snapshot = context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() - - assert len(plan.new_snapshots) == 6 - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[customers_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[active_customers_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[waiter_as_customer_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert all(s.is_forward_only for s in plan.new_snapshots) - - assert to_timestamp(plan.start) == to_timestamp("2023-01-07") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_metadata_changed_regular_plan_preview_enabled(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model_name = "sushi.waiter_revenue_by_day" - - model = context.get_model(model_name) - model = model.copy(update={"owner": "new_owner"}) - - context.upsert_model(model) - snapshot = context.get_snapshot(model, raise_if_missing=True) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() - assert len(plan.new_snapshots) == 2 - assert ( - 
plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.METADATA - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.METADATA - ) - assert not plan.missing_intervals - assert not plan.restatements - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_hourly_model_with_lookback_no_backfill_in_dev(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - - model_name = "sushi.waiter_revenue_by_day" - - model = context.get_model(model_name) - model = SqlModel.parse_obj( - { - **model.dict(), - "kind": model.kind.copy(update={"lookback": 1}), - "cron": "@hourly", - "audits": [], - } - ) - context.upsert_model(model) - - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - top_waiters_model = context.get_model("sushi.top_waiters") - top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True) - context.upsert_model(top_waiters_model) - - context.get_snapshot(model, raise_if_missing=True) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - with time_machine.travel(now() + timedelta(hours=2)): - plan = context.plan_builder("dev", skip_tests=True).build() - # Make sure the waiter_revenue_by_day model is not backfilled. 
- assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - -@time_machine.travel("2023-01-08 00:00:00 UTC", tick=False) -def test_parent_cron_after_child(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.waiter_revenue_by_day") - model = SqlModel.parse_obj( - { - **model.dict(), - "cron": "50 23 * * *", - } - ) - context.upsert_model(model) - - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) - assert waiter_revenue_by_day_snapshot.intervals == [ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-07")) - ] - - top_waiters_model = context.get_model("sushi.top_waiters") - top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=True) - context.upsert_model(top_waiters_model) - - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - with time_machine.travel("2023-01-08 23:55:00 UTC"): # Past parent's cron, but before child's - plan = context.plan_builder("dev", skip_tests=True).build() - # Make sure the waiter_revenue_by_day model is not backfilled. 
- assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - -@time_machine.travel("2023-01-08 00:00:00 UTC") -@pytest.mark.parametrize( - "forward_only, expected_intervals", - [ - ( - False, - [ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - ], - ), - ( - True, - [ - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - ], - ), - ], -) -def test_cron_not_aligned_with_day_boundary( - init_and_plan_context: t.Callable, - forward_only: bool, - expected_intervals: t.List[t.Tuple[int, int]], -): - context, plan = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.waiter_revenue_by_day") - model = SqlModel.parse_obj( - { - **model.dict(), - "kind": model.kind.copy(update={"forward_only": forward_only}), - "cron": "0 12 * * *", - } - ) - context.upsert_model(model) - - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) - assert waiter_revenue_by_day_snapshot.intervals == [ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-07")) - ] - - model = add_projection_to_model(t.cast(SqlModel, model), literal=True) - 
context.upsert_model(model) - - waiter_revenue_by_day_snapshot = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ) - - with time_machine.travel("2023-01-08 00:10:00 UTC"): # Past model's cron. - plan = context.plan_builder( - "dev", select_models=[model.name], skip_tests=True, enable_preview=True - ).build() - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, - intervals=expected_intervals, - ), - ] - - -@time_machine.travel("2023-01-08 00:00:00 UTC") -def test_cron_not_aligned_with_day_boundary_new_model(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - existing_model = context.get_model("sushi.waiter_revenue_by_day") - existing_model = SqlModel.parse_obj( - { - **existing_model.dict(), - "kind": existing_model.kind.copy(update={"forward_only": True}), - } - ) - context.upsert_model(existing_model) - - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - # Add a new model and make a change to a forward-only model. - # The cron of the new model is not aligned with the day boundary. 
- new_model = load_sql_based_model( - d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind FULL, - cron '0 8 * * *', - start '2023-01-01', - ); - - SELECT 1 AS one; - """ - ) - ) - context.upsert_model(new_model) - - existing_model = add_projection_to_model(t.cast(SqlModel, existing_model), literal=True) - context.upsert_model(existing_model) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot( - "memory.sushi.new_model", raise_if_missing=True - ).snapshot_id, - intervals=[(to_timestamp("2023-01-06"), to_timestamp("2023-01-07"))], - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ).snapshot_id, - intervals=[ - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - -@time_machine.travel("2023-01-08 00:00:00 UTC") -def test_forward_only_preview_child_that_runs_before_parent(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - # This model runs at minute 30 of every hour - upstream_model = load_sql_based_model( - d.parse( - """ - MODEL ( - name memory.sushi.upstream_model, - kind FULL, - cron '30 * * * *', - start '2023-01-01', - ); - - SELECT 1 AS a; - """ - ) - ) - context.upsert_model(upstream_model) - - # This model runs at minute 0 of every hour, so it runs before the upstream model - downstream_model = load_sql_based_model( - d.parse( - """ - MODEL ( - name memory.sushi.downstream_model, - kind INCREMENTAL_BY_TIME_RANGE( - time_column event_date, - forward_only True, - ), - cron '0 * * * *', - start '2023-01-01', - ); - - SELECT a, '2023-01-06' AS event_date FROM memory.sushi.upstream_model; - """ - ) - ) - context.upsert_model(downstream_model) - - context.plan("prod", skip_tests=True, auto_apply=True) - - with 
time_machine.travel("2023-01-08 00:05:00 UTC"): - # The downstream model runs but not the upstream model - context.run("prod") - - # Now it's time for the upstream model to run but it hasn't run yet - with time_machine.travel("2023-01-08 00:35:00 UTC"): - # Make a change to the downstream model. - downstream_model = add_projection_to_model(t.cast(SqlModel, downstream_model), literal=True) - context.upsert_model(downstream_model) - - # The plan should only backfill the downstream model despite upstream missing intervals - plan = context.plan_builder("dev", skip_tests=True, enable_preview=True).build() - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot( - downstream_model.name, raise_if_missing=True - ).snapshot_id, - intervals=[ - (to_timestamp("2023-01-07 23:00:00"), to_timestamp("2023-01-08 00:00:00")) - ], - ), - ] - - -@time_machine.travel("2023-01-08 00:00:00 UTC") -def test_forward_only_monthly_model(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.waiter_revenue_by_day") - model = SqlModel.parse_obj( - { - **model.dict(), - "kind": model.kind.copy(update={"forward_only": True}), - "cron": "0 0 1 * *", - "start": "2022-01-01", - "audits": [], - } - ) - context.upsert_model(model) - - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - waiter_revenue_by_day_snapshot = context.get_snapshot(model.name, raise_if_missing=True) - assert waiter_revenue_by_day_snapshot.intervals == [ - (to_timestamp("2022-01-01"), to_timestamp("2023-01-01")) - ] - - model = add_projection_to_model(t.cast(SqlModel, model), literal=True) - context.upsert_model(model) - - waiter_revenue_by_day_snapshot = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ) - - plan = context.plan_builder( - "dev", select_models=[model.name], skip_tests=True, enable_preview=True - ).build() - assert to_timestamp(plan.start) 
== to_timestamp("2022-12-01") - assert to_timestamp(plan.end) == to_timestamp("2023-01-08") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, - intervals=[(to_timestamp("2022-12-01"), to_timestamp("2023-01-01"))], - ), - ] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_forward_only_parent_created_in_dev_child_created_in_prod( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - waiter_revenue_by_day_model = context.get_model("sushi.waiter_revenue_by_day") - waiter_revenue_by_day_model = add_projection_to_model( - t.cast(SqlModel, waiter_revenue_by_day_model) - ) - forward_only_kind = waiter_revenue_by_day_model.kind.copy(update={"forward_only": True}) - waiter_revenue_by_day_model = waiter_revenue_by_day_model.copy( - update={"kind": forward_only_kind} - ) - context.upsert_model(waiter_revenue_by_day_model) - - waiter_revenue_by_day_snapshot = context.get_snapshot( - waiter_revenue_by_day_model, raise_if_missing=True - ) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() - assert len(plan.new_snapshots) == 2 - assert ( - plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert all(s.is_forward_only for s in plan.new_snapshots) - assert plan.start == to_datetime("2023-01-01") - assert not plan.missing_intervals - - context.apply(plan) - - # Update the child to refer to a newly added column. 
- top_waiters_model = context.get_model("sushi.top_waiters") - top_waiters_model = add_projection_to_model(t.cast(SqlModel, top_waiters_model), literal=False) - context.upsert_model(top_waiters_model) - - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() - assert len(plan.new_snapshots) == 1 - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - - context.apply(plan) - - -@time_machine.travel("2023-01-08 00:00:00 UTC") -def test_new_forward_only_model(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, enable_preview=False) - - snapshot = context.get_snapshot("sushi.marketing") - - # The deployable table should not exist yet - assert not context.engine_adapter.table_exists(snapshot.table_name()) - assert context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False)) - - context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) - - assert context.engine_adapter.table_exists(snapshot.table_name()) - assert context.engine_adapter.table_exists(snapshot.table_name(is_deployable=False)) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_set_choice_is_reflected_in_missing_intervals(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()})) - context.plan("prod", skip_tests=True, no_prompts=True, auto_apply=True) - - model_name = "sushi.waiter_revenue_by_day" - - model = context.get_model(model_name) - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - snapshot = context.get_snapshot(model, raise_if_missing=True) - top_waiters_snapshot = 
context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan_builder = context.plan_builder("dev", skip_tests=True) - plan = plan_builder.build() - assert len(plan.new_snapshots) == 2 - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.start == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - # Change the category to BREAKING - plan = plan_builder.set_choice( - plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.BREAKING - ).build() - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_BREAKING - ) - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), 
to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - # Change the category back to NON_BREAKING - plan = plan_builder.set_choice( - plan.context_diff.snapshots[snapshot.snapshot_id], SnapshotChangeCategory.NON_BREAKING - ).build() - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert dev_df["event_date"].tolist() == [ - pd.to_datetime(x) - for x in [ - "2023-01-01", - "2023-01-02", - "2023-01-03", - "2023-01-04", - "2023-01-05", - "2023-01-06", - "2023-01-07", - ] - ] - - # Promote changes to prod - prod_plan = context.plan_builder(skip_tests=True).build() - assert not 
prod_plan.missing_intervals - - context.apply(prod_plan) - prod_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" - ) - assert prod_df["event_date"].tolist() == [ - pd.to_datetime(x) - for x in [ - "2023-01-01", - "2023-01-02", - "2023-01-03", - "2023-01-04", - "2023-01-05", - "2023-01-06", - "2023-01-07", - ] - ] - - -@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True) -@pytest.mark.parametrize("has_view_binding", [False, True]) -def test_non_breaking_change_after_forward_only_in_dev( - init_and_plan_context: t.Callable, has_view_binding: bool -): - context, plan = init_and_plan_context("examples/sushi") - context.snapshot_evaluator.adapter.HAS_VIEW_BINDING = has_view_binding - context.apply(plan) - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - waiter_revenue_by_day_snapshot = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build() - assert len(plan.new_snapshots) == 2 - assert ( - plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert all(s.is_forward_only for s in plan.new_snapshots) - assert to_timestamp(plan.start) == to_timestamp("2023-01-07") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, - intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], - ), - ] - - # Apply the forward-only changes first. 
- context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert dev_df["event_date"].tolist() == [pd.to_datetime("2023-01-07")] - - # Make a non-breaking change to a model downstream. - model = context.get_model("sushi.top_waiters") - # Select 'one' column from the updated upstream model. - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model), literal=False)) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True).build() - assert len(plan.new_snapshots) == 1 - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert to_timestamp(plan.start) == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - # Apply the non-breaking changes. - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT waiter_id FROM sushi__dev.top_waiters WHERE one IS NOT NULL" - ) - assert not dev_df.empty - - prod_df = context.engine_adapter.fetchdf("DESCRIBE sushi.top_waiters") - assert "one" not in prod_df["column_name"].tolist() - - # Deploy both changes to prod. 
- plan = context.plan_builder("prod", skip_tests=True).build() - assert plan.start == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - - prod_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL ORDER BY event_date" - ) - assert prod_df.empty - - prod_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT waiter_id FROM sushi.top_waiters WHERE one IS NOT NULL" - ) - assert prod_df.empty - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_indirect_non_breaking_change_after_forward_only_in_dev(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - # Make sure that the most downstream model is a materialized model. - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": FullKind()}) - context.upsert_model(model) - context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) - - # Make sushi.orders a forward-only model. 
- model = context.get_model("sushi.orders") - updated_model_kind = model.kind.copy(update={"forward_only": True}) - model = model.copy(update={"stamp": "force new version", "kind": updated_model_kind}) - context.upsert_model(model) - snapshot = context.get_snapshot(model, raise_if_missing=True) - - plan = context.plan_builder( - "dev", - skip_tests=True, - enable_preview=False, - categorizer_config=CategorizerConfig.all_full(), - ).build() - assert ( - plan.context_diff.snapshots[snapshot.snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - assert plan.context_diff.snapshots[snapshot.snapshot_id].is_forward_only - assert not plan.requires_backfill - context.apply(plan) - - # Make a non-breaking change to a model. - model = context.get_model("sushi.top_waiters") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() - assert len(plan.new_snapshots) == 1 - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert plan.start == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - # Apply the non-breaking changes. - context.apply(plan) - - # Make a non-breaking change upstream from the previously modified model. 
- model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - waiter_revenue_by_day_snapshot = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder("dev", skip_tests=True, enable_preview=False).build() - assert len(plan.new_snapshots) == 2 - assert ( - plan.context_diff.snapshots[waiter_revenue_by_day_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiters_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.start == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - # Apply the upstream non-breaking changes. - context.apply(plan) - assert not context.plan_builder("dev", skip_tests=True).build().requires_backfill - - # Deploy everything to prod. 
- plan = context.plan_builder("prod", skip_tests=True, enable_preview=False).build() - assert plan.start == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiters_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - assert ( - not context.plan_builder("prod", skip_tests=True, enable_preview=False) - .build() - .requires_backfill - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_changes_downstream_of_indirect_non_breaking_snapshot_without_intervals( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Make a breaking change first but don't backfill it - model = context.get_model("sushi.orders") - model = model.copy(update={"stamp": "force new version"}) - context.upsert_model(model) - plan_builder = context.plan_builder( - "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True - ) - plan_builder.set_choice(context.get_snapshot(model), 
SnapshotChangeCategory.BREAKING) - context.apply(plan_builder.build()) - - # Now make a non-breaking change to the same snapshot. - model = model.copy(update={"stamp": "force another new version"}) - context.upsert_model(model) - plan_builder = context.plan_builder( - "dev", skip_backfill=True, skip_tests=True, no_auto_categorization=True - ) - plan_builder.set_choice(context.get_snapshot(model), SnapshotChangeCategory.NON_BREAKING) - context.apply(plan_builder.build()) - - # Now make a change to a model downstream of the above model. - downstream_model = context.get_model("sushi.top_waiters") - downstream_model = downstream_model.copy(update={"stamp": "yet another new version"}) - context.upsert_model(downstream_model) - plan = context.plan_builder("dev", skip_tests=True).build() - - # If the parent is not representative then the child cannot be deployable - deployability_index = plan.deployability_index - assert not deployability_index.is_representative( - context.get_snapshot("sushi.waiter_revenue_by_day") - ) - assert not deployability_index.is_deployable(context.get_snapshot("sushi.top_waiters")) - - -@time_machine.travel("2023-01-08 15:00:00 UTC", tick=True) -def test_metadata_change_after_forward_only_results_in_migration(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Make a forward-only change - model = context.get_model("sushi.waiter_revenue_by_day") - model = model.copy(update={"kind": model.kind.copy(update={"forward_only": True})}) - model = add_projection_to_model(t.cast(SqlModel, model)) - context.upsert_model(model) - plan = context.plan("dev", skip_tests=True, auto_apply=True, no_prompts=True) - assert len(plan.new_snapshots) == 2 - assert all(s.is_forward_only for s in plan.new_snapshots) - - # Follow-up with a metadata change in the same environment - model = model.copy(update={"owner": "new_owner"}) - context.upsert_model(model) - plan = context.plan("dev", 
skip_tests=True, auto_apply=True, no_prompts=True) - assert len(plan.new_snapshots) == 2 - assert all(s.change_category == SnapshotChangeCategory.METADATA for s in plan.new_snapshots) - - # Deploy the latest change to prod - context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) - - # Check that the new column was added in prod - columns = context.engine_adapter.columns("sushi.waiter_revenue_by_day") - assert "one" in columns - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_indirect_non_breaking_downstream_of_forward_only(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Make sushi.orders a forward-only model. - forward_only_model = context.get_model("sushi.orders") - updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True}) - forward_only_model = forward_only_model.copy( - update={"stamp": "force new version", "kind": updated_model_kind} - ) - context.upsert_model(forward_only_model) - forward_only_snapshot = context.get_snapshot(forward_only_model, raise_if_missing=True) - - non_breaking_model = context.get_model("sushi.waiter_revenue_by_day") - non_breaking_model = non_breaking_model.copy(update={"start": "2023-01-01"}) - context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model))) - non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True) - top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan = context.plan_builder( - "dev", - skip_tests=True, - enable_preview=False, - categorizer_config=CategorizerConfig.all_full(), - ).build() - assert ( - plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - assert ( - plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - 
plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.context_diff.snapshots[forward_only_snapshot.snapshot_id].is_forward_only - assert not plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].is_forward_only - assert not plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].is_forward_only - - assert plan.start == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiter_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=non_breaking_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - assert ( - not context.plan_builder("dev", skip_tests=True, enable_preview=False) - .build() - .requires_backfill - ) - - # Deploy everything to prod. 
- plan = context.plan_builder("prod", skip_tests=True).build() - assert plan.start == to_timestamp("2023-01-01") - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=top_waiter_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - SnapshotIntervals( - snapshot_id=non_breaking_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - context.apply(plan) - assert ( - not context.plan_builder("prod", skip_tests=True, enable_preview=False) - .build() - .requires_backfill - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_breaking_only_impacts_immediate_children(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - context.upsert_model(context.get_model("sushi.top_waiters").copy(update={"kind": FullKind()})) - context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) - - breaking_model = context.get_model("sushi.orders") - breaking_model = breaking_model.copy(update={"stamp": "force new version"}) - context.upsert_model(breaking_model) - breaking_snapshot = context.get_snapshot(breaking_model, raise_if_missing=True) - - non_breaking_model = 
context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, non_breaking_model))) - non_breaking_snapshot = context.get_snapshot(non_breaking_model, raise_if_missing=True) - top_waiter_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - - plan_builder = context.plan_builder("dev", skip_tests=True, enable_preview=False) - plan_builder.set_choice(breaking_snapshot, SnapshotChangeCategory.BREAKING) - plan = plan_builder.build() - assert ( - plan.context_diff.snapshots[breaking_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - assert ( - plan.context_diff.snapshots[non_breaking_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.snapshots[top_waiter_snapshot.snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert plan.start == to_timestamp("2023-01-01") - assert not any(i.snapshot_id == top_waiter_snapshot.snapshot_id for i in plan.missing_intervals) - - context.apply(plan) - assert ( - not context.plan_builder("dev", skip_tests=True, enable_preview=False) - .build() - .requires_backfill - ) - - # Deploy everything to prod. - plan = context.plan_builder("prod", skip_tests=True).build() - assert not plan.missing_intervals - - context.apply(plan) - assert ( - not context.plan_builder("prod", skip_tests=True, enable_preview=False) - .build() - .requires_backfill - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_run_with_select_models( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - with time_machine.travel("2023-01-09 00:00:00 UTC"): - assert context.run(select_models=["*waiter_revenue_by_day"]) - - snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) - # Only waiter_revenue_by_day and its parents should be backfilled up to 2023-01-09. 
- assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { - '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), - '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"), - '"memory"."sushi"."orders"': to_timestamp("2023-01-09"), - '"memory"."sushi"."items"': to_timestamp("2023-01-09"), - '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"), - '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"), - '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), - '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"), - '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"), - '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"), - '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"), - '"memory"."raw"."demographics"': to_timestamp("2023-01-08"), - "assert_item_price_above_zero": to_timestamp("2023-01-08"), - '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customers"': to_timestamp("2023-01-08"), - '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"), - '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"), - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_with_run( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - with time_machine.travel("2023-01-09 00:00:00 UTC"): - plan = context.plan(run=True) - assert plan.has_changes - assert plan.missing_intervals - - context.apply(plan) - - snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) - assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { - 
'"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), - '"memory"."sushi"."order_items"': to_timestamp("2023-01-09"), - '"memory"."sushi"."orders"': to_timestamp("2023-01-09"), - '"memory"."sushi"."items"': to_timestamp("2023-01-09"), - '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-09"), - '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-09"), - '"memory"."sushi"."latest_order"': to_timestamp("2023-01-09"), - '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), - '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-09"), - '"memory"."sushi"."marketing"': to_timestamp("2023-01-09"), - '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-09"), - '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-09"), - '"memory"."raw"."demographics"': to_timestamp("2023-01-09"), - "assert_item_price_above_zero": to_timestamp("2023-01-09"), - '"memory"."sushi"."active_customers"': to_timestamp("2023-01-09"), - '"memory"."sushi"."customers"': to_timestamp("2023-01-09"), - '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-09"), - '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-09"), - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_ignore_cron( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context("examples/sushi") - - expressions = d.parse( - f""" - MODEL ( - name memory.sushi.test_allow_partials, - kind INCREMENTAL_UNMANAGED, - allow_partials true, - start '2023-01-01', - ); - - SELECT @end_ts AS end_ts - """ - ) - model = load_sql_based_model(expressions) - - context.upsert_model(model) - context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) - - assert ( - context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[ - 0 - ] - == "2023-01-07 23:59:59.999999" - ) - - plan_no_ignore_cron = context.plan_builder( - "prod", run=True, ignore_cron=False, 
skip_tests=True - ).build() - assert not plan_no_ignore_cron.missing_intervals - - plan = context.plan_builder("prod", run=True, ignore_cron=True, skip_tests=True).build() - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot(model, raise_if_missing=True).snapshot_id, - intervals=[ - (to_timestamp("2023-01-08"), to_timestamp("2023-01-08 15:00:00")), - ], - ) - ] - context.apply(plan) - - assert ( - context.engine_adapter.fetchone("SELECT MAX(end_ts) FROM memory.sushi.test_allow_partials")[ - 0 - ] - == "2023-01-08 14:59:59.999999" - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_run_respects_excluded_transitive_dependencies(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - # Graph: C <- B <- A - # B is a transitive dependency linking A and C - # Note that the alphabetical ordering of the model names is intentional and helps - # surface the problem - expressions_a = d.parse( - f""" - MODEL ( - name memory.sushi.test_model_c, - kind FULL, - allow_partials true, - cron '@hourly', - ); - - SELECT @execution_ts AS execution_ts - """ - ) - model_c = load_sql_based_model(expressions_a) - context.upsert_model(model_c) - - # A VIEW model with no partials allowed and a daily cron instead of hourly. 
- expressions_b = d.parse( - f""" - MODEL ( - name memory.sushi.test_model_b, - kind VIEW, - allow_partials false, - cron '@daily', - ); - - SELECT * FROM memory.sushi.test_model_c - """ - ) - model_b = load_sql_based_model(expressions_b) - context.upsert_model(model_b) - - expressions_a = d.parse( - f""" - MODEL ( - name memory.sushi.test_model_a, - kind FULL, - allow_partials true, - cron '@hourly', - ); - - SELECT * FROM memory.sushi.test_model_b - """ - ) - model_a = load_sql_based_model(expressions_a) - context.upsert_model(model_a) - - context.plan("prod", skip_tests=True, auto_apply=True, no_prompts=True) - assert ( - context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_c")["execution_ts"].iloc[ - 0 - ] - == "2023-01-08 15:00:00" - ) - - with time_machine.travel("2023-01-08 17:00:00 UTC", tick=False): - context.run( - "prod", - select_models=["*test_model_c", "*test_model_a"], - no_auto_upstream=True, - ignore_cron=True, - ) - assert ( - context.fetchdf("SELECT execution_ts FROM memory.sushi.test_model_a")[ - "execution_ts" - ].iloc[0] - == "2023-01-08 17:00:00" - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_run_with_select_models_no_auto_upstream( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.waiter_revenue_by_day") - model = SqlModel.parse_obj({**model.dict(), "audits": []}) - context.upsert_model(model) - - context.plan("prod", no_prompts=True, skip_tests=True, auto_apply=True) - - with time_machine.travel("2023-01-09 00:00:00 UTC"): - assert context.run(select_models=["*waiter_revenue_by_day"], no_auto_upstream=True) - - snapshots = context.state_sync.state_sync.get_snapshots(context.snapshots.values()) - # Only waiter_revenue_by_day should be backfilled up to 2023-01-09. 
- assert {s.name: s.intervals[0][1] for s in snapshots.values() if s.intervals} == { - '"memory"."sushi"."waiter_revenue_by_day"': to_timestamp("2023-01-09"), - '"memory"."sushi"."order_items"': to_timestamp("2023-01-08"), - '"memory"."sushi"."orders"': to_timestamp("2023-01-08"), - '"memory"."sushi"."items"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customer_revenue_lifetime"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customer_revenue_by_day"': to_timestamp("2023-01-08"), - '"memory"."sushi"."latest_order"': to_timestamp("2023-01-08"), - '"memory"."sushi"."waiter_names"': to_timestamp("2023-01-08"), - '"memory"."sushi"."raw_marketing"': to_timestamp("2023-01-08"), - '"memory"."sushi"."marketing"': to_timestamp("2023-01-08"), - '"memory"."sushi"."waiter_as_customer_by_day"': to_timestamp("2023-01-08"), - '"memory"."sushi"."top_waiters"': to_timestamp("2023-01-08"), - '"memory"."raw"."demographics"': to_timestamp("2023-01-08"), - "assert_item_price_above_zero": to_timestamp("2023-01-08"), - '"memory"."sushi"."active_customers"': to_timestamp("2023-01-08"), - '"memory"."sushi"."customers"': to_timestamp("2023-01-08"), - '"memory"."sushi"."count_customers_active"': to_timestamp("2023-01-08"), - '"memory"."sushi"."count_customers_inactive"': to_timestamp("2023-01-08"), - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_select_models(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Modify 2 models. - model = context.get_model("sushi.waiter_revenue_by_day") - kwargs = { - **model.dict(), - # Make a breaking change. 
- "query": model.query.order_by("waiter_id"), # type: ignore - } - context.upsert_model(SqlModel.parse_obj(kwargs)) - - model = context.get_model("sushi.customer_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - expected_intervals = [ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ] - - waiter_revenue_by_day_snapshot_id = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ).snapshot_id - - # Select one of the modified models. - plan_builder = context.plan_builder( - "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True - ) - snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] - plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) - plan = plan_builder.build() - - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert len(dev_df) == 7 - - # Make sure that we only create a view for the selected model. - schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert len(schema_objects) == 1 - assert schema_objects[0].name == "waiter_revenue_by_day" - - # Validate the other modified model. - assert not context.get_snapshot("sushi.customer_revenue_by_day").change_category - assert not context.get_snapshot("sushi.customer_revenue_by_day").version - - # Validate the downstream model. 
- assert not context.engine_adapter.table_exists( - context.get_snapshot("sushi.top_waiters").table_name() - ) - assert not context.engine_adapter.table_exists( - context.get_snapshot("sushi.top_waiters").table_name(False) - ) - - # Make sure that tables are created when deploying to prod. - plan = context.plan("prod", skip_tests=True) - context.apply(plan) - assert context.engine_adapter.table_exists( - context.get_snapshot("sushi.top_waiters").table_name() - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_select_unchanged_model_for_backfill(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Modify 2 models. - model = context.get_model("sushi.waiter_revenue_by_day") - kwargs = { - **model.dict(), - # Make a breaking change. - "query": d.parse_one( - f"{model.query.sql(dialect='duckdb')} ORDER BY waiter_id", dialect="duckdb" - ), - } - context.upsert_model(SqlModel.parse_obj(kwargs)) - - model = context.get_model("sushi.customer_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - expected_intervals = [ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ] - - waiter_revenue_by_day_snapshot_id = context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ).snapshot_id - - # Select one of the modified models. 
- plan_builder = context.plan_builder( - "dev", select_models=["*waiter_revenue_by_day"], skip_tests=True - ) - snapshot = plan_builder._context_diff.snapshots[waiter_revenue_by_day_snapshot_id] - plan_builder.set_choice(snapshot, SnapshotChangeCategory.BREAKING) - plan = plan_builder.build() - - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=waiter_revenue_by_day_snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - # Make sure that we only create a view for the selected model. - schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert {o.name for o in schema_objects} == {"waiter_revenue_by_day"} - - # Now select a model downstream from the previously modified one in order to backfill it. - plan = context.plan_builder("dev", select_models=["*top_waiters"], skip_tests=True).build() - - assert not plan.has_changes - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot( - "sushi.top_waiters", raise_if_missing=True - ).snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - # Make sure that a view has been created for the downstream selected model. 
- schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert {o.name for o in schema_objects} == {"waiter_revenue_by_day", "top_waiters"} - - -@time_machine.travel("2023-01-08 00:00:00 UTC") -def test_snapshot_triggers(init_and_plan_context: t.Callable, mocker: MockerFixture): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # auto-restatement triggers - orders = context.get_model("sushi.orders") - orders_kind = { - **orders.kind.dict(), - "auto_restatement_cron": "@hourly", - } - orders_kwargs = { - **orders.dict(), - "kind": orders_kind, - } - context.upsert_model(PythonModel.parse_obj(orders_kwargs)) - - order_items = context.get_model("sushi.order_items") - order_items_kind = { - **order_items.kind.dict(), - "auto_restatement_cron": "@hourly", - } - order_items_kwargs = { - **order_items.dict(), - "kind": order_items_kind, - } - context.upsert_model(PythonModel.parse_obj(order_items_kwargs)) - - waiter_revenue_by_day = context.get_model("sushi.waiter_revenue_by_day") - waiter_revenue_by_day_kind = { - **waiter_revenue_by_day.kind.dict(), - "auto_restatement_cron": "@hourly", - } - waiter_revenue_by_day_kwargs = { - **waiter_revenue_by_day.dict(), - "kind": waiter_revenue_by_day_kind, - } - context.upsert_model(SqlModel.parse_obj(waiter_revenue_by_day_kwargs)) - - context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) - - scheduler = context.scheduler() - - import sqlmesh - - spy = mocker.spy(sqlmesh.core.scheduler.Scheduler, "run_merged_intervals") - - with time_machine.travel("2023-01-09 00:00:01 UTC"): - scheduler.run( - environment=c.PROD, - start="2023-01-01", - auto_restatement_enabled=True, - ) - - assert spy.called - - actual_triggers = spy.call_args.kwargs["auto_restatement_triggers"] - actual_triggers = {k: v for k, v in actual_triggers.items() if v} - assert len(actual_triggers) == 12 - - for id, trigger in actual_triggers.items(): - model_name = 
id.name.replace('"memory"."sushi".', "").replace('"', "") - auto_restatement_triggers = [ - t.name.replace('"memory"."sushi".', "").replace('"', "") for t in trigger - ] - - if model_name in ("orders", "order_items", "waiter_revenue_by_day"): - assert auto_restatement_triggers == [model_name] - elif model_name in ("customer_revenue_lifetime", "customer_revenue_by_day"): - assert sorted(auto_restatement_triggers) == sorted(["orders", "order_items"]) - elif model_name == "top_waiters": - assert auto_restatement_triggers == ["waiter_revenue_by_day"] - else: - assert auto_restatement_triggers == ["orders"] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_max_interval_end_per_model_not_applied_when_end_is_provided( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - with time_machine.travel("2023-01-09 00:00:00 UTC"): - context.run() - - plan = context.plan_builder( - restate_models=["*"], start="2023-01-09", end="2023-01-09" - ).build() - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_select_models_for_backfill(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - expected_intervals = [ - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ] - - plan = context.plan_builder( - "dev", backfill_models=["+*waiter_revenue_by_day"], skip_tests=True - ).build() - - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.items", raise_if_missing=True).snapshot_id, - intervals=expected_intervals, - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot( - "sushi.order_items", raise_if_missing=True - ).snapshot_id, - intervals=expected_intervals, - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.orders", raise_if_missing=True).snapshot_id, - intervals=expected_intervals, - ), - SnapshotIntervals( - snapshot_id=context.get_snapshot( - 
"sushi.waiter_revenue_by_day", raise_if_missing=True - ).snapshot_id, - intervals=expected_intervals, - ), - ] - - context.apply(plan) - - dev_df = context.engine_adapter.fetchdf( - "SELECT DISTINCT event_date FROM sushi__dev.waiter_revenue_by_day ORDER BY event_date" - ) - assert len(dev_df) == 1 - - schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert {o.name for o in schema_objects} == { - "items", - "order_items", - "orders", - "waiter_revenue_by_day", - } - - assert not context.engine_adapter.table_exists( - context.get_snapshot("sushi.customer_revenue_by_day").table_name() - ) - - # Make sure that tables are created when deploying to prod. - plan = context.plan("prod") - context.apply(plan) - assert context.engine_adapter.table_exists( - context.get_snapshot("sushi.customer_revenue_by_day").table_name() - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_select_star_is_directly_modified(sushi_test_dbt_context: Context): - context = sushi_test_dbt_context - - model = context.get_model("sushi.simple_model_a") - context.upsert_model( - model, - query_=ParsableSql(sql="SELECT 1 AS a, 2 AS b"), - ) - - snapshot_a_id = context.get_snapshot("sushi.simple_model_a").snapshot_id # type: ignore - snapshot_b_id = context.get_snapshot("sushi.simple_model_b").snapshot_id # type: ignore - - plan = context.plan_builder("dev", skip_tests=True).build() - assert plan.directly_modified == {snapshot_a_id, snapshot_b_id} - assert {i.snapshot_id for i in plan.missing_intervals} == {snapshot_a_id, snapshot_b_id} - - assert plan.snapshots[snapshot_a_id].change_category == SnapshotChangeCategory.NON_BREAKING - assert plan.snapshots[snapshot_b_id].change_category == SnapshotChangeCategory.NON_BREAKING - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_is_incremental_table_is_missing(sushi_test_dbt_context: Context): - context = sushi_test_dbt_context - - model = context.get_model("sushi.waiter_revenue_by_day_v2") - model = 
model.copy(update={"kind": IncrementalUnmanagedKind(), "start": "2023-01-01"}) - context.upsert_model(model) - context._standalone_audits["test_top_waiters"].start = "2023-01-01" - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - snapshot = context.get_snapshot("sushi.waiter_revenue_by_day_v2") - assert snapshot - - # Manually drop the table - context.engine_adapter.drop_table(snapshot.table_name()) - - context.snapshot_evaluator.evaluate( - snapshot, - start="2023-01-01", - end="2023-01-08", - execution_time="2023-01-08 15:00:00", - snapshots={s.name: s for s in context.snapshots.values()}, - deployability_index=DeployabilityIndex.all_deployable(), - ) - - # Make sure the table was recreated - assert context.engine_adapter.table_exists(snapshot.table_name()) - - -def test_model_attr(sushi_test_dbt_context: Context, assert_exp_eq): - context = sushi_test_dbt_context - model = context.get_model("sushi.top_waiters") - assert_exp_eq( - model.render_query(), - """ - SELECT - CAST("waiter_id" AS INT) AS "waiter_id", - CAST("revenue" AS DOUBLE) AS "revenue", - 3 AS "model_columns" - FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" - WHERE - "ds" = ( - SELECT - MAX("ds") - FROM "memory"."sushi"."waiter_revenue_by_day_v2" AS "waiter_revenue_by_day_v2" - ) - ORDER BY - "revenue" DESC NULLS FIRST - LIMIT 10 - """, - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_incremental_by_partition(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - source_name = "raw.test_incremental_by_partition" - model_name = "memory.sushi.test_incremental_by_partition" - - expressions = d.parse( - f""" - MODEL ( - name {model_name}, - kind INCREMENTAL_BY_PARTITION (disable_restatement false), - partitioned_by [key], - allow_partials true, - start '2023-01-07', - ); - - SELECT key, value FROM {source_name}; - """ - ) - model = 
load_sql_based_model(expressions) - context.upsert_model(model) - - context.engine_adapter.ctas( - source_name, - d.parse_one("SELECT 'key_a' AS key, 1 AS value"), - ) - - context.plan(auto_apply=True, no_prompts=True) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_a", 1), - ] - - context.engine_adapter.replace_query( - source_name, - d.parse_one("SELECT 'key_b' AS key, 1 AS value"), - ) - context.run(ignore_cron=True) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_a", 1), - ("key_b", 1), - ] - - context.engine_adapter.replace_query( - source_name, - d.parse_one("SELECT 'key_a' AS key, 2 AS value"), - ) - # Run 1 minute later. - with time_machine.travel("2023-01-08 15:01:00 UTC"): - context.run(ignore_cron=True) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_b", 1), - ("key_a", 2), - ] - - # model should fully refresh on restatement - context.engine_adapter.replace_query( - source_name, - d.parse_one("SELECT 'key_c' AS key, 3 AS value"), - ) - context.plan(auto_apply=True, no_prompts=True, restate_models=[model_name]) - assert context.engine_adapter.fetchall(f"SELECT * FROM {model_name}") == [ - ("key_c", 3), - ] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_custom_materialization(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - custom_insert_called = False - - class CustomFullMaterialization(CustomMaterialization): - NAME = "test_custom_full" - - def insert( - self, - table_name: str, - query_or_df: QueryOrDF, - model: Model, - is_first_insert: bool, - render_kwargs: t.Dict[str, t.Any], - **kwargs: t.Any, - ) -> None: - nonlocal custom_insert_called - custom_insert_called = True - - self._replace_query_for_model(model, table_name, query_or_df, render_kwargs) - - model = context.get_model("sushi.top_waiters") - kwargs = { - **model.dict(), - # Make a breaking change. 
- "kind": dict(name="CUSTOM", materialization="test_custom_full"), - } - context.upsert_model(SqlModel.parse_obj(kwargs)) - - context.plan(auto_apply=True, no_prompts=True) - - assert custom_insert_called - - -# needs to be defined at the top level. If its defined within the test body, -# adding to the snapshot cache fails with: AttributeError: Can't pickle local object -class TestCustomKind(CustomKind): - __test__ = False # prevent pytest warning since this isnt a class containing tests - - @property - def custom_property(self) -> str: - return validate_string(self.materialization_properties.get("custom_property")) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_custom_materialization_with_custom_kind(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - custom_insert_calls = [] - - class CustomFullMaterialization(CustomMaterialization[TestCustomKind]): - NAME = "test_custom_full_with_custom_kind" - - def insert( - self, - table_name: str, - query_or_df: QueryOrDF, - model: Model, - is_first_insert: bool, - render_kwargs: t.Dict[str, t.Any], - **kwargs: t.Any, - ) -> None: - assert isinstance(model.kind, TestCustomKind) - - nonlocal custom_insert_calls - custom_insert_calls.append(model.kind.custom_property) - - self._replace_query_for_model(model, table_name, query_or_df, render_kwargs) - - model = context.get_model("sushi.top_waiters") - kwargs = { - **model.dict(), - # Make a breaking change. 
- "kind": dict( - name="CUSTOM", - materialization="test_custom_full_with_custom_kind", - materialization_properties={"custom_property": "pytest"}, - ), - } - context.upsert_model(SqlModel.parse_obj(kwargs)) - - context.plan(auto_apply=True) - - assert custom_insert_calls == ["pytest"] - - # no changes - context.plan(auto_apply=True) - - assert custom_insert_calls == ["pytest"] - - # change a property on the custom kind, breaking change - kwargs["kind"]["materialization_properties"]["custom_property"] = "some value" - context.upsert_model(SqlModel.parse_obj(kwargs)) - context.plan(auto_apply=True) - - assert custom_insert_calls == ["pytest", "some value"] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_indirect_non_breaking_view_model_non_representative_snapshot( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context("examples/sushi") - - # Forward-only parent - forward_only_model_name = "memory.sushi.test_forward_only_model" - forward_only_model_expressions = d.parse( - f""" - MODEL ( - name {forward_only_model_name}, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - ), - ); - - SELECT '2023-01-01' AS ds, 'value' AS value; - """ - ) - forward_only_model = load_sql_based_model(forward_only_model_expressions) - assert forward_only_model.forward_only - context.upsert_model(forward_only_model) - - # FULL downstream model. - full_downstream_model_name = "memory.sushi.test_full_downstream_model" - full_downstream_model_expressions = d.parse( - f""" - MODEL ( - name {full_downstream_model_name}, - kind FULL, - ); - - SELECT ds, value FROM {forward_only_model_name}; - """ - ) - full_downstream_model = load_sql_based_model(full_downstream_model_expressions) - context.upsert_model(full_downstream_model) - - # VIEW downstream of the previous FULL model. 
- view_downstream_model_name = "memory.sushi.test_view_downstream_model" - view_downstream_model_expressions = d.parse( - f""" - MODEL ( - name {view_downstream_model_name}, - kind VIEW, - ); - - SELECT ds, value FROM {full_downstream_model_name}; - """ - ) - view_downstream_model = load_sql_based_model(view_downstream_model_expressions) - context.upsert_model(view_downstream_model) - - # Apply the initial plan with all 3 models. - context.plan(auto_apply=True, no_prompts=True) - - # Make a change to the forward-only model and apply it in dev. - context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model))) - forward_only_model_snapshot_id = context.get_snapshot(forward_only_model_name).snapshot_id - full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id - view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id - dev_plan = context.plan("dev", auto_apply=True, no_prompts=True, enable_preview=False) - assert ( - dev_plan.snapshots[forward_only_model_snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - dev_plan.snapshots[full_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert ( - dev_plan.snapshots[view_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert not dev_plan.missing_intervals - - # Make a follow-up breaking change to the downstream full model. 
- new_full_downstream_model_expressions = d.parse( - f""" - MODEL ( - name {full_downstream_model_name}, - kind FULL, - ); - - SELECT ds, 'new_value' AS value FROM {forward_only_model_name}; - """ - ) - new_full_downstream_model = load_sql_based_model(new_full_downstream_model_expressions) - context.upsert_model(new_full_downstream_model) - full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id - view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id - dev_plan = context.plan( - "dev", - categorizer_config=CategorizerConfig.all_full(), - auto_apply=True, - no_prompts=True, - enable_preview=False, - ) - assert ( - dev_plan.snapshots[full_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - assert ( - dev_plan.snapshots[view_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_BREAKING - ) - assert len(dev_plan.missing_intervals) == 2 - assert dev_plan.missing_intervals[0].snapshot_id == full_downstream_model_snapshot_id - assert dev_plan.missing_intervals[1].snapshot_id == view_downstream_model_snapshot_id - - # Check that the representative view hasn't been created yet. - assert not context.engine_adapter.table_exists( - context.get_snapshot(view_downstream_model_name).table_name() - ) - - # Now promote the very first change to prod without promoting the 2nd breaking change. - context.upsert_model(full_downstream_model) - context.plan(auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full()) - - # Finally, make a non-breaking change to the full model in the same dev environment. 
- context.upsert_model(add_projection_to_model(t.cast(SqlModel, new_full_downstream_model))) - full_downstream_model_snapshot_id = context.get_snapshot(full_downstream_model_name).snapshot_id - view_downstream_model_snapshot_id = context.get_snapshot(view_downstream_model_name).snapshot_id - dev_plan = context.plan( - "dev", - categorizer_config=CategorizerConfig.all_full(), - auto_apply=True, - no_prompts=True, - enable_preview=False, - ) - assert ( - dev_plan.snapshots[full_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - dev_plan.snapshots[view_downstream_model_snapshot_id].change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - - # Deploy changes to prod - context.plan("prod", auto_apply=True, no_prompts=True) - - # Check that the representative view has been created. - assert context.engine_adapter.table_exists( - context.get_snapshot(view_downstream_model_name).table_name() - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_indirect_non_breaking_view_model_non_representative_snapshot_migration( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context("examples/sushi") - - forward_only_model_expr = d.parse( - """ - MODEL ( - name memory.sushi.forward_only_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only TRUE, - on_destructive_change 'allow', - ), - ); - - SELECT '2023-01-07' AS ds, 1 AS a; - """ - ) - forward_only_model = load_sql_based_model(forward_only_model_expr) - context.upsert_model(forward_only_model) - - downstream_view_a_expr = d.parse( - """ - MODEL ( - name memory.sushi.downstream_view_a, - kind VIEW, - ); - - SELECT a from memory.sushi.forward_only_model; - """ - ) - downstream_view_a = load_sql_based_model(downstream_view_a_expr) - context.upsert_model(downstream_view_a) - - downstream_view_b_expr = d.parse( - """ - MODEL ( - name memory.sushi.downstream_view_b, - kind VIEW, - ); - - SELECT a from 
memory.sushi.downstream_view_a; - """ - ) - downstream_view_b = load_sql_based_model(downstream_view_b_expr) - context.upsert_model(downstream_view_b) - - context.plan(auto_apply=True, no_prompts=True, skip_tests=True) - - # Make a forward-only change - context.upsert_model(add_projection_to_model(t.cast(SqlModel, forward_only_model))) - # Make a non-breaking change downstream - context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_view_a))) - - context.plan(auto_apply=True, no_prompts=True, skip_tests=True) - - # Make sure the downstrean indirect non-breaking view is available in prod - count = context.engine_adapter.fetchone("SELECT COUNT(*) FROM memory.sushi.downstream_view_b")[ - 0 - ] - assert count > 0 - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -@pytest.mark.parametrize( - "parent_a_category,parent_b_category,expected_child_category", - [ - ( - SnapshotChangeCategory.BREAKING, - SnapshotChangeCategory.BREAKING, - SnapshotChangeCategory.INDIRECT_BREAKING, - ), - ( - SnapshotChangeCategory.NON_BREAKING, - SnapshotChangeCategory.NON_BREAKING, - SnapshotChangeCategory.INDIRECT_NON_BREAKING, - ), - ( - SnapshotChangeCategory.BREAKING, - SnapshotChangeCategory.NON_BREAKING, - SnapshotChangeCategory.INDIRECT_NON_BREAKING, - ), - ( - SnapshotChangeCategory.NON_BREAKING, - SnapshotChangeCategory.BREAKING, - SnapshotChangeCategory.INDIRECT_BREAKING, - ), - ( - SnapshotChangeCategory.NON_BREAKING, - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.METADATA, - ), - ( - SnapshotChangeCategory.BREAKING, - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.METADATA, - ), - ( - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.BREAKING, - SnapshotChangeCategory.INDIRECT_BREAKING, - ), - ( - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.NON_BREAKING, - SnapshotChangeCategory.INDIRECT_NON_BREAKING, - ), - ( - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.METADATA, - SnapshotChangeCategory.METADATA, - 
), - ], -) -def test_rebase_two_changed_parents( - init_and_plan_context: t.Callable, - parent_a_category: SnapshotChangeCategory, # This change is deployed to prod first - parent_b_category: SnapshotChangeCategory, # This change is deployed to prod second - expected_child_category: SnapshotChangeCategory, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - initial_model_a = context.get_model("sushi.orders") - initial_model_b = context.get_model("sushi.items") - - # Make change A and deploy it to dev_a - context.upsert_model(initial_model_a.name, stamp="1") - plan_builder = context.plan_builder("dev_a", skip_tests=True) - plan_builder.set_choice(context.get_snapshot(initial_model_a.name), parent_a_category) - context.apply(plan_builder.build()) - - # Make change B and deploy it to dev_b - context.upsert_model(initial_model_a) - context.upsert_model(initial_model_b.name, stamp="1") - plan_builder = context.plan_builder("dev_b", skip_tests=True) - plan_builder.set_choice(context.get_snapshot(initial_model_b.name), parent_b_category) - context.apply(plan_builder.build()) - - # Deploy change A to prod - context.upsert_model(initial_model_a.name, stamp="1") - context.upsert_model(initial_model_b) - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - # Apply change B in addition to A and plan against prod - context.upsert_model(initial_model_b.name, stamp="1") - plan = context.plan_builder("prod", skip_tests=True).build() - - # Validate the category of child snapshots - direct_child_snapshot = plan.snapshots[context.get_snapshot("sushi.order_items").snapshot_id] - assert direct_child_snapshot.change_category == expected_child_category - - indirect_child_snapshot = plan.snapshots[context.get_snapshot("sushi.top_waiters").snapshot_id] - assert indirect_child_snapshot.change_category == expected_child_category - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def 
test_unaligned_start_snapshot_with_non_deployable_downstream(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - downstream_model_name = "memory.sushi.customer_max_revenue" - - expressions = d.parse( - f""" - MODEL ( - name {downstream_model_name}, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key customer_id, - forward_only true, - ), - ); - - SELECT - customer_id, MAX(revenue) AS max_revenue - FROM memory.sushi.customer_revenue_lifetime - GROUP BY 1; - """ - ) - - downstream_model = load_sql_based_model(expressions) - assert downstream_model.forward_only - context.upsert_model(downstream_model) - - context.plan(auto_apply=True, no_prompts=True) - - customer_revenue_lifetime_model = context.get_model("sushi.customer_revenue_lifetime") - kwargs = { - **customer_revenue_lifetime_model.dict(), - "name": "memory.sushi.customer_revenue_lifetime_new", - "kind": dict( - name="INCREMENTAL_UNMANAGED" - ), # Make it incremental unmanaged to ensure the depends_on_past behavior. 
- } - context.upsert_model(SqlModel.parse_obj(kwargs)) - context.upsert_model( - downstream_model_name, - query_=ParsableSql( - sql="SELECT customer_id, MAX(revenue) AS max_revenue FROM memory.sushi.customer_revenue_lifetime_new GROUP BY 1" - ), - ) - - plan = context.plan_builder("dev", enable_preview=True).build() - assert {s.name for s in plan.new_snapshots} == { - '"memory"."sushi"."customer_revenue_lifetime_new"', - '"memory"."sushi"."customer_max_revenue"', - } - for snapshot_interval in plan.missing_intervals: - assert not plan.deployability_index.is_deployable(snapshot_interval.snapshot_id) - assert snapshot_interval.intervals[0][0] == to_timestamp("2023-01-07") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - - assert all( - s.virtual_environment_mode.is_dev_only or not s.is_model or s.is_symbolic - for s in context.snapshots.values() - ) - - # Init prod - context.plan("prod", auto_apply=True, no_prompts=True) - - # Make a change in dev - original_model = context.get_model("sushi.waiter_revenue_by_day") - original_fingerprint = context.get_snapshot(original_model.name).fingerprint - model = original_model.copy( - update={ - "query_": ParsableSql( - sql=original_model.query.order_by("waiter_id").sql(dialect=original_model.dialect) - ) - } - ) - model = add_projection_to_model(t.cast(SqlModel, model)) - context.upsert_model(model) - - plan_dev = context.plan_builder("dev").build() - assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07") - assert plan_dev.requires_backfill - assert plan_dev.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id, - intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], - ), - SnapshotIntervals( - 
snapshot_id=context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id, - intervals=[(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], - ), - ] - assert plan_dev.context_diff.snapshots[context.get_snapshot(model.name).snapshot_id].intervals - assert plan_dev.context_diff.snapshots[ - context.get_snapshot("sushi.top_waiters").snapshot_id - ].intervals - assert plan_dev.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].dev_intervals - assert plan_dev.context_diff.snapshots[ - context.get_snapshot("sushi.top_waiters").snapshot_id - ].dev_intervals - context.apply(plan_dev) - - # Make sure the waiter_revenue_by_day model is a table in prod and a view in dev - table_types_df = context.engine_adapter.fetchdf( - "SELECT table_schema, table_type FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 'waiter_revenue_by_day'" - ) - assert table_types_df.to_dict("records") == [ - {"table_schema": "sushi", "table_type": "BASE TABLE"}, - {"table_schema": "sushi__dev", "table_type": "VIEW"}, - ] - - # Check that the specified dates were backfilled - min_event_date = context.engine_adapter.fetchone( - "SELECT MIN(event_date) FROM sushi__dev.waiter_revenue_by_day" - )[0] - assert min_event_date == to_date("2023-01-07") - - # Make sure the changes are applied without backfill in prod - plan_prod = context.plan_builder("prod").build() - assert not plan_prod.requires_backfill - assert not plan_prod.missing_intervals - context.apply(plan_prod) - assert "one" in context.engine_adapter.columns("sushi.waiter_revenue_by_day") - - # Make sure the revert of a breaking changes results in a full rebuild - context.upsert_model(original_model) - assert context.get_snapshot(original_model.name).fingerprint == original_fingerprint - - plan_prod = context.plan_builder( - "prod", allow_destructive_models=["sushi.waiter_revenue_by_day"] - ).build() - assert not plan_prod.requires_backfill - assert not plan_prod.missing_intervals - context.apply(plan_prod) - assert 
"one" not in context.engine_adapter.columns("sushi.waiter_revenue_by_day") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_kind_change(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.apply(plan) - - # Change to full kind - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": FullKind()}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.missing_intervals - assert prod_plan.requires_backfill - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "table" - - # Change back to view - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": ViewKind()}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "view" - - # Change to incremental - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": IncrementalUnmanagedKind()}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - 
data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "table" - - # Change back to full - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": FullKind()}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "table" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_kind_change_incremental( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - - forward_only_model_name = "memory.sushi.test_forward_only_model" - forward_only_model_expressions = d.parse( - f""" - MODEL ( - name {forward_only_model_name}, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - ), - ); - - SELECT '2023-01-01' AS ds, 'value' AS value; - """ - ) - forward_only_model = load_sql_based_model(forward_only_model_expressions) - forward_only_model = forward_only_model.copy( - update={"virtual_environment_mode": VirtualEnvironmentMode.DEV_ONLY} - ) - context.upsert_model(forward_only_model) - - context.plan("prod", auto_apply=True, no_prompts=True) - - # Change to view - model = context.get_model(forward_only_model_name) - original_kind = model.kind - model = model.copy(update={"kind": ViewKind()}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not 
prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "view" - - model = model.copy(update={"kind": original_kind}) - context.upsert_model(model) - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"test_forward_only_model"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "table" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_kind_change_with_follow_up_changes_in_dev( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.apply(plan) - - # Make sure the initial state is a view - data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "view" - - # Change to incremental unmanaged kind - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": IncrementalUnmanagedKind()}) - context.upsert_model(model) - dev_plan = context.plan_builder("dev", skip_tests=True).build() - assert dev_plan.missing_intervals - assert dev_plan.requires_backfill - context.apply(dev_plan) - - # Make a follow-up forward-only change - model = add_projection_to_model(t.cast(SqlModel, model)) - context.upsert_model(model) - dev_plan = context.plan_builder("dev", skip_tests=True, forward_only=True).build() - context.apply(dev_plan) - - # Deploy to prod - prod_plan = 
context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals - assert not prod_plan.context_diff.snapshots[ - context.get_snapshot(model.name).snapshot_id - ].intervals - context.apply(prod_plan) - data_objects = context.engine_adapter.get_data_objects("sushi", {"top_waiters"}) - assert len(data_objects) == 1 - assert data_objects[0].type == "table" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_kind_change_manual_categorization( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.apply(plan) - - model = context.get_model("sushi.top_waiters") - model = model.copy(update={"kind": FullKind()}) - context.upsert_model(model) - dev_plan_builder = context.plan_builder("dev", skip_tests=True, no_auto_categorization=True) - dev_plan_builder.set_choice( - dev_plan_builder._context_diff.snapshots[context.get_snapshot(model.name).snapshot_id], - SnapshotChangeCategory.NON_BREAKING, - ) - dev_plan = dev_plan_builder.build() - assert dev_plan.requires_backfill - assert len(dev_plan.missing_intervals) == 1 - context.apply(dev_plan) - - prod_plan = context.plan_builder("prod", skip_tests=True).build() - assert prod_plan.requires_backfill - assert prod_plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=context.get_snapshot("sushi.top_waiters").snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ), - ] - - 
-@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_seed_model_change( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.load() - context.plan("prod", auto_apply=True, no_prompts=True) - - seed_model = context.get_model("sushi.waiter_names") - with open(seed_model.seed_path, "a") as fd: - fd.write("\n123,New Test Name") - - context.load() - seed_model_snapshot = context.get_snapshot("sushi.waiter_names") - plan = context.plan_builder("dev").build() - assert plan.directly_modified == {seed_model_snapshot.snapshot_id} - assert len(plan.missing_intervals) == 2 - context.apply(plan) - - actual_seed_df_in_dev = context.fetchdf("SELECT * FROM sushi__dev.waiter_names WHERE id = 123") - assert actual_seed_df_in_dev.to_dict("records") == [{"id": 123, "name": "New Test Name"}] - actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") - assert actual_seed_df_in_prod.empty - - plan = context.plan_builder("prod").build() - assert plan.directly_modified == {seed_model_snapshot.snapshot_id} - assert len(plan.missing_intervals) == 1 - assert plan.missing_intervals[0].snapshot_id == seed_model_snapshot.snapshot_id - context.apply(plan) - - actual_seed_df_in_prod = context.fetchdf("SELECT * FROM sushi.waiter_names WHERE id = 123") - assert actual_seed_df_in_prod.to_dict("records") == [{"id": 123, "name": "New Test Name"}] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_virtual_environment_mode_dev_only_model_change_downstream_of_seed( - init_and_plan_context: t.Callable, -): - """This test covers a scenario when a model downstream of a seed model is modified and explicitly selected - causing an (unhydrated) seed model to sourced from the state. If SQLMesh attempts to create - a table for the unchanged seed model, it will fail because the seed model is not hydrated. 
- """ - context, _ = init_and_plan_context( - "examples/sushi", config="test_config_virtual_environment_mode_dev_only" - ) - context.load() - context.plan("prod", auto_apply=True, no_prompts=True) - - # Make sure that a different version of the seed model is loaded - seed_model = context.get_model("sushi.waiter_names") - seed_model = seed_model.copy(update={"stamp": "force new version"}) - context.upsert_model(seed_model) - - # Make a change to the downstream model - model = context.get_model("sushi.waiter_as_customer_by_day") - model = model.copy(update={"stamp": "force new version"}) - context.upsert_model(model) - - # It is important to clear the cache so that the hydrated seed model is not sourced from the cache - context.clear_caches() - - # Make sure to use the selector so that the seed model is sourced from the state - plan = context.plan_builder("dev", select_models=[model.name]).build() - assert len(plan.directly_modified) == 1 - assert list(plan.directly_modified)[0].name == model.fqn - assert len(plan.missing_intervals) == 1 - assert plan.missing_intervals[0].snapshot_id.name == model.fqn - - # Make sure there's no error when applying the plan - context.apply(plan) - context.plan("prod", auto_apply=True, no_prompts=True) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_plan_ignores_changes(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - restated_snapshot = context.get_snapshot("sushi.top_waiters") - - # Simulate a change. 
- model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - plan = context.plan_builder(restate_models=["sushi.top_waiters"]).build() - assert plan.snapshots != context.snapshots - - assert not plan.directly_modified - assert not plan.has_changes - assert not plan.new_snapshots - assert plan.requires_backfill - assert plan.restatements == { - restated_snapshot.snapshot_id: (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) - } - assert plan.missing_intervals == [ - SnapshotIntervals( - snapshot_id=restated_snapshot.snapshot_id, - intervals=[ - (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ) - ] - - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_plan_across_environments_snapshot_with_shared_version( - init_and_plan_context: t.Callable, -): - context, _ = init_and_plan_context("examples/sushi") - - # Change kind to incremental unmanaged - model = context.get_model("sushi.waiter_revenue_by_day") - previous_kind = model.kind.copy(update={"forward_only": True}) - assert isinstance(previous_kind, IncrementalByTimeRangeKind) - - model = model.copy( - update={ - "kind": IncrementalUnmanagedKind(), - "physical_version": "pinned_version_12345", - "partitioned_by_": [exp.column("event_date")], - } - ) - context.upsert_model(model) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Make some change and deploy it to both dev and prod environments - model = add_projection_to_model(t.cast(SqlModel, model)) - context.upsert_model(model) - context.plan("dev_a", 
auto_apply=True, no_prompts=True) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Change the kind back to incremental by time range and deploy to prod - model = model.copy(update={"kind": previous_kind}) - context.upsert_model(model) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Restate the model and verify that the interval hasn't been expanded because of the old snapshot - # with the same version - context.plan( - restate_models=["sushi.waiter_revenue_by_day"], - start="2023-01-06", - end="2023-01-08", - auto_apply=True, - no_prompts=True, - ) - - assert ( - context.fetchdf( - "SELECT COUNT(*) AS cnt FROM sushi.waiter_revenue_by_day WHERE one IS NOT NULL AND event_date < '2023-01-06'" - )["cnt"][0] - == 0 - ) - plan = context.plan_builder("prod").build() - assert not plan.missing_intervals - - -def test_restatement_plan_hourly_with_downstream_daily_restates_correct_intervals(tmp_path: Path): - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@hourly' - ); - - select account_id, ts from test.external_table; - """ - - model_b = """ - MODEL ( - name test.b, - kind FULL, - cron '@daily' - ); - - select account_id, ts from test.a; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - for path, defn in {"a.sql": model_a, "b.sql": model_b}.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "ts": exp.DataType.build("timestamp"), - } - external_table = 
exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # plan + apply - ctx.plan(auto_apply=True, no_prompts=True) - - def _dates_in_table(table_name: str) -> t.List[str]: - return [ - str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") - ] - - # verify initial state - for tbl in ["test.a", "test.b"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # restate A - engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") - ctx.plan( - restate_models=["test.a"], - start="2024-01-01 01:00:00", - end="2024-01-01 02:00:00", - auto_apply=True, - no_prompts=True, - ) - - # verify result - for tbl in ["test.a", "test.b"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], f"Table {tbl} wasnt cleared" - - # Put some data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 01:30:00", - "2024-01-01 23:30:00", - "2024-01-02 03:30:00", - "2024-01-03 12:30:00", - ], - } - ) - engine_adapter.replace_query( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # Restate A across a day boundary with the expectation that two day intervals in B are affected - ctx.plan( - restate_models=["test.a"], - start="2024-01-01 02:00:00", - end="2024-01-02 04:00:00", - auto_apply=True, - no_prompts=True, - ) - - for tbl in ["test.a", "test.b"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", # present already - # "2024-01-01 02:30:00", #removed in last restatement - "2024-01-01 23:30:00", # added in last restatement - "2024-01-02 03:30:00", # added in last 
restatement - ], f"Table {tbl} wasnt cleared" - - -def test_restatement_plan_respects_disable_restatements(tmp_path: Path): - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01', - cron '@daily' - ); - - select account_id, ts from test.external_table; - """ - - model_b = """ - MODEL ( - name test.b, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts", - disable_restatement true, - ), - start '2024-01-01', - cron '@daily' - ); - - select account_id, ts from test.a; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - for path, defn in {"a.sql": model_a, "b.sql": model_b}.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "ts": exp.DataType.build("timestamp"), - } - external_table = exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # plan + apply - ctx.plan(auto_apply=True, no_prompts=True) - - def _dates_in_table(table_name: str) -> t.List[str]: - return [ - str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") - ] - - def get_snapshot_intervals(snapshot_id): - return list(ctx.state_sync.get_snapshots([snapshot_id]).values())[0].intervals - - # verify initial state - for tbl in ["test.a", "test.b"]: - assert _dates_in_table(tbl) == [ - 
"2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # restate A and expect b to be ignored - starting_b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id) - engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") - ctx.plan( - restate_models=["test.a"], - start="2024-01-01", - end="2024-01-02", - auto_apply=True, - no_prompts=True, - ) - - # verify A was changed and not b - assert _dates_in_table("test.a") == [ - "2024-01-01 00:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - assert _dates_in_table("test.b") == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # Verify B intervals were not touched - b_intervals = get_snapshot_intervals(ctx.snapshots['"memory"."test"."b"'].snapshot_id) - assert starting_b_intervals == b_intervals - - -def test_restatement_plan_clears_correct_intervals_across_environments(tmp_path: Path): - model1 = """ - MODEL ( - name test.incremental_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "date" - ), - start '2024-01-01', - cron '@daily' - ); - - select account_id, date from test.external_table; - """ - - model2 = """ - MODEL ( - name test.downstream_of_incremental, - kind FULL - ); - - select account_id, date from test.incremental_model; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - with open(models_dir / "model1.sql", "w") as f: - f.write(model1) - - with open(models_dir / "model2.sql", "w") as f: - f.write(model2) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004, 1005], - "name": ["foo", "bar", "baz", "bing", "bong"], - "date": ["2024-01-01", "2024-01-02", "2024-01-03", 
"2024-01-04", "2024-01-05"], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "name": exp.DataType.build("varchar"), - "date": exp.DataType.build("date"), - } - external_table = exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # first, create the prod models - ctx.plan(auto_apply=True, no_prompts=True) - assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) - assert engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (5,) - assert not engine_adapter.table_exists("test__dev.incremental_model") - - # then, make a dev version - model1 = """ - MODEL ( - name test.incremental_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "date" - ), - start '2024-01-01', - cron '@daily' - ); - - select 1 as account_id, date from test.external_table; - """ - with open(models_dir / "model1.sql", "w") as f: - f.write(model1) - ctx.load() - - ctx.plan(environment="dev", auto_apply=True, no_prompts=True) - assert engine_adapter.table_exists("test__dev.incremental_model") - assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (5,) - - # drop some source data so when we restate the interval it essentially clears it which is easy to verify - engine_adapter.execute("delete from test.external_table where date = '2024-01-01'") - assert engine_adapter.fetchone("select count(*) from test.external_table") == (4,) - - # now, restate intervals in dev and verify prod is NOT affected - ctx.plan( - environment="dev", - start="2024-01-01", - end="2024-01-02", - restate_models=["test.incremental_model"], - auto_apply=True, - no_prompts=True, - ) - assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) - assert 
engine_adapter.fetchone( - "select count(*) from test.incremental_model where date = '2024-01-01'" - ) == (1,) - assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,) - assert engine_adapter.fetchone( - "select count(*) from test__dev.incremental_model where date = '2024-01-01'" - ) == (0,) - - # prod still should not be affected by a run because the restatement only happened in dev - ctx.run() - assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (5,) - assert engine_adapter.fetchone( - "select count(*) from test.incremental_model where date = '2024-01-01'" - ) == (1,) - - # drop another interval from the source data - engine_adapter.execute("delete from test.external_table where date = '2024-01-02'") - - # now, restate intervals in prod and verify that dev IS affected - ctx.plan( - start="2024-01-01", - end="2024-01-03", - restate_models=["test.incremental_model"], - auto_apply=True, - no_prompts=True, - ) - assert engine_adapter.fetchone("select count(*) from test.incremental_model") == (3,) - assert engine_adapter.fetchone( - "select count(*) from test.incremental_model where date = '2024-01-01'" - ) == (0,) - assert engine_adapter.fetchone( - "select count(*) from test.incremental_model where date = '2024-01-02'" - ) == (0,) - assert engine_adapter.fetchone( - "select count(*) from test.incremental_model where date = '2024-01-03'" - ) == (1,) - - # dev not affected yet until `sqlmesh run` is run - assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (4,) - assert engine_adapter.fetchone( - "select count(*) from test__dev.incremental_model where date = '2024-01-01'" - ) == (0,) - assert engine_adapter.fetchone( - "select count(*) from test__dev.incremental_model where date = '2024-01-02'" - ) == (1,) - assert engine_adapter.fetchone( - "select count(*) from test__dev.incremental_model where date = '2024-01-03'" - ) == (1,) - - # the restatement plan for prod 
should have cleared dev intervals too, which means this `sqlmesh run` re-runs 2024-01-01 and 2024-01-02 - ctx.run(environment="dev") - assert engine_adapter.fetchone("select count(*) from test__dev.incremental_model") == (3,) - assert engine_adapter.fetchone( - "select count(*) from test__dev.incremental_model where date = '2024-01-01'" - ) == (0,) - assert engine_adapter.fetchone( - "select count(*) from test__dev.incremental_model where date = '2024-01-02'" - ) == (0,) - assert engine_adapter.fetchone( - "select count(*) from test__dev.incremental_model where date = '2024-01-03'" - ) == (1,) - - # the downstream full model should always reflect whatever the incremental model is showing - assert engine_adapter.fetchone("select count(*) from test.downstream_of_incremental") == (3,) - assert engine_adapter.fetchone("select count(*) from test__dev.downstream_of_incremental") == ( - 3, - ) - - -def test_prod_restatement_plan_clears_correct_intervals_in_derived_dev_tables(tmp_path: Path): - """ - Scenario: - I have models A[hourly] <- B[daily] <- C in prod - I create dev and add 2 new models D and E so that my dev DAG looks like A <- B <- C <- D[daily] <- E - I prod, I restate *one hour* of A - Outcome: - D and E should be restated in dev despite not being a part of prod - since B and D are daily, the whole day should be restated even though only 1hr of the upstream model was restated - """ - - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@hourly' - ); - - select account_id, ts from test.external_table; - """ - - def _derived_full_model_def(name: str, upstream: str) -> str: - return f""" - MODEL ( - name test.{name}, - kind FULL - ); - - select account_id, ts from test.{upstream}; - """ - - def _derived_incremental_model_def(name: str, upstream: str) -> str: - return f""" - MODEL ( - name test.{name}, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ts - ), - cron '@daily' - ); 
- - select account_id, ts from test.{upstream} where ts between @start_ts and @end_ts; - """ - - model_b = _derived_incremental_model_def("b", upstream="a") - model_c = _derived_full_model_def("c", upstream="b") - - models_dir = tmp_path / "models" - models_dir.mkdir() - - for path, defn in {"a.sql": model_a, "b.sql": model_b, "c.sql": model_c}.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "ts": exp.DataType.build("timestamp"), - } - external_table = exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # plan + apply A, B, C in prod - ctx.plan(auto_apply=True, no_prompts=True) - - # add D[daily], E in dev - model_d = _derived_incremental_model_def("d", upstream="c") - model_e = _derived_full_model_def("e", upstream="d") - - for path, defn in { - "d.sql": model_d, - "e.sql": model_e, - }.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - # plan + apply dev - ctx.load() - ctx.plan(environment="dev", auto_apply=True, no_prompts=True) - - def _dates_in_table(table_name: str) -> t.List[str]: - return [ - str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") - ] - - # verify initial state - for tbl in ["test.a", "test.b", "test.c", "test__dev.d", "test__dev.e"]: - assert engine_adapter.table_exists(tbl) 
- assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - for tbl in ["test.d", "test.e"]: - assert not engine_adapter.table_exists(tbl) - - # restate A in prod - engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") - ctx.plan( - restate_models=["test.a"], - start="2024-01-01 01:00:00", - end="2024-01-01 02:00:00", - auto_apply=True, - no_prompts=True, - ) - - # verify result - for tbl in ["test.a", "test.b", "test.c"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], f"Table {tbl} wasnt cleared" - - # dev shouldnt have been affected yet - for tbl in ["test__dev.d", "test__dev.e"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], f"Table {tbl} was prematurely cleared" - - # run dev to trigger the processing of the prod restatement - ctx.run(environment="dev") - - # data should now be cleared from dev - # note that D is a daily model, so clearing an hour interval from A should have triggered the full day in D - for tbl in ["test__dev.d", "test__dev.e"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], f"Table {tbl} wasnt cleared" - - -def test_prod_restatement_plan_clears_unaligned_intervals_in_derived_dev_tables(tmp_path: Path): - """ - Scenario: - I have a model A[hourly] in prod - I create dev and add a model B[daily] - I prod, I restate *one hour* of A - - Outcome: - The whole day for B should be restated. The restatement plan for prod has no hints about B's cadence because - B only exists in dev and there are no other downstream models in prod that would cause the restatement intervals - to be widened. 
- - Therefore, this test checks that SQLMesh does the right thing when an interval is partially cleared - """ - - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@hourly' - ); - - select account_id, ts from test.external_table; - """ - - model_b = """ - MODEL ( - name test.b, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ts - ), - cron '@daily' - ); - - select account_id, ts from test.a where ts between @start_ts and @end_ts; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - with open(models_dir / "a.sql", "w") as f: - f.write(model_a) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "ts": exp.DataType.build("timestamp"), - } - external_table = exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # plan + apply A[hourly] in prod - ctx.plan(auto_apply=True, no_prompts=True) - - # add B[daily] in dev - with open(models_dir / "b.sql", "w") as f: - f.write(model_b) - - # plan + apply dev - ctx.load() - ctx.plan(environment="dev", auto_apply=True, no_prompts=True) - - def _dates_in_table(table_name: str) -> t.List[str]: - return [ - str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") - ] - - # verify initial state - for tbl in ["test.a", "test__dev.b"]: - assert 
_dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # restate A in prod - engine_adapter.execute("delete from test.external_table where ts = '2024-01-01 01:30:00'") - ctx.plan( - restate_models=["test.a"], - start="2024-01-01 01:00:00", - end="2024-01-01 02:00:00", - auto_apply=True, - no_prompts=True, - ) - - # verify result - assert _dates_in_table("test.a") == [ - "2024-01-01 00:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # dev shouldnt have been affected yet - assert _dates_in_table("test__dev.b") == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # mess with A independently of SQLMesh to prove a whole day gets restated for B instead of just 1hr - snapshot_table_name = ctx.table_name("test.a", "dev") - engine_adapter.execute( - f"delete from {snapshot_table_name} where cast(ts as date) == '2024-01-01'" - ) - engine_adapter.execute( - f"insert into {snapshot_table_name} (account_id, ts) values (1007, '2024-01-02 01:30:00')" - ) - - assert _dates_in_table("test.a") == ["2024-01-02 00:30:00", "2024-01-02 01:30:00"] - - # run dev to trigger the processing of the prod restatement - ctx.run(environment="dev") - - # B should now have no data for 2024-01-01 - # To prove a single day was restated vs the whole model, it also shouldnt have the '2024-01-02 01:30:00' record - assert _dates_in_table("test__dev.b") == ["2024-01-02 00:30:00"] - - -def test_prod_restatement_plan_causes_dev_intervals_to_be_processed_in_next_dev_plan( - tmp_path: Path, -): - """ - Scenario: - I have a model A[hourly] in prod - I create dev and add a model B[daily] - I prod, I restate *one hour* of A - In dev, I run a normal plan instead of a cadence run - - Outcome: - The whole day for B should be restated as part of a normal plan - """ - - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - 
), - start '2024-01-01 00:00:00', - cron '@hourly' - ); - - select account_id, ts from test.external_table; - """ - - model_b = """ - MODEL ( - name test.b, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ts - ), - cron '@daily' - ); - - select account_id, ts from test.a where ts between @start_ts and @end_ts; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - with open(models_dir / "a.sql", "w") as f: - f.write(model_a) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "ts": exp.DataType.build("timestamp"), - } - external_table = exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # plan + apply A[hourly] in prod - ctx.plan(auto_apply=True, no_prompts=True) - - # add B[daily] in dev - with open(models_dir / "b.sql", "w") as f: - f.write(model_b) - - # plan + apply dev - ctx.load() - ctx.plan(environment="dev", auto_apply=True, no_prompts=True) - - def _dates_in_table(table_name: str) -> t.List[str]: - return [ - str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") - ] - - # verify initial state - for tbl in ["test.a", "test__dev.b"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # restate A in prod - engine_adapter.execute("delete from test.external_table where ts = 
'2024-01-01 01:30:00'") - ctx.plan( - restate_models=["test.a"], - start="2024-01-01 01:00:00", - end="2024-01-01 02:00:00", - auto_apply=True, - no_prompts=True, - ) - - # verify result - assert _dates_in_table("test.a") == [ - "2024-01-01 00:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # dev shouldnt have been affected yet - assert _dates_in_table("test__dev.b") == [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - # plan dev which should trigger the missing intervals to get repopulated - ctx.plan(environment="dev", auto_apply=True, no_prompts=True) - - # dev should have the restated data - for tbl in ["test.a", "test__dev.b"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ] - - -def test_prod_restatement_plan_causes_dev_intervals_to_be_widened_on_full_restatement_only_model( - tmp_path, -): - """ - Scenario: - I have am INCREMENTAL_BY_TIME_RANGE model A[daily] in prod - I create dev and add a INCREMENTAL_BY_UNIQUE_KEY model B (which supports full restatement only) - I prod, I restate one day of A which should cause intervals in dev to be cleared (but not processed) - In dev, I run a plan - - Outcome: - In the dev plan, the entire model for B should be rebuilt because it does not support partial restatement - """ - - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@daily' - ); - - select account_id, ts from test.external_table where ts between @start_ts and @end_ts; - """ - - model_b = """ - MODEL ( - name test.b, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key (account_id, ts) - ), - cron '@daily' - ); - - select account_id, ts from test.a where ts between @start_ts and @end_ts; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - with open(models_dir / "a.sql", "w") as f: - f.write(model_a) - - config = 
Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 00:30:00", - "2024-01-02 01:30:00", - "2024-01-03 02:30:00", - "2024-01-04 00:30:00", - ], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "ts": exp.DataType.build("timestamp"), - } - external_table = exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # plan + apply A[daily] in prod - ctx.plan(auto_apply=True) - - # add B[daily] in dev - with open(models_dir / "b.sql", "w") as f: - f.write(model_b) - - # plan + apply dev - ctx.load() - ctx.plan(environment="dev", auto_apply=True) - - def _dates_in_table(table_name: str) -> t.List[str]: - return [ - str(r[0]) for r in engine_adapter.fetchall(f"select ts from {table_name} order by ts") - ] - - # verify initial state - for tbl in ["test.a", "test__dev.b"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-02 01:30:00", - "2024-01-03 02:30:00", - "2024-01-04 00:30:00", - ] - - # restate A in prod - engine_adapter.execute("delete from test.external_table where ts = '2024-01-02 01:30:00'") - ctx.plan( - restate_models=["test.a"], - start="2024-01-02 00:00:00", - end="2024-01-03 00:00:00", - auto_apply=True, - no_prompts=True, - ) - - # verify result - assert _dates_in_table("test.a") == [ - "2024-01-01 00:30:00", - "2024-01-03 02:30:00", - "2024-01-04 00:30:00", - ] - - # dev shouldnt have been affected yet - assert _dates_in_table("test__dev.b") == [ - "2024-01-01 00:30:00", - "2024-01-02 01:30:00", - "2024-01-03 02:30:00", - "2024-01-04 00:30:00", - 
] - - # plan dev which should trigger the missing intervals to get repopulated - ctx.plan(environment="dev", auto_apply=True) - - # dev should have fully refreshed - # this is proven by the fact that INCREMENTAL_BY_UNIQUE_KEY cant propagate deletes, so if the - # model was not fully rebuilt, the deleted record would still be present - for tbl in ["test.a", "test__dev.b"]: - assert _dates_in_table(tbl) == [ - "2024-01-01 00:30:00", - "2024-01-03 02:30:00", - "2024-01-04 00:30:00", - ] - - -def test_prod_restatement_plan_missing_model_in_dev( - tmp_path: Path, -): - """ - Scenario: - I have a model B in prod but only model A in dev - I restate B in prod - - Outcome: - The A model should be ignore and the plan shouldn't fail - """ - - model_a = """ - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@hourly' - ); - - select account_id, ts from test.external_table; - """ - - model_b = """ - MODEL ( - name test.b, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ts - ), - cron '@daily' - ); - - select account_id, ts from test.external_table where ts between @start_ts and @end_ts; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - with open(models_dir / "a.sql", "w") as f: - f.write(model_a) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) - ctx = Context(paths=[tmp_path], config=config) - - engine_adapter = ctx.engine_adapter - engine_adapter.create_schema("test") - - # source data - df = pd.DataFrame( - { - "account_id": [1001, 1002, 1003, 1004], - "ts": [ - "2024-01-01 00:30:00", - "2024-01-01 01:30:00", - "2024-01-01 02:30:00", - "2024-01-02 00:30:00", - ], - } - ) - columns_to_types = { - "account_id": exp.DataType.build("int"), - "ts": exp.DataType.build("timestamp"), - } - external_table = exp.table_(table="external_table", db="test", quoted=True) - engine_adapter.create_table(table_name=external_table, target_columns_to_types=columns_to_types) - 
engine_adapter.insert_append( - table_name=external_table, query_or_df=df, target_columns_to_types=columns_to_types - ) - - # plan + apply A[hourly] in dev - ctx.plan("dev", auto_apply=True, no_prompts=True) - - # add B[daily] in prod and remove A - with open(models_dir / "b.sql", "w") as f: - f.write(model_b) - Path(models_dir / "a.sql").unlink() - - # plan + apply dev - ctx.load() - ctx.plan(auto_apply=True, no_prompts=True) - - # restate B in prod - ctx.plan( - restate_models=["test.b"], - start="2024-01-01", - end="2024-01-02", - auto_apply=True, - no_prompts=True, - ) - - -def test_prod_restatement_plan_includes_related_unpromoted_snapshots(tmp_path: Path): - """ - Scenario: - - I have models A <- B in prod - - I have models A <- B <- C in dev - - Both B and C have gone through a few iterations in dev so multiple snapshot versions exist - for them but not all of them are promoted / active - - I restate A in prod - - Outcome: - - Intervals should be cleared for all of the versions of B and C, regardless - of if they are active in any particular environment, in case they ever get made - active - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - (models_dir / "a.sql").write_text(""" - MODEL ( - name test.a, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@daily' - ); - - select 1 as a, now() as ts; - """) - - (models_dir / "b.sql").write_text(""" - MODEL ( - name test.b, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@daily' - ); - - select a, ts from test.a - """) - - config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01")) - ctx = Context(paths=[tmp_path], config=config) - - def _all_snapshots() -> t.Dict[SnapshotId, Snapshot]: - all_snapshot_ids = [ - SnapshotId(name=name, identifier=identifier) - for (name, identifier) in ctx.state_sync.state_sync.engine_adapter.fetchall( # type: ignore - "select name, 
identifier from sqlmesh._snapshots" - ) - ] - return ctx.state_sync.get_snapshots(all_snapshot_ids) - - # plan + apply prod - ctx.plan(environment="prod", auto_apply=True) - assert len(_all_snapshots()) == 2 - - # create dev with new version of B - (models_dir / "b.sql").write_text(""" - MODEL ( - name test.b, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@daily' - ); - - select a, ts, 'b dev 1' as change from test.a - """) - - ctx.load() - ctx.plan(environment="dev", auto_apply=True) - assert len(_all_snapshots()) == 3 - - # update B (new version) and create C - (models_dir / "b.sql").write_text(""" - MODEL ( - name test.b, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column "ts" - ), - start '2024-01-01 00:00:00', - cron '@daily' - ); - - select a, ts, 'b dev 2' as change from test.a - """) - - (models_dir / "c.sql").write_text(""" - MODEL ( - name test.c, - kind FULL, - cron '@daily' - ); - - select *, 'c initial' as val from test.b - """) - - ctx.load() - ctx.plan(environment="dev", auto_apply=True) - assert len(_all_snapshots()) == 5 - - # update C (new version), create D (unrelated) - (models_dir / "c.sql").write_text(""" - MODEL ( - name test.c, - kind FULL, - cron '@daily' - ); - - select *, 'c updated' as val from test.b - """) - - (models_dir / "d.sql").write_text(""" - MODEL ( - name test.d, - cron '@daily' - ); - - select 1 as unrelated - """) - - ctx.load() - ctx.plan(environment="dev", auto_apply=True) - all_snapshots_prior_to_restatement = _all_snapshots() - assert len(all_snapshots_prior_to_restatement) == 7 - - def _snapshot_instances(lst: t.Dict[SnapshotId, Snapshot], name_match: str) -> t.List[Snapshot]: - return [s for s_id, s in lst.items() if name_match in s_id.name] - - # verify initial state - - # 1 instance of A (prod) - assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"a"')) == 1 - - # 3 instances of B (original in prod + 2 updates in dev) - assert 
len(_snapshot_instances(all_snapshots_prior_to_restatement, '"b"')) == 3 - - # 2 instances of C (initial + update in dev) - assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"c"')) == 2 - - # 1 instance of D (initial - dev) - assert len(_snapshot_instances(all_snapshots_prior_to_restatement, '"d"')) == 1 - - # restate A in prod - ctx.plan(environment="prod", restate_models=['"memory"."test"."a"'], auto_apply=True) - - all_snapshots_after_restatement = _all_snapshots() - - # All versions of B and C in dev should have had intervals cleared - # D in dev should not be touched and A + B in prod shoud also not be touched - a = _snapshot_instances(all_snapshots_after_restatement, '"a"') - assert len(a) == 1 - - b = _snapshot_instances(all_snapshots_after_restatement, '"b"') - # the 1 B instance in prod should be populated and 2 in dev (1 active) should be cleared - assert len(b) == 3 - assert len([s for s in b if not s.intervals]) == 2 - - c = _snapshot_instances(all_snapshots_after_restatement, '"c"') - # the 2 instances of C in dev (1 active) should be cleared - assert len(c) == 2 - assert len([s for s in c if not s.intervals]) == 2 - - d = _snapshot_instances(all_snapshots_after_restatement, '"d"') - # D should not be touched since it's in no way downstream of A in prod - assert len(d) == 1 - assert d[0].intervals - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dev_restatement_of_prod_model(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True) - - restatement_plan = context.plan_builder("dev", restate_models=["*"]).build() - assert set(restatement_plan.restatements) == { - context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id, - 
context.get_snapshot("sushi.top_waiters").snapshot_id, - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_snapshot_table_exists_for_promoted_snapshot(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - context.plan("dev", auto_apply=True, no_prompts=True, skip_tests=True) - - # Drop the views and make sure SQLMesh recreates them later - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters", raise_if_missing=True) - context.engine_adapter.drop_view(top_waiters_snapshot.table_name()) - context.engine_adapter.drop_view(top_waiters_snapshot.table_name(False)) - - # Make the environment unfinalized to force recreation of all views in the virtual layer - context.state_sync.state_sync.engine_adapter.execute( - "UPDATE sqlmesh._environments SET finalized_ts = NULL WHERE name = 'dev'" - ) - - context.plan( - "prod", - restate_models=["sushi.top_waiters"], - auto_apply=True, - no_prompts=True, - skip_tests=True, - ) - assert context.engine_adapter.table_exists(top_waiters_snapshot.table_name()) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_against_expired_environment(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - modified_models = {model.fqn, context.get_model("sushi.top_waiters").fqn} - - plan = context.plan_builder("dev").build() - assert plan.has_changes - assert set(plan.context_diff.modified_snapshots) == modified_models - assert plan.missing_intervals - context.apply(plan) - - # Make sure there are no changes when comparing against the existing environment. 
- plan = context.plan_builder("dev").build() - assert not plan.has_changes - assert not plan.context_diff.modified_snapshots - assert not plan.missing_intervals - - # Invalidate the environment and make sure that the plan detects the changes. - context.invalidate_environment("dev") - plan = context.plan_builder("dev").build() - assert plan.has_changes - assert set(plan.context_diff.modified_snapshots) == modified_models - assert not plan.missing_intervals - context.apply(plan) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_new_forward_only_model_concurrent_versions(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - new_model_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only TRUE, - on_destructive_change 'allow', - ), - ); - - SELECT '2023-01-07' AS ds, 1 AS a; - """ - ) - new_model = load_sql_based_model(new_model_expr) - - # Add the first version of the model and apply it to dev_a. 
- context.upsert_model(new_model) - snapshot_a = context.get_snapshot(new_model.name) - plan_a = context.plan_builder("dev_a").build() - snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id] - - assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots - assert snapshot_a.snapshot_id in plan_a.context_diff.added - assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING - - context.apply(plan_a) - - new_model_alt_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only TRUE, - on_destructive_change 'allow', - ), - ); - - SELECT '2023-01-07' AS ds, 1 AS b; - """ - ) - new_model_alt = load_sql_based_model(new_model_alt_expr) - - # Add the second version of the model but don't apply it yet - context.upsert_model(new_model_alt) - snapshot_b = context.get_snapshot(new_model_alt.name) - plan_b = context.plan_builder("dev_b").build() - snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id] - - assert snapshot_b.snapshot_id in plan_b.context_diff.new_snapshots - assert snapshot_b.snapshot_id in plan_b.context_diff.added - assert snapshot_b.change_category == SnapshotChangeCategory.BREAKING - - assert snapshot_b.fingerprint != snapshot_a.fingerprint - assert snapshot_b.version == snapshot_a.version - - # Apply the 1st version to prod - context.upsert_model(new_model) - plan_prod_a = context.plan_builder("prod").build() - assert snapshot_a.snapshot_id in plan_prod_a.snapshots - assert ( - plan_prod_a.snapshots[snapshot_a.snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - context.apply(plan_prod_a) - - df = context.fetchdf("SELECT * FROM memory.sushi.new_model") - assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}} - - # Modify the 1st version in prod to trigger a forward-only change - new_model = add_projection_to_model(t.cast(SqlModel, new_model)) - context.upsert_model(new_model) - context.plan("prod", auto_apply=True, no_prompts=True, 
skip_tests=True) - - # Apply the 2nd version to dev_b. - # At this point the snapshot of the 2nd version has already been categorized but not - # persisted in the state. This means that when the snapshot of the 1st version was - # being unpaused during promotion to prod, the state of the 2nd version snapshot was not updated - context.apply(plan_b) - - # Apply the 2nd version to prod - context.upsert_model(new_model_alt) - plan_prod_b = context.plan_builder("prod").build() - assert ( - plan_prod_b.snapshots[snapshot_b.snapshot_id].change_category - == SnapshotChangeCategory.BREAKING - ) - assert not plan_prod_b.requires_backfill - context.apply(plan_prod_b) - - df = context.fetchdf("SELECT * FROM memory.sushi.new_model").replace({np.nan: None}) - assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: None}} - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_new_forward_only_model_same_dev_environment(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - new_model_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only TRUE, - on_destructive_change 'allow', - ), - ); - - SELECT '2023-01-07' AS ds, 1 AS a; - """ - ) - new_model = load_sql_based_model(new_model_expr) - - # Add the first version of the model and apply it to dev. 
- context.upsert_model(new_model) - snapshot_a = context.get_snapshot(new_model.name) - plan_a = context.plan_builder("dev").build() - snapshot_a = plan_a.snapshots[snapshot_a.snapshot_id] - - assert snapshot_a.snapshot_id in plan_a.context_diff.new_snapshots - assert snapshot_a.snapshot_id in plan_a.context_diff.added - assert snapshot_a.change_category == SnapshotChangeCategory.BREAKING - - context.apply(plan_a) - - df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model") - assert df.to_dict() == {"ds": {0: "2023-01-07"}, "a": {0: 1}} - - new_model_alt_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only TRUE, - on_destructive_change 'allow', - ), - ); - - SELECT '2023-01-07' AS ds, 1 AS b; - """ - ) - new_model_alt = load_sql_based_model(new_model_alt_expr) - - # Add the second version of the model and apply it to the same environment. - context.upsert_model(new_model_alt) - snapshot_b = context.get_snapshot(new_model_alt.name) - - context.invalidate_environment("dev", sync=True) - plan_b = context.plan_builder("dev").build() - snapshot_b = plan_b.snapshots[snapshot_b.snapshot_id] - - context.apply(plan_b) - - df = context.fetchdf("SELECT * FROM memory.sushi__dev.new_model").replace({np.nan: None}) - assert df.to_dict() == {"ds": {0: "2023-01-07"}, "b": {0: 1}} - - -@time_machine.travel("2023-01-08 01:00:00 UTC") -def test_run_auto_restatement(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - context.engine_adapter.execute( - "CREATE TABLE _test_auto_restatement_intervals (name STRING, start_ds STRING, end_ds STRING)" - ) - - @macro() - def record_intervals( - evaluator, name: exp.Expression, start: exp.Expression, end: exp.Expression, **kwargs: t.Any - ) -> None: - if evaluator.runtime_stage == "evaluating": - evaluator.engine_adapter.insert_append( - "_test_auto_restatement_intervals", - pd.DataFrame({"name": [name.name], 
"start_ds": [start.name], "end_ds": [end.name]}), - ) - - new_model_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday - auto_restatement_intervals 3, - ), - start '2023-01-01', - ); - - @record_intervals('new_model', @start_ds, @end_ds); - - SELECT '2023-01-07' AS ds, 1 AS a; - """ - ) - new_model = load_sql_based_model(new_model_expr) - context.upsert_model(new_model) - - new_model_downstream_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model_downstream, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - ), - cron '@hourly', - ); - - @record_intervals('new_model_downstream', @start_ts, @end_ts); - - SELECT * FROM memory.sushi.new_model; - """ - ) - new_model_downstream = load_sql_based_model(new_model_downstream_expr) - context.upsert_model(new_model_downstream) - - plan = context.plan_builder("prod").build() - context.apply(plan) - - with time_machine.travel("2023-01-08 06:01:00 UTC"): - assert context.run() - - recorded_intervals_df = context.engine_adapter.fetchdf( - "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model'" - ) - # The first interval is the first backfill and the second interval should be the 3 auto restated intervals - assert recorded_intervals_df.to_dict() == { - "start_ds": {0: "2023-01-01", 1: "2023-01-05"}, - "end_ds": {0: "2023-01-07", 1: "2023-01-07"}, - } - recorded_intervals_downstream_df = context.engine_adapter.fetchdf( - "SELECT start_ds, end_ds FROM _test_auto_restatement_intervals WHERE name = 'new_model_downstream'" - ) - # The first interval is the first backfill, the second interval should be the 3 days of restated intervals, and - # the third interval should catch up to the current hour - assert recorded_intervals_downstream_df.to_dict() == { - "start_ds": { - 0: "2023-01-01 00:00:00", - 1: "2023-01-05 00:00:00", - 2: "2023-01-08 01:00:00", - }, - "end_ds": 
{ - 0: "2023-01-08 00:59:59.999999", - 1: "2023-01-07 23:59:59.999999", - 2: "2023-01-08 05:59:59.999999", - }, - } - - snapshot = context.get_snapshot(new_model.name) - snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[ - snapshot.snapshot_id - ] - assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00") - assert not snapshot.pending_restatement_intervals - - snapshot_downstream = context.get_snapshot(new_model_downstream.name) - snapshot_downstream = context.state_sync.state_sync.get_snapshots( - [snapshot_downstream.snapshot_id] - )[snapshot_downstream.snapshot_id] - assert not snapshot_downstream.next_auto_restatement_ts - assert not snapshot_downstream.pending_restatement_intervals - - -@time_machine.travel("2023-01-08 01:00:00 UTC") -def test_run_auto_restatement_plan_preview(init_and_plan_context: t.Callable): - context, init_plan = init_and_plan_context("examples/sushi") - context.apply(init_plan) - - new_model_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - auto_restatement_cron '0 6 * * 7', - ), - start '2023-01-01', - ); - - SELECT '2023-01-07' AS ds, 1 AS a; - """ - ) - new_model = load_sql_based_model(new_model_expr) - context.upsert_model(new_model) - snapshot = context.get_snapshot(new_model.name) - - plan_dev = context.plan_builder("dev").build() - # Make sure that a limited preview is computed by default - assert to_timestamp(plan_dev.start) == to_timestamp("2023-01-07") - assert plan_dev.missing_intervals == [ - SnapshotIntervals( - snapshot.snapshot_id, - [(to_timestamp("2023-01-07"), to_timestamp("2023-01-08"))], - ) - ] - assert not plan_dev.deployability_index.is_deployable(snapshot.snapshot_id) - context.apply(plan_dev) - - plan_prod = context.plan_builder("prod").build() - assert plan_prod.missing_intervals == [ - SnapshotIntervals( - context.get_snapshot(new_model.name).snapshot_id, - [ - (to_timestamp("2023-01-01"), 
to_timestamp("2023-01-02")), - (to_timestamp("2023-01-02"), to_timestamp("2023-01-03")), - (to_timestamp("2023-01-03"), to_timestamp("2023-01-04")), - (to_timestamp("2023-01-04"), to_timestamp("2023-01-05")), - (to_timestamp("2023-01-05"), to_timestamp("2023-01-06")), - (to_timestamp("2023-01-06"), to_timestamp("2023-01-07")), - (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")), - ], - ) - ] - context.apply(plan_prod) - - -@time_machine.travel("2023-01-08 01:00:00 UTC") -def test_run_auto_restatement_failure(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - @macro() - def fail_auto_restatement(evaluator, start: exp.Expression, **kwargs: t.Any) -> None: - if evaluator.runtime_stage == "evaluating" and start.name != "2023-01-01": - raise Exception("Failed") - - new_model_expr = d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - auto_restatement_cron '0 6 * * 7', -- At 6am every Sunday - auto_restatement_intervals 3, - ), - start '2023-01-01', - ); - - @fail_auto_restatement(@start_ds); - - SELECT '2023-01-07' AS ds, 1 AS a; - """ - ) - new_model = load_sql_based_model(new_model_expr) - context.upsert_model(new_model) - - plan = context.plan_builder("prod").build() - context.apply(plan) - - with time_machine.travel("2023-01-08 06:01:00 UTC"): - run_status = context.run() - assert run_status.is_failure - - snapshot = context.get_snapshot(new_model.name) - snapshot = context.state_sync.state_sync.get_snapshots([snapshot.snapshot_id])[ - snapshot.snapshot_id - ] - assert snapshot.next_auto_restatement_ts == to_timestamp("2023-01-15 06:00:00") - assert snapshot.pending_restatement_intervals == [ - (to_timestamp("2023-01-05"), to_timestamp("2023-01-08")) - ] - - -def test_plan_twice_with_star_macro_yields_no_diff(tmp_path: Path): - init_example_project(tmp_path, engine_type="duckdb") - - star_model_definition = """ - MODEL ( - name sqlmesh_example.star_model, - 
kind FULL - ); - - SELECT @STAR(sqlmesh_example.full_model) FROM sqlmesh_example.full_model - """ - - star_model_path = tmp_path / "models" / "star_model.sql" - star_model_path.write_text(star_model_definition) - - db_path = str(tmp_path / "db.db") - config = Config( - gateways={"main": GatewayConfig(connection=DuckDBConnectionConfig(database=db_path))}, - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - ) - context = Context(paths=tmp_path, config=config) - context.plan(auto_apply=True, no_prompts=True) - - # Instantiate new context to remove caches etc - new_context = Context(paths=tmp_path, config=config) - - star_model = new_context.get_model("sqlmesh_example.star_model") - assert ( - star_model.render_query_or_raise().sql() - == 'SELECT CAST("full_model"."item_id" AS INT) AS "item_id", CAST("full_model"."num_orders" AS BIGINT) AS "num_orders" FROM "db"."sqlmesh_example"."full_model" AS "full_model"' - ) - - new_plan = new_context.plan_builder().build() - assert not new_plan.has_changes - assert not new_plan.new_snapshots - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_create_environment_no_changes_with_selector(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - with pytest.raises(NoChangesPlanError): - context.plan_builder("dev").build() - - plan = context.plan_builder("dev", select_models=["*top_waiters"]).build() - assert not plan.missing_intervals - context.apply(plan) - - schema_objects = context.engine_adapter.get_data_objects("sushi__dev") - assert {o.name for o in schema_objects} == {"top_waiters"} - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_empty_backfill(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - plan = context.plan_builder("prod", skip_tests=True, empty_backfill=True).build() - assert plan.missing_intervals - assert plan.empty_backfill - assert not plan.requires_backfill - - 
context.apply(plan) - - for model in context.models.values(): - if model.is_seed or model.kind.is_symbolic: - continue - row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM {model.name}")[0] - assert row_num == 0 - - plan = context.plan_builder("prod", skip_tests=True).build() - assert not plan.requires_backfill - assert not plan.has_changes - assert not plan.missing_intervals - - snapshots = plan.snapshots - for snapshot in snapshots.values(): - if not snapshot.intervals: - continue - assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_empty_backfill_new_model(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - new_model = load_sql_based_model( - d.parse( - """ - MODEL ( - name memory.sushi.new_model, - kind FULL, - cron '0 8 * * *', - start '2023-01-01', - ); - - SELECT 1 AS one; - """ - ) - ) - new_model_name = context.upsert_model(new_model).fqn - - with time_machine.travel("2023-01-09 00:00:00 UTC"): - plan = context.plan_builder("dev", skip_tests=True, empty_backfill=True).build() - assert plan.end == to_datetime("2023-01-09") - assert plan.missing_intervals - assert plan.empty_backfill - assert not plan.requires_backfill - - context.apply(plan) - - for model in context.models.values(): - if model.is_seed or model.kind.is_symbolic: - continue - row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.new_model")[ - 0 - ] - assert row_num == 0 - - plan = context.plan_builder("prod", skip_tests=True).build() - assert not plan.requires_backfill - assert not plan.missing_intervals - - snapshots = plan.snapshots - for snapshot in snapshots.values(): - if not snapshot.intervals: - continue - elif snapshot.name == new_model_name: - assert snapshot.intervals[-1][1] == to_timestamp("2023-01-09") - else: - assert snapshot.intervals[-1][1] <= to_timestamp("2023-01-08") - - 
-@time_machine.travel("2023-01-08 15:00:00 UTC") -@pytest.mark.parametrize("forward_only", [False, True]) -def test_plan_repairs_unrenderable_snapshot_state( - init_and_plan_context: t.Callable, forward_only: bool -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - target_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") - assert target_snapshot - - # Manually corrupt the snapshot's query - raw_snapshot = context.state_sync.state_sync.engine_adapter.fetchone( - f"SELECT snapshot FROM sqlmesh._snapshots WHERE name = '{target_snapshot.name}' AND identifier = '{target_snapshot.identifier}'" - )[0] # type: ignore - parsed_snapshot = json.loads(raw_snapshot) - parsed_snapshot["node"]["query"] = "SELECT @missing_macro()" - context.state_sync.state_sync.engine_adapter.update_table( - "sqlmesh._snapshots", - {"snapshot": json.dumps(parsed_snapshot)}, - f"name = '{target_snapshot.name}' AND identifier = '{target_snapshot.identifier}'", - ) - - context.clear_caches() - target_snapshot_in_state = context.state_sync.get_snapshots([target_snapshot.snapshot_id])[ - target_snapshot.snapshot_id - ] - - with pytest.raises(Exception): - target_snapshot_in_state.model.render_query_or_raise() - - # Repair the snapshot by creating a new version of it - context.upsert_model(target_snapshot.model.name, stamp="repair") - target_snapshot = context.get_snapshot(target_snapshot.name) - - plan_builder = context.plan_builder("prod", forward_only=forward_only) - plan = plan_builder.build() - if not forward_only: - assert target_snapshot.snapshot_id in {i.snapshot_id for i in plan.missing_intervals} - assert plan.directly_modified == {target_snapshot.snapshot_id} - plan_builder.set_choice(target_snapshot, SnapshotChangeCategory.NON_BREAKING) - plan = plan_builder.build() - - context.apply(plan) - - context.clear_caches() - assert context.get_snapshot(target_snapshot.name).model.render_query_or_raise() - target_snapshot_in_state = 
context.state_sync.get_snapshots([target_snapshot.snapshot_id])[ - target_snapshot.snapshot_id - ] - assert target_snapshot_in_state.model.render_query_or_raise() - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_no_backfill_for_model_downstream_of_metadata_change(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - # Make sushi.waiter_revenue_by_day a forward-only model. - forward_only_model = context.get_model("sushi.waiter_revenue_by_day") - updated_model_kind = forward_only_model.kind.copy(update={"forward_only": True}) - forward_only_model = forward_only_model.copy(update={"kind": updated_model_kind}) - context.upsert_model(forward_only_model) - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - # Make a metadata change upstream of the forward-only model. - context.upsert_model("sushi.orders", owner="new_owner") - - plan = context.plan_builder("test_dev").build() - assert plan.has_changes - assert not plan.directly_modified - assert not plan.indirectly_modified - assert not plan.missing_intervals - assert all( - snapshot.change_category == SnapshotChangeCategory.METADATA - for snapshot in plan.new_snapshots - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_evaluate_uncategorized_snapshot(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Add a new projection - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - # Downstream model references the new projection - downstream_model = context.get_model("sushi.top_waiters") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, downstream_model), literal=False)) - - df = context.evaluate( - "sushi.top_waiters", start="2023-01-05", end="2023-01-06", execution_time=now() - ) - assert set(df["one"].tolist()) == {1} - - -@time_machine.travel("2023-01-08 15:00:00 
UTC") -def test_table_name(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") - assert snapshot - assert ( - context.table_name("sushi.waiter_revenue_by_day", "prod") - == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}" - ) - - with pytest.raises(SQLMeshError, match="Environment 'dev' was not found."): - context.table_name("sushi.waiter_revenue_by_day", "dev") - - with pytest.raises( - SQLMeshError, match="Model 'sushi.missing' was not found in environment 'prod'." - ): - context.table_name("sushi.missing", "prod") - - # Add a new projection - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - context.plan("dev_a", auto_apply=True, no_prompts=True, skip_tests=True) - - new_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") - assert new_snapshot.version != snapshot.version - - assert ( - context.table_name("sushi.waiter_revenue_by_day", "dev_a") - == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{new_snapshot.version}" - ) - - # Make a forward-only change - context.upsert_model(model, stamp="forward_only") - - context.plan("dev_b", auto_apply=True, no_prompts=True, skip_tests=True, forward_only=True) - - forward_only_snapshot = context.get_snapshot("sushi.waiter_revenue_by_day") - assert forward_only_snapshot.version == snapshot.version - assert forward_only_snapshot.dev_version != snapshot.version - - assert ( - context.table_name("sushi.waiter_revenue_by_day", "dev_b") - == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{forward_only_snapshot.dev_version}__dev" - ) - - assert ( - context.table_name("sushi.waiter_revenue_by_day", "dev_b", prod=True) - == f"memory.sqlmesh__sushi.sushi__waiter_revenue_by_day__{snapshot.version}" - ) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def 
test_full_model_change_with_plan_start_not_matching_model_start( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - model = context.get_model("sushi.top_waiters") - context.upsert_model(model, kind=model_kind_type_from_name("FULL")()) # type: ignore - - # Apply the change with --skip-backfill first and no plan start - context.plan("dev", skip_tests=True, skip_backfill=True, no_prompts=True, auto_apply=True) - - # Apply the plan again but this time don't skip backfill and set start - # to be later than the model start - context.plan("dev", skip_tests=True, no_prompts=True, auto_apply=True, start="1 day ago") - - # Check that the number of rows is not 0 - row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi__dev.top_waiters")[0] - assert row_num > 0 - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_indirect_non_breaking_view_is_updated_with_new_table_references( - init_and_plan_context: t.Callable, -): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Add a new projection to the base model - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model(add_projection_to_model(t.cast(SqlModel, model))) - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - # Run the janitor to delete the old snapshot record - context.run_janitor(ignore_ttl=True) - - # Check the downstream view and make sure it's still queryable - assert context.get_model("sushi.top_waiters").kind.is_view - row_num = context.engine_adapter.fetchone(f"SELECT COUNT(*) FROM sushi.top_waiters")[0] - assert row_num > 0 - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_plan_explain(init_and_plan_context: t.Callable): - old_console = get_console() - set_console(TerminalConsole()) - - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - waiter_revenue_by_day_model = 
context.get_model("sushi.waiter_revenue_by_day") - waiter_revenue_by_day_model = add_projection_to_model( - t.cast(SqlModel, waiter_revenue_by_day_model) - ) - context.upsert_model(waiter_revenue_by_day_model) - - waiter_revenue_by_day_snapshot = context.get_snapshot(waiter_revenue_by_day_model.name) - top_waiters_snapshot = context.get_snapshot("sushi.top_waiters") - - common_kwargs = dict(skip_tests=True, no_prompts=True, explain=True) - - # For now just making sure the plan doesn't error - context.plan("dev", **common_kwargs) - context.plan("dev", **common_kwargs, skip_backfill=True) - context.plan("dev", **common_kwargs, empty_backfill=True) - context.plan("dev", **common_kwargs, forward_only=True, enable_preview=True) - context.plan("prod", **common_kwargs) - context.plan("prod", **common_kwargs, forward_only=True) - context.plan("prod", **common_kwargs, restate_models=[waiter_revenue_by_day_model.name]) - - set_console(old_console) - - # Make sure that the now changes were actually applied - for target_env in ("dev", "prod"): - plan = context.plan_builder(target_env, skip_tests=True).build() - assert plan.has_changes - assert plan.missing_intervals - assert plan.directly_modified == {waiter_revenue_by_day_snapshot.snapshot_id} - assert len(plan.new_snapshots) == 2 - assert {s.snapshot_id for s in plan.new_snapshots} == { - waiter_revenue_by_day_snapshot.snapshot_id, - top_waiters_snapshot.snapshot_id, - } - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_requirements(sushi_dbt_context: Context): - assert set(sushi_dbt_context.requirements) == {"dbt-core", "dbt-duckdb"} - assert sushi_dbt_context.requirements["dbt-core"].startswith("1.") - assert sushi_dbt_context.requirements["dbt-duckdb"].startswith("1.") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_dialect_with_normalization_strategy(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context( - "tests/fixtures/dbt/sushi_test", 
config="test_config_with_normalization_strategy" - ) - assert context.default_dialect == "duckdb,normalization_strategy=LOWERCASE" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_dbt_before_all_with_var_ref_source(init_and_plan_context: t.Callable): - _, plan = init_and_plan_context( - "tests/fixtures/dbt/sushi_test", config="test_config_with_normalization_strategy" - ) - environment_statements = plan.to_evaluatable().environment_statements - assert environment_statements - rendered_statements = [e.render_before_all(dialect="duckdb") for e in environment_statements] - assert rendered_statements[0] == [ - "CREATE TABLE IF NOT EXISTS analytic_stats (physical_table TEXT, evaluation_time TEXT)", - "CREATE TABLE IF NOT EXISTS to_be_executed_last (col TEXT)", - "SELECT 1 AS var, 'items' AS src, 'waiters' AS ref", - ] - - -@pytest.mark.parametrize( - "context_fixture", - ["sushi_context", "sushi_dbt_context", "sushi_test_dbt_context", "sushi_no_default_catalog"], -) -def test_model_add(context_fixture: Context, request): - initial_add(request.getfixturevalue(context_fixture), "dev") - - -def test_model_removed(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - - top_waiters_snapshot_id = sushi_context.get_snapshot( - "sushi.top_waiters", raise_if_missing=True - ).snapshot_id - - sushi_context._models.pop('"memory"."sushi"."top_waiters"') - - def _validate_plan(context, plan): - validate_plan_changes(plan, removed=[top_waiters_snapshot_id]) - assert not plan.missing_intervals - - def _validate_apply(context): - assert not sushi_context.get_snapshot("sushi.top_waiters", raise_if_missing=False) - assert sushi_context.state_reader.get_snapshots([top_waiters_snapshot_id]) - env = sushi_context.state_reader.get_environment(environment) - assert env - assert all(snapshot.name != '"memory"."sushi"."top_waiters"' for snapshot in env.snapshots) - - apply_to_environment( - sushi_context, - environment, - 
SnapshotChangeCategory.BREAKING, - plan_validators=[_validate_plan], - apply_validators=[_validate_apply], - ) - - -def test_non_breaking_change(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - validate_query_change(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING, False) - - -def test_breaking_change(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - validate_query_change(sushi_context, environment, SnapshotChangeCategory.BREAKING, False) - - -def test_logical_change(sushi_context: Context): - environment = "dev" - initial_add(sushi_context, environment) - previous_sushi_items_version = sushi_context.get_snapshot( - "sushi.items", raise_if_missing=True - ).version - - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.FLOAT, - DataType.Type.DOUBLE, - ) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - assert ( - sushi_context.get_snapshot("sushi.items", raise_if_missing=True).version - == previous_sushi_items_version - ) - - -def validate_query_change( - context: Context, - environment: str, - change_category: SnapshotChangeCategory, - logical: bool, -): - versions = snapshots_to_versions(context.snapshots.values()) - - change_data_type( - context, - "sushi.items", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - - directly_modified = ['"memory"."sushi"."items"'] - indirectly_modified = [ - '"memory"."sushi"."order_items"', - '"memory"."sushi"."waiter_revenue_by_day"', - '"memory"."sushi"."customer_revenue_by_day"', - '"memory"."sushi"."customer_revenue_lifetime"', - '"memory"."sushi"."top_waiters"', - "assert_item_price_above_zero", - ] - not_modified = [ - snapshot.name - for snapshot in context.snapshots.values() 
- if snapshot.name not in directly_modified and snapshot.name not in indirectly_modified - ] - - if change_category == SnapshotChangeCategory.BREAKING and not logical: - models_same = not_modified - models_different = directly_modified + indirectly_modified - elif change_category == SnapshotChangeCategory.FORWARD_ONLY: - models_same = not_modified + directly_modified + indirectly_modified - models_different = [] - else: - models_same = not_modified + indirectly_modified - models_different = directly_modified - - def _validate_plan(context, plan): - validate_plan_changes(plan, modified=directly_modified + indirectly_modified) - assert bool(plan.missing_intervals) != logical - - def _validate_apply(context): - current_versions = snapshots_to_versions(context.snapshots.values()) - validate_versions_same(models_same, versions, current_versions) - validate_versions_different(models_different, versions, current_versions) - - apply_to_environment( - context, - environment, - change_category, - plan_validators=[_validate_plan], - apply_validators=[_validate_apply], - ) - - -@pytest.mark.parametrize( - "from_, to", - [ - (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.FULL), - (ModelKindName.FULL, ModelKindName.INCREMENTAL_BY_TIME_RANGE), - ], -) -def test_model_kind_change(from_: ModelKindName, to: ModelKindName, sushi_context: Context): - environment = f"test_model_kind_change__{from_.value.lower()}__{to.value.lower()}" - incremental_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True).copy() - - if from_ != ModelKindName.INCREMENTAL_BY_TIME_RANGE: - change_model_kind(sushi_context, from_) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - if to == ModelKindName.INCREMENTAL_BY_TIME_RANGE: - sushi_context.upsert_model(incremental_snapshot.model) - else: - change_model_kind(sushi_context, to) - - logical = to in (ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.EMBEDDED) - 
validate_model_kind_change(to, sushi_context, environment, logical=logical) - - -def change_model_kind(context: Context, kind: ModelKindName): - if kind in (ModelKindName.VIEW, ModelKindName.EMBEDDED, ModelKindName.FULL): - context.upsert_model( - "sushi.items", - partitioned_by=[], - ) - context.upsert_model("sushi.items", kind=model_kind_type_from_name(kind)()) # type: ignore - - -def validate_model_kind_change( - kind_name: ModelKindName, - context: Context, - environment: str, - *, - logical: bool, -): - directly_modified = ['"memory"."sushi"."items"'] - indirectly_modified = [ - '"memory"."sushi"."order_items"', - '"memory"."sushi"."waiter_revenue_by_day"', - '"memory"."sushi"."customer_revenue_by_day"', - '"memory"."sushi"."customer_revenue_lifetime"', - '"memory"."sushi"."top_waiters"', - "assert_item_price_above_zero", - ] - if kind_name == ModelKindName.INCREMENTAL_BY_TIME_RANGE: - kind: ModelKind = IncrementalByTimeRangeKind(time_column=TimeColumn(column="event_date")) - elif kind_name == ModelKindName.INCREMENTAL_BY_UNIQUE_KEY: - kind = IncrementalByUniqueKeyKind(unique_key="id") - else: - kind = model_kind_type_from_name(kind_name)() # type: ignore - - def _validate_plan(context, plan): - validate_plan_changes(plan, modified=directly_modified + indirectly_modified) - assert ( - next( - snapshot - for snapshot in plan.snapshots.values() - if snapshot.name == '"memory"."sushi"."items"' - ).model.kind.name - == kind.name - ) - assert bool(plan.missing_intervals) != logical - - apply_to_environment( - context, - environment, - SnapshotChangeCategory.NON_BREAKING, - plan_validators=[_validate_plan], - ) - - -def test_environment_isolation(sushi_context: Context): - prod_snapshots = sushi_context.snapshots.values() - - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - directly_modified = ['"memory"."sushi"."items"'] - indirectly_modified = [ - '"memory"."sushi"."order_items"', - 
'"memory"."sushi"."waiter_revenue_by_day"', - '"memory"."sushi"."customer_revenue_by_day"', - '"memory"."sushi"."customer_revenue_lifetime"', - '"memory"."sushi"."top_waiters"', - "assert_item_price_above_zero", - ] - - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) - - # Verify prod unchanged - validate_apply_basics(sushi_context, "prod", prod_snapshots) - - def _validate_plan(context, plan): - validate_plan_changes(plan, modified=directly_modified + indirectly_modified) - assert not plan.missing_intervals - - apply_to_environment( - sushi_context, - "prod", - SnapshotChangeCategory.BREAKING, - plan_validators=[_validate_plan], - ) - - -def test_environment_promotion(sushi_context: Context): - initial_add(sushi_context, "dev") - - # Simulate prod "ahead" - change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) - apply_to_environment(sushi_context, "prod", SnapshotChangeCategory.BREAKING) - - # Simulate rebase - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) - - # Make changes in dev - change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DECIMAL) - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.NON_BREAKING) - - change_data_type(sushi_context, "sushi.top_waiters", DataType.Type.DOUBLE, DataType.Type.INT) - apply_to_environment(sushi_context, "dev", SnapshotChangeCategory.BREAKING) - - change_data_type( - sushi_context, - "sushi.customer_revenue_by_day", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - apply_to_environment( - sushi_context, - "dev", - SnapshotChangeCategory.FORWARD_ONLY, - allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], - ) - - # Promote to prod - def _validate_plan(context, plan): - sushi_items_snapshot = context.get_snapshot("sushi.items", raise_if_missing=True) - sushi_top_waiters_snapshot = context.get_snapshot( - "sushi.top_waiters", raise_if_missing=True - ) - 
sushi_customer_revenue_by_day_snapshot = context.get_snapshot( - "sushi.customer_revenue_by_day", raise_if_missing=True - ) - - assert ( - plan.context_diff.modified_snapshots[sushi_items_snapshot.name][0].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert ( - plan.context_diff.modified_snapshots[sushi_top_waiters_snapshot.name][0].change_category - == SnapshotChangeCategory.BREAKING - ) - assert ( - plan.context_diff.modified_snapshots[sushi_customer_revenue_by_day_snapshot.name][ - 0 - ].change_category - == SnapshotChangeCategory.NON_BREAKING - ) - assert plan.context_diff.snapshots[ - sushi_customer_revenue_by_day_snapshot.snapshot_id - ].is_forward_only - - apply_to_environment( - sushi_context, - "prod", - SnapshotChangeCategory.NON_BREAKING, - plan_validators=[_validate_plan], - allow_destructive_models=['"memory"."sushi"."customer_revenue_by_day"'], - ) - - -def test_no_override(sushi_context: Context) -> None: - change_data_type( - sushi_context, - "sushi.items", - DataType.Type.INT, - DataType.Type.BIGINT, - ) - - change_data_type( - sushi_context, - "sushi.order_items", - DataType.Type.INT, - DataType.Type.BIGINT, - ) - - plan_builder = sushi_context.plan_builder("prod") - plan = plan_builder.build() - - sushi_items_snapshot = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) - sushi_order_items_snapshot = sushi_context.get_snapshot( - "sushi.order_items", raise_if_missing=True - ) - sushi_water_revenue_by_day_snapshot = sushi_context.get_snapshot( - "sushi.waiter_revenue_by_day", raise_if_missing=True - ) - - items = plan.context_diff.snapshots[sushi_items_snapshot.snapshot_id] - order_items = plan.context_diff.snapshots[sushi_order_items_snapshot.snapshot_id] - waiter_revenue = plan.context_diff.snapshots[sushi_water_revenue_by_day_snapshot.snapshot_id] - - plan_builder.set_choice(items, SnapshotChangeCategory.BREAKING).set_choice( - order_items, SnapshotChangeCategory.NON_BREAKING - ) - plan_builder.build() - assert 
items.is_new_version - assert waiter_revenue.is_new_version - plan_builder.set_choice(items, SnapshotChangeCategory.NON_BREAKING) - plan_builder.build() - assert not waiter_revenue.is_new_version - - -@pytest.mark.parametrize( - "change_categories, expected", - [ - ([SnapshotChangeCategory.NON_BREAKING], SnapshotChangeCategory.BREAKING), - ([SnapshotChangeCategory.BREAKING], SnapshotChangeCategory.BREAKING), - ( - [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.NON_BREAKING], - SnapshotChangeCategory.BREAKING, - ), - ( - [SnapshotChangeCategory.NON_BREAKING, SnapshotChangeCategory.BREAKING], - SnapshotChangeCategory.BREAKING, - ), - ( - [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.NON_BREAKING], - SnapshotChangeCategory.BREAKING, - ), - ( - [SnapshotChangeCategory.BREAKING, SnapshotChangeCategory.BREAKING], - SnapshotChangeCategory.BREAKING, - ), - ], -) -def test_revert( - sushi_context: Context, - change_categories: t.List[SnapshotChangeCategory], - expected: SnapshotChangeCategory, -): - environment = "prod" - original_snapshot_id = sushi_context.get_snapshot("sushi.items", raise_if_missing=True) - - types = (DataType.Type.DOUBLE, DataType.Type.FLOAT, DataType.Type.DECIMAL) - assert len(change_categories) < len(types) - - for i, category in enumerate(change_categories): - change_data_type(sushi_context, "sushi.items", *types[i : i + 2]) - apply_to_environment(sushi_context, environment, category) - assert ( - sushi_context.get_snapshot("sushi.items", raise_if_missing=True) != original_snapshot_id - ) - - change_data_type(sushi_context, "sushi.items", types[len(change_categories)], types[0]) - - def _validate_plan(_, plan): - snapshot = next(s for s in plan.snapshots.values() if s.name == '"memory"."sushi"."items"') - assert snapshot.change_category == expected - assert not plan.missing_intervals - - apply_to_environment( - sushi_context, - environment, - change_categories[-1], - plan_validators=[_validate_plan], - ) - assert 
sushi_context.get_snapshot("sushi.items", raise_if_missing=True) == original_snapshot_id - - -def test_revert_after_downstream_change(sushi_context: Context): - environment = "prod" - change_data_type(sushi_context, "sushi.items", DataType.Type.DOUBLE, DataType.Type.FLOAT) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.BREAKING) - - change_data_type( - sushi_context, - "sushi.waiter_revenue_by_day", - DataType.Type.DOUBLE, - DataType.Type.FLOAT, - ) - apply_to_environment(sushi_context, environment, SnapshotChangeCategory.NON_BREAKING) - - change_data_type(sushi_context, "sushi.items", DataType.Type.FLOAT, DataType.Type.DOUBLE) - - def _validate_plan(_, plan): - snapshot = next(s for s in plan.snapshots.values() if s.name == '"memory"."sushi"."items"') - assert snapshot.change_category == SnapshotChangeCategory.BREAKING - assert plan.missing_intervals - - apply_to_environment( - sushi_context, - environment, - SnapshotChangeCategory.BREAKING, - plan_validators=[_validate_plan], - ) - - -def test_auto_categorization(sushi_context: Context): - environment = "dev" - for config in sushi_context.configs.values(): - config.plan.auto_categorize_changes.sql = AutoCategorizationMode.FULL - initial_add(sushi_context, environment) - - version = sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ).version - fingerprint = sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ).fingerprint - - model = t.cast(SqlModel, sushi_context.get_model("sushi.customers", raise_if_missing=True)) - sushi_context.upsert_model( - "sushi.customers", - query_=ParsableSql(sql=model.query.select("'foo' AS foo").sql(dialect=model.dialect)), # type: ignore - ) - apply_to_environment(sushi_context, environment) - - assert ( - sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ).change_category - == SnapshotChangeCategory.INDIRECT_NON_BREAKING - ) - assert ( - 
sushi_context.get_snapshot( - "sushi.waiter_as_customer_by_day", raise_if_missing=True - ).fingerprint - != fingerprint - ) - assert ( - sushi_context.get_snapshot("sushi.waiter_as_customer_by_day", raise_if_missing=True).version - == version - ) - - -@use_terminal_console -def test_multi(mocker): - context = Context(paths=["examples/multi/repo_1", "examples/multi/repo_2"], gateway="memory") - - with patch.object(get_console(), "log_warning") as mock_logger: - context.plan_builder(environment="dev") - warnings = mock_logger.call_args[0][0] - repo1_path, repo2_path = context.configs.keys() - assert f"Linter warnings for {repo1_path}" in warnings - assert f"Linter warnings for {repo2_path}" not in warnings - - assert ( - context.render("bronze.a").sql() - == '''SELECT 1 AS "col_a", 'b' AS "col_b", 1 AS "one", 'repo_1' AS "dup"''' - ) - assert ( - context.render("silver.d").sql() - == '''SELECT "c"."col_a" AS "col_a", 2 AS "two", 'repo_2' AS "dup" FROM "memory"."silver"."c" AS "c"''' - ) - context._new_state_sync().reset(default_catalog=context.default_catalog) - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 5 - context.apply(plan) - - # Ensure before_all, after_all statements for multiple repos have executed - environment_statements = context.state_reader.get_environment_statements(c.PROD) - assert len(environment_statements) == 2 - assert context.fetchdf("select * from before_1").to_dict()["1"][0] == 1 - assert context.fetchdf("select * from before_2").to_dict()["2"][0] == 2 - assert context.fetchdf("select * from after_1").to_dict()["repo_1"][0] == "repo_1" - assert context.fetchdf("select * from after_2").to_dict()["repo_2"][0] == "repo_2" - - old_context = context - context = Context( - paths=["examples/multi/repo_1"], - state_sync=old_context.state_sync, - gateway="memory", - ) - context._engine_adapter = old_context.engine_adapter - del context.engine_adapters - - model = context.get_model("bronze.a") - assert model.project == 
"repo_1" - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql(sql=model.query.select("'c' AS c").sql(dialect=model.dialect)) - } - ) - ) - plan = context.plan_builder().build() - - assert set(snapshot.name for snapshot in plan.directly_modified) == { - '"memory"."bronze"."a"', - '"memory"."bronze"."b"', - '"memory"."silver"."e"', - } - assert sorted([x.name for x in list(plan.indirectly_modified.values())[0]]) == [ - '"memory"."silver"."c"', - '"memory"."silver"."d"', - ] - assert len(plan.missing_intervals) == 3 - context.apply(plan) - validate_apply_basics(context, c.PROD, plan.snapshots.values()) - - # Ensure that before_all and after_all statements of both repos are there despite planning with repo_1 - environment_statements = context.state_reader.get_environment_statements(c.PROD) - assert len(environment_statements) == 2 - - # Ensure that environment statements have the project field set correctly - sorted_env_statements = sorted(environment_statements, key=lambda es: es.project) - assert sorted_env_statements[0].project == "repo_1" - assert sorted_env_statements[1].project == "repo_2" - - # Assert before_all and after_all for each project - assert sorted_env_statements[0].before_all == [ - "CREATE TABLE IF NOT EXISTS before_1 AS select @one()" - ] - assert sorted_env_statements[0].after_all == [ - "CREATE TABLE IF NOT EXISTS after_1 AS select @dup()" - ] - assert sorted_env_statements[1].before_all == [ - "CREATE TABLE IF NOT EXISTS before_2 AS select @two()" - ] - assert sorted_env_statements[1].after_all == [ - "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" - ] - - -@use_terminal_console -def test_multi_repo_single_project_environment_statements_update(copy_to_temp_path): - paths = copy_to_temp_path("examples/multi") - repo_1_path = f"{paths[0]}/repo_1" - repo_2_path = f"{paths[0]}/repo_2" - - context = Context(paths=[repo_1_path, repo_2_path], gateway="memory") - 
context._new_state_sync().reset(default_catalog=context.default_catalog) - - initial_plan = context.plan_builder().build() - context.apply(initial_plan) - - # Get initial statements - initial_statements = context.state_reader.get_environment_statements(c.PROD) - assert len(initial_statements) == 2 - - # Modify repo_1's config to add a new before_all statement - repo_1_config_path = f"{repo_1_path}/config.yaml" - with open(repo_1_config_path, "r") as f: - config_content = f.read() - - # Add a new before_all statement to repo_1 only - modified_config = config_content.replace( - "CREATE TABLE IF NOT EXISTS before_1 AS select @one()", - "CREATE TABLE IF NOT EXISTS before_1 AS select @one()\n - CREATE TABLE IF NOT EXISTS before_1_modified AS select 999", - ) - - with open(repo_1_config_path, "w") as f: - f.write(modified_config) - - # Create new context with modified config but only for repo_1 - context_repo_1_only = Context( - paths=[repo_1_path], state_sync=context.state_sync, gateway="memory" - ) - - # Plan with only repo_1, this should preserve repo_2's statements from state - repo_1_plan = context_repo_1_only.plan_builder(environment="dev").build() - context_repo_1_only.apply(repo_1_plan) - updated_statements = context_repo_1_only.state_reader.get_environment_statements("dev") - - # Should still have statements from both projects - assert len(updated_statements) == 2 - - # Sort by project - sorted_updated = sorted(updated_statements, key=lambda es: es.project or "") - - # Verify repo_1 has the new statement - repo_1_updated = sorted_updated[0] - assert repo_1_updated.project == "repo_1" - assert len(repo_1_updated.before_all) == 2 - assert "CREATE TABLE IF NOT EXISTS before_1_modified" in repo_1_updated.before_all[1] - - # Verify repo_2 statements are preserved from state - repo_2_preserved = sorted_updated[1] - assert repo_2_preserved.project == "repo_2" - assert len(repo_2_preserved.before_all) == 1 - assert "CREATE TABLE IF NOT EXISTS before_2" in 
repo_2_preserved.before_all[0] - assert "CREATE TABLE IF NOT EXISTS after_2 AS select @dup()" in repo_2_preserved.after_all[0] - - -@use_terminal_console -def test_multi_virtual_layer(copy_to_temp_path): - paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer") - path = Path(paths[0]) - first_db_path = str(path / "db_1.db") - second_db_path = str(path / "db_2.db") - - config = Config( - gateways={ - "first": GatewayConfig( - connection=DuckDBConnectionConfig(database=first_db_path), - variables={"overriden_var": "gateway_1"}, - ), - "second": GatewayConfig( - connection=DuckDBConnectionConfig(database=second_db_path), - variables={"overriden_var": "gateway_2"}, - ), - }, - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - model_naming=NameInferenceConfig(infer_names=True), - default_gateway="first", - gateway_managed_virtual_layer=True, - variables={"overriden_var": "global", "global_one": 88}, - ) - - context = Context(paths=paths, config=config) - assert context.default_catalog_per_gateway == {"first": "db_1", "second": "db_2"} - assert len(context.engine_adapters) == 2 - - # For the model without gateway the default should be used and the gateway variable should overide the global - assert ( - context.render("first_schema.model_one").sql() - == 'SELECT \'gateway_1\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' - ) - - # For model with gateway specified the appropriate variable should be used to overide - assert ( - context.render("db_2.second_schema.model_one").sql() - == 'SELECT \'gateway_2\' AS "item_id", 88 AS "global_one", 1 AS "macro_one"' - ) - - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 4 - context.apply(plan) - - # Validate the tables that source from the first tables are correct as well with evaluate - assert ( - context.evaluate( - "first_schema.model_two", start=now(), end=now(), execution_time=now() - ).to_string() - == " item_id global_one\n0 gateway_1 88" - ) - assert ( - context.evaluate( - 
"db_2.second_schema.model_two", start=now(), end=now(), execution_time=now() - ).to_string() - == " item_id global_one\n0 gateway_2 88" - ) - - assert sorted(set(snapshot.name for snapshot in plan.directly_modified)) == [ - '"db_1"."first_schema"."model_one"', - '"db_1"."first_schema"."model_two"', - '"db_2"."second_schema"."model_one"', - '"db_2"."second_schema"."model_two"', - ] - - model = context.get_model("db_1.first_schema.model_one") - - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'c' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder().build() - context.apply(plan) - - state_environments = context.state_reader.get_environments() - state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) - - assert state_environments[0].gateway_managed - assert len(state_snapshots) == len(state_environments[0].snapshots) - assert [snapshot.name for snapshot in plan.directly_modified] == [ - '"db_1"."first_schema"."model_one"' - ] - assert [x.name for x in list(plan.indirectly_modified.values())[0]] == [ - '"db_1"."first_schema"."model_two"' - ] - - assert len(plan.missing_intervals) == 1 - assert ( - context.evaluate( - "db_1.first_schema.model_one", start=now(), end=now(), execution_time=now() - ).to_string() - == " item_id global_one macro_one extra\n0 gateway_1 88 1 c" - ) - - # Create dev environment with changed models - model = context.get_model("db_2.second_schema.model_one") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - model = context.get_model("first_schema.model_two") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d2' AS col").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder("dev").build() - context.apply(plan) - - dev_environment = 
context.state_sync.get_environment("dev") - assert dev_environment is not None - - metadata_engine_1 = DuckDBMetadata.from_context(context) - start_schemas_1 = set(metadata_engine_1.schemas) - assert sorted(start_schemas_1) == sorted( - {"first_schema__dev", "sqlmesh", "first_schema", "sqlmesh__first_schema"} - ) - - metadata_engine_2 = DuckDBMetadata(context._get_engine_adapter("second")) - start_schemas_2 = set(metadata_engine_2.schemas) - assert sorted(start_schemas_2) == sorted( - {"sqlmesh__second_schema", "second_schema", "second_schema__dev"} - ) - - # Invalidate dev environment - context.invalidate_environment("dev") - invalidate_environment = context.state_sync.get_environment("dev") - assert invalidate_environment is not None - assert invalidate_environment.expiration_ts < dev_environment.expiration_ts # type: ignore - assert sorted(start_schemas_1) == sorted(set(metadata_engine_1.schemas)) - assert sorted(start_schemas_2) == sorted(set(metadata_engine_2.schemas)) - - # Run janitor - context._run_janitor() - assert context.state_sync.get_environment("dev") is None - removed_schemas = start_schemas_1 - set(metadata_engine_1.schemas) - assert removed_schemas == {"first_schema__dev"} - removed_schemas = start_schemas_2 - set(metadata_engine_2.schemas) - assert removed_schemas == {"second_schema__dev"} - prod_environment = context.state_sync.get_environment("prod") - - # Remove the second gateway's second model and apply plan - second_model = path / "models/second_schema/model_two.sql" - os.remove(second_model) - assert not second_model.exists() - context = Context(paths=paths, config=config) - plan = context.plan_builder().build() - context.apply(plan) - prod_environment = context.state_sync.get_environment("prod") - assert len(prod_environment.snapshots_) == 3 - - # Changing the flag should show a diff - context.config.gateway_managed_virtual_layer = False - plan = context.plan_builder().build() - assert not plan.requires_backfill - assert ( - 
plan.context_diff.previous_gateway_managed_virtual_layer - != plan.context_diff.gateway_managed_virtual_layer - ) - assert plan.context_diff.has_changes - - # This should error since the default_gateway won't have access to create the view on a non-shared catalog - with pytest.raises(NodeExecutionFailedError, match=r"Execution failed for node SnapshotId*"): - context.apply(plan) - - -def test_multi_dbt(mocker): - context = Context(paths=["examples/multi_dbt/bronze", "examples/multi_dbt/silver"]) - context._new_state_sync().reset(default_catalog=context.default_catalog) - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 4 - context.apply(plan) - validate_apply_basics(context, c.PROD, plan.snapshots.values()) - - environment_statements = context.state_sync.get_environment_statements(c.PROD) - assert len(environment_statements) == 2 - bronze_statements = environment_statements[0] - assert bronze_statements.before_all == [ - "JINJA_STATEMENT_BEGIN;\nCREATE TABLE IF NOT EXISTS analytic_stats (physical_table VARCHAR, evaluation_time VARCHAR);\nJINJA_END;" - ] - assert not bronze_statements.after_all - silver_statements = environment_statements[1] - assert not silver_statements.before_all - assert silver_statements.after_all == [ - "JINJA_STATEMENT_BEGIN;\n{{ store_schemas(schemas) }}\nJINJA_END;" - ] - assert "store_schemas" in silver_statements.jinja_macros.root_macros - analytics_table = context.fetchdf("select * from analytic_stats;") - assert sorted(analytics_table.columns) == sorted(["physical_table", "evaluation_time"]) - schema_table = context.fetchdf("select * from schema_table;") - assert sorted(schema_table.all_schemas[0]) == sorted(["bronze", "silver"]) - - -def test_multi_hybrid(mocker): - context = Context( - paths=["examples/multi_hybrid/dbt_repo", "examples/multi_hybrid/sqlmesh_repo"] - ) - context._new_state_sync().reset(default_catalog=context.default_catalog) - plan = context.plan_builder().build() - - assert 
len(plan.new_snapshots) == 5 - assert context.dag.roots == {'"memory"."dbt_repo"."e"'} - assert context.dag.graph['"memory"."dbt_repo"."c"'] == {'"memory"."sqlmesh_repo"."b"'} - assert context.dag.graph['"memory"."sqlmesh_repo"."b"'] == {'"memory"."sqlmesh_repo"."a"'} - assert context.dag.graph['"memory"."sqlmesh_repo"."a"'] == {'"memory"."dbt_repo"."e"'} - assert context.dag.downstream('"memory"."dbt_repo"."e"') == [ - '"memory"."sqlmesh_repo"."a"', - '"memory"."sqlmesh_repo"."b"', - '"memory"."dbt_repo"."c"', - '"memory"."dbt_repo"."d"', - ] - - sqlmesh_model_a = context.get_model("sqlmesh_repo.a") - dbt_model_c = context.get_model("dbt_repo.c") - assert sqlmesh_model_a.project == "sqlmesh_repo" - - sqlmesh_rendered = ( - 'SELECT "e"."col_a" AS "col_a", "e"."col_b" AS "col_b" FROM "memory"."dbt_repo"."e" AS "e"' - ) - dbt_rendered = 'SELECT DISTINCT ROUND(CAST(("b"."col_a" / NULLIF(100, 0)) AS DECIMAL(16, 2)), 2) AS "rounded_col_a" FROM "memory"."sqlmesh_repo"."b" AS "b"' - assert sqlmesh_model_a.render_query().sql() == sqlmesh_rendered - assert dbt_model_c.render_query().sql() == dbt_rendered - - context.apply(plan) - validate_apply_basics(context, c.PROD, plan.snapshots.values()) - - -def test_incremental_time_self_reference( - mocker: MockerFixture, sushi_context: Context, sushi_data_validator: SushiDataValidator -): - start_ts = to_timestamp("1 week ago") - start_date, end_date = to_date("1 week ago"), to_date("yesterday") - if to_timestamp(start_date) < start_ts: - # The start date must be aligned by the interval unit. 
- start_date += timedelta(days=1) - - df = sushi_context.engine_adapter.fetchdf( - "SELECT MIN(event_date) FROM sushi.customer_revenue_lifetime" - ) - assert df.iloc[0, 0] == pd.to_datetime(start_date) - df = sushi_context.engine_adapter.fetchdf( - "SELECT MAX(event_date) FROM sushi.customer_revenue_lifetime" - ) - assert df.iloc[0, 0] == pd.to_datetime(end_date) - results = sushi_data_validator.validate("sushi.customer_revenue_lifetime", start_date, end_date) - plan = sushi_context.plan_builder( - restate_models=["sushi.customer_revenue_lifetime", "sushi.customer_revenue_by_day"], - start=start_date, - end="5 days ago", - ).build() - revenue_lifeteime_snapshot = sushi_context.get_snapshot( - "sushi.customer_revenue_lifetime", raise_if_missing=True - ) - revenue_by_day_snapshot = sushi_context.get_snapshot( - "sushi.customer_revenue_by_day", raise_if_missing=True - ) - assert sorted(plan.missing_intervals, key=lambda x: x.snapshot_id) == sorted( - [ - SnapshotIntervals( - snapshot_id=revenue_lifeteime_snapshot.snapshot_id, - intervals=[ - (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), - (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), - (to_timestamp(to_date("5 days ago")), to_timestamp(to_date("4 days ago"))), - (to_timestamp(to_date("4 days ago")), to_timestamp(to_date("3 days ago"))), - (to_timestamp(to_date("3 days ago")), to_timestamp(to_date("2 days ago"))), - (to_timestamp(to_date("2 days ago")), to_timestamp(to_date("1 days ago"))), - (to_timestamp(to_date("1 day ago")), to_timestamp(to_date("today"))), - ], - ), - SnapshotIntervals( - snapshot_id=revenue_by_day_snapshot.snapshot_id, - intervals=[ - (to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))), - (to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))), - ], - ), - ], - key=lambda x: x.snapshot_id, - ) - sushi_context.console = mocker.Mock(spec=Console) - sushi_context.apply(plan) - num_batch_calls = 
Counter( - [x[0][0] for x in sushi_context.console.update_snapshot_evaluation_progress.call_args_list] # type: ignore - ) - # Validate that we made 7 calls to the customer_revenue_lifetime snapshot and 1 call to the customer_revenue_by_day snapshot - assert num_batch_calls == { - sushi_context.get_snapshot("sushi.customer_revenue_lifetime", raise_if_missing=True): 7, - sushi_context.get_snapshot("sushi.customer_revenue_by_day", raise_if_missing=True): 1, - } - # Validate that the results are the same as before the restate - assert results == sushi_data_validator.validate( - "sushi.customer_revenue_lifetime", start_date, end_date - ) - - -def test_invalidating_environment(sushi_context: Context): - apply_to_environment(sushi_context, "dev") - start_environment = sushi_context.state_sync.get_environment("dev") - assert start_environment is not None - metadata = DuckDBMetadata.from_context(sushi_context) - start_schemas = set(metadata.schemas) - assert "sushi__dev" in start_schemas - sushi_context.invalidate_environment("dev") - invalidate_environment = sushi_context.state_sync.get_environment("dev") - assert invalidate_environment is not None - schemas_prior_to_janitor = set(metadata.schemas) - assert invalidate_environment.expiration_ts < start_environment.expiration_ts # type: ignore - assert start_schemas == schemas_prior_to_janitor - sushi_context._run_janitor() - schemas_after_janitor = set(metadata.schemas) - assert sushi_context.state_sync.get_environment("dev") is None - assert start_schemas - schemas_after_janitor == {"sushi__dev"} - - -def test_environment_suffix_target_table(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context( - "examples/sushi", config="environment_suffix_table_config" - ) - context.apply(plan) - metadata = DuckDBMetadata.from_context(context) - environments_schemas = {"sushi"} - internal_schemas = {"sqlmesh", "sqlmesh__sushi"} - starting_schemas = environments_schemas | internal_schemas - # Make sure no new 
schemas are created - assert set(metadata.schemas) - starting_schemas == {"raw"} - prod_views = {x for x in metadata.qualified_views if x.db in environments_schemas} - # Make sure that all models are present - assert len(prod_views) == 16 - apply_to_environment(context, "dev") - # Make sure no new schemas are created - assert set(metadata.schemas) - starting_schemas == {"raw"} - dev_views = { - x for x in metadata.qualified_views if x.db in environments_schemas and "__dev" in x.name - } - # Make sure that there is a view with `__dev` for each view that exists in prod - assert len(dev_views) == len(prod_views) - assert {x.name.replace("__dev", "") for x in dev_views} - {x.name for x in prod_views} == set() - context.invalidate_environment("dev") - context._run_janitor() - views_after_janitor = metadata.qualified_views - # Make sure that the number of views after the janitor is the same as when you subtract away dev views - assert len(views_after_janitor) == len( - {x.sql(dialect="duckdb") for x in views_after_janitor} - - {x.sql(dialect="duckdb") for x in dev_views} - ) - # Double check there are no dev views - assert len({x for x in views_after_janitor if "__dev" in x.name}) == 0 - # Make sure prod views were not removed - assert {x.sql(dialect="duckdb") for x in prod_views} - { - x.sql(dialect="duckdb") for x in views_after_janitor - } == set() - - -def test_environment_suffix_target_catalog(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: - monkeypatch.chdir(tmp_path) - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(catalogs={"main_warehouse": ":memory:"}), - environment_suffix_target=EnvironmentSuffixTarget.CATALOG, - ) - - assert config.default_connection - - models_dir = tmp_path / "models" - models_dir.mkdir() - - (models_dir / "model.sql").write_text(""" - MODEL ( - name example_schema.test_model, - kind FULL - ); - - SELECT '1' as a""") - - (models_dir / 
"fqn_model.sql").write_text(""" - MODEL ( - name memory.example_fqn_schema.test_model_fqn, - kind FULL - ); - - SELECT '1' as a""") - - ctx = Context(config=config, paths=tmp_path) - - metadata = DuckDBMetadata.from_context(ctx) - assert ctx.default_catalog == "main_warehouse" - assert metadata.catalogs == {"main_warehouse", "memory"} - - ctx.plan(auto_apply=True) - - # prod should go to the default catalog and not be overridden to a catalog called 'prod' - assert ( - ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore - == "1" - ) - assert ( - ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore - == "1" - ) - assert metadata.catalogs == {"main_warehouse", "memory"} - assert metadata.schemas_in_catalog("main_warehouse") == [ - "example_schema", - "sqlmesh__example_schema", - ] - assert metadata.schemas_in_catalog("memory") == [ - "example_fqn_schema", - "sqlmesh__example_fqn_schema", - ] - - # dev should be overridden to go to a catalogs called 'main_warehouse__dev' and 'memory__dev' - ctx.plan(environment="dev", include_unmodified=True, auto_apply=True) - assert ( - ctx.engine_adapter.fetchone("select * from main_warehouse__dev.example_schema.test_model")[ - 0 - ] # type: ignore - == "1" - ) - assert ( - ctx.engine_adapter.fetchone("select * from memory__dev.example_fqn_schema.test_model_fqn")[ - 0 - ] # type: ignore - == "1" - ) - assert metadata.catalogs == {"main_warehouse", "main_warehouse__dev", "memory", "memory__dev"} - - # schemas in dev envs should match prod and not have a suffix - assert metadata.schemas_in_catalog("main_warehouse") == [ - "example_schema", - "sqlmesh__example_schema", - ] - assert metadata.schemas_in_catalog("main_warehouse__dev") == ["example_schema"] - assert metadata.schemas_in_catalog("memory") == [ - "example_fqn_schema", - "sqlmesh__example_fqn_schema", - ] - assert metadata.schemas_in_catalog("memory__dev") == 
["example_fqn_schema"] - - ctx.invalidate_environment("dev", sync=True) - - # dev catalogs cleaned up - assert metadata.catalogs == {"main_warehouse", "memory"} - - # prod catalogs still contain physical layer and views still work - assert metadata.schemas_in_catalog("main_warehouse") == [ - "example_schema", - "sqlmesh__example_schema", - ] - assert metadata.schemas_in_catalog("memory") == [ - "example_fqn_schema", - "sqlmesh__example_fqn_schema", - ] - - assert ( - ctx.engine_adapter.fetchone("select * from main_warehouse.example_schema.test_model")[0] # type: ignore - == "1" - ) - assert ( - ctx.engine_adapter.fetchone("select * from memory.example_fqn_schema.test_model_fqn")[0] # type: ignore - == "1" - ) - - -def test_environment_catalog_mapping(init_and_plan_context: t.Callable): - environments_schemas = {"raw", "sushi"} - - def get_prod_dev_views(metadata: DuckDBMetadata) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: - views = metadata.qualified_views - prod_views = { - x for x in views if x.catalog == "prod_catalog" if x.db in environments_schemas - } - dev_views = {x for x in views if x.catalog == "dev_catalog" if x.db in environments_schemas} - return prod_views, dev_views - - def get_default_catalog_and_non_tables( - metadata: DuckDBMetadata, default_catalog: t.Optional[str] - ) -> t.Tuple[t.Set[exp.Table], t.Set[exp.Table]]: - tables = metadata.qualified_tables - user_default_tables = { - x for x in tables if x.catalog == default_catalog and x.db != "sqlmesh" - } - non_default_tables = {x for x in tables if x.catalog != default_catalog} - return user_default_tables, non_default_tables - - context, plan = init_and_plan_context( - "examples/sushi", config="environment_catalog_mapping_config" - ) - context.apply(plan) - metadata = DuckDBMetadata(context.engine_adapter) - state_metadata = DuckDBMetadata.from_context(context.state_sync.state_sync) - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = 
get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 0 - assert len(user_default_tables) == 15 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - apply_to_environment(context, "dev") - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 16 - assert len(user_default_tables) == 16 - assert len(non_default_tables) == 0 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - apply_to_environment(context, "prodnot") - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 32 - assert len(user_default_tables) == 16 - assert len(non_default_tables) == 0 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - context.invalidate_environment("dev") - context._run_janitor() - prod_views, dev_views = get_prod_dev_views(metadata) - ( - user_default_tables, - non_default_tables, - ) = get_default_catalog_and_non_tables(metadata, context.default_catalog) - assert len(prod_views) == 16 - assert len(dev_views) == 16 - assert 
len(user_default_tables) == 16 - assert len(non_default_tables) == 0 - assert state_metadata.schemas == ["sqlmesh"] - assert {x.sql() for x in state_metadata.qualified_tables}.issuperset( - { - "physical.sqlmesh._environments", - "physical.sqlmesh._intervals", - "physical.sqlmesh._snapshots", - "physical.sqlmesh._versions", - } - ) - - -@pytest.mark.parametrize( - "context_fixture", - ["sushi_context", "sushi_no_default_catalog"], -) -def test_unaligned_start_snapshots(context_fixture: Context, request): - context = request.getfixturevalue(context_fixture) - environment = "dev" - apply_to_environment(context, environment) - # Make breaking change to model upstream of a depends_on_self model - context.upsert_model("sushi.order_items", stamp="1") - # Apply the change starting at a date later then the beginning of the downstream depends_on_self model - plan = apply_to_environment( - context, - environment, - choice=SnapshotChangeCategory.BREAKING, - plan_start="2 days ago", - enable_preview=True, - ) - revenue_lifetime_snapshot = context.get_snapshot( - "sushi.customer_revenue_lifetime", raise_if_missing=True - ) - # Validate that the depends_on_self model is non-deployable - assert not plan.deployability_index.is_deployable(revenue_lifetime_snapshot) - - -class OldPythonModel(PythonModel): - kind: ModelKind = ViewKind() - - -def test_python_model_default_kind_change(init_and_plan_context: t.Callable): - """ - Around 2024-07-17 Python models had their default Kind changed from VIEW to FULL in order to - avoid some edge cases where the views might not get updated in certain situations. 
- - This test ensures that if a user had a Python `kind: VIEW` model stored in state, - it can still be loaded without error and just show as a breaking change from `kind: VIEW` - to `kind: FULL` - """ - - # note: we deliberately dont specify a Kind here to allow the defaults to be picked up - python_model_file = """import typing as t -import pandas as pd # noqa: TID253 -from sqlmesh import ExecutionContext, model - -@model( - "sushi.python_view_model", - columns={ - "id": "int", - } -) -def execute( - context: ExecutionContext, - **kwargs: t.Any, -) -> pd.DataFrame: - return pd.DataFrame([ - {"id": 1} - ]) -""" - - context: Context - context, _ = init_and_plan_context("examples/sushi") - - with open(context.path / "models" / "python_view_model.py", mode="w", encoding="utf8") as f: - f.write(python_model_file) - - # monkey-patch PythonModel to default to kind: View again - # and ViewKind to allow python models again - with ( - mock.patch.object(ViewKind, "supports_python_models", return_value=True), - mock.patch("sqlmesh.core.model.definition.PythonModel", OldPythonModel), - ): - context.load() - - # check the monkey-patching worked - model = context.get_model("sushi.python_view_model") - assert model.kind.name == ModelKindName.VIEW - assert model.source_type == "python" - - # apply plan - plan: Plan = context.plan(auto_apply=True) - - # check that run() still works even though we have a Python model with kind: View in the state - snapshot_ids = [s for s in plan.directly_modified if "python_view_model" in s.name] - snapshot_from_state = list(context.state_sync.get_snapshots(snapshot_ids).values())[0] - assert snapshot_from_state.model.kind.name == ModelKindName.VIEW - assert snapshot_from_state.model.source_type == "python" - context.run() - - # reload context to load model with new defaults - # this also shows the earlier monkey-patching is no longer in effect - context.load() - model = context.get_model("sushi.python_view_model") - assert model.kind.name == 
ModelKindName.FULL - assert model.source_type == "python" - - plan = context.plan( - categorizer_config=CategorizerConfig.all_full() - ) # the default categorizer_config doesnt auto-categorize python models - - assert plan.has_changes - assert not plan.indirectly_modified - - assert len(plan.directly_modified) == 1 - snapshot_id = list(plan.directly_modified)[0] - assert snapshot_id.name == '"memory"."sushi"."python_view_model"' - assert plan.modified_snapshots[snapshot_id].change_category == SnapshotChangeCategory.BREAKING - - context.apply(plan) - - df = context.engine_adapter.fetchdf("SELECT id FROM sushi.python_view_model") - assert df["id"].to_list() == [1] - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_of_full_model_with_start(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - restatement_plan = context.plan( - restate_models=["sushi.customers"], - start="2023-01-07", - auto_apply=True, - no_prompts=True, - ) - - sushi_customer_interval = restatement_plan.restatements[ - context.get_snapshot("sushi.customers").snapshot_id - ] - assert sushi_customer_interval == (to_timestamp("2023-01-01"), to_timestamp("2023-01-09")) - waiter_by_day_interval = restatement_plan.restatements[ - context.get_snapshot("sushi.waiter_as_customer_by_day").snapshot_id - ] - assert waiter_by_day_interval == (to_timestamp("2023-01-07"), to_timestamp("2023-01-08")) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_should_not_override_environment_statements(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - context.config.before_all = ["SELECT 'test_before_all';", *context.config.before_all] - context.load() - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - - prod_env_statements = context.state_reader.get_environment_statements(c.PROD) - assert prod_env_statements[0].before_all[0] == "SELECT 
'test_before_all';" - - context.plan( - restate_models=["sushi.waiter_revenue_by_day"], - start="2023-01-07", - auto_apply=True, - no_prompts=True, - ) - - prod_env_statements = context.state_reader.get_environment_statements(c.PROD) - assert prod_env_statements[0].before_all[0] == "SELECT 'test_before_all';" - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_restatement_shouldnt_backfill_beyond_prod_intervals(init_and_plan_context: t.Callable): - context, _ = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.top_waiters") - context.upsert_model(SqlModel.parse_obj({**model.dict(), "cron": "@hourly"})) - - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - context.run() - - with time_machine.travel("2023-01-09 02:00:00 UTC"): - # It's time to backfill the waiter_revenue_by_day model but it hasn't run yet - restatement_plan = context.plan( - restate_models=["sushi.waiter_revenue_by_day"], - no_prompts=True, - skip_tests=True, - ) - intervals_by_id = {i.snapshot_id: i for i in restatement_plan.missing_intervals} - # Make sure the intervals don't go beyond the prod intervals - assert intervals_by_id[context.get_snapshot("sushi.top_waiters").snapshot_id].intervals[-1][ - 1 - ] == to_timestamp("2023-01-08 15:00:00 UTC") - assert intervals_by_id[ - context.get_snapshot("sushi.waiter_revenue_by_day").snapshot_id - ].intervals[-1][1] == to_timestamp("2023-01-08 00:00:00 UTC") - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -@use_terminal_console -def test_audit_only_metadata_change(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Add a new audit - model = context.get_model("sushi.waiter_revenue_by_day") - audits = model.audits.copy() - audits.append(("number_of_rows", {"threshold": exp.Literal.number(1)})) - model = model.copy(update={"audits": audits}) - context.upsert_model(model) - - plan = context.plan_builder("prod", 
skip_tests=True).build() - assert len(plan.new_snapshots) == 2 - assert all(s.change_category.is_metadata for s in plan.new_snapshots) - assert not plan.missing_intervals - - with capture_output() as output: - context.apply(plan) - - assert "Auditing models" in output.stdout - assert model.name in output.stdout - - -def initial_add(context: Context, environment: str): - assert not context.state_reader.get_environment(environment) - - plan = context.plan(environment, start=start(context), create_from="nonexistent_env") - validate_plan_changes(plan, added={x.snapshot_id for x in context.snapshots.values()}) - - context.apply(plan) - validate_apply_basics(context, environment, plan.snapshots.values()) - - -def test_plan_production_environment_statements(tmp_path: Path): - model_a = """ - MODEL ( - name test_schema.a, - kind FULL, - ); - - @IF( - @runtime_stage IN ('evaluating', 'creating'), - INSERT INTO schema_names_for_prod (physical_schema_name) VALUES (@resolve_template('@{schema_name}')) - ); - - SELECT 1 AS account_id - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - for path, defn in {"a.sql": model_a}.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - before_all = [ - "CREATE TABLE IF NOT EXISTS schema_names_for_@this_env (physical_schema_name VARCHAR)", - "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS should_create AS SELECT @runtime_stage)", - ] - after_all = [ - "@IF(@this_env = 'prod', CREATE TABLE IF NOT EXISTS after_t AS SELECT @var_5)", - "@IF(@runtime_stage = 'before_all', CREATE TABLE IF NOT EXISTS not_create AS SELECT @runtime_stage)", - ] - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=before_all, - after_all=after_all, - variables={"var_5": 5}, - ) - ctx = Context(paths=[tmp_path], config=config) - ctx.plan(auto_apply=True, no_prompts=True) - - before_t = ctx.fetchdf("select * from schema_names_for_prod").to_dict() - assert 
before_t["physical_schema_name"][0] == "sqlmesh__test_schema" - - after_t = ctx.fetchdf("select * from after_t").to_dict() - assert after_t["5"][0] == 5 - - environment_statements = ctx.state_reader.get_environment_statements(c.PROD) - assert environment_statements[0].before_all == before_all - assert environment_statements[0].after_all == after_all - assert environment_statements[0].python_env.keys() == {"__sqlmesh__vars__"} - assert environment_statements[0].python_env["__sqlmesh__vars__"].payload == "{'var_5': 5}" - - should_create = ctx.fetchdf("select * from should_create").to_dict() - assert should_create["before_all"][0] == "before_all" - - with pytest.raises( - Exception, match=r"Catalog Error: Table with name not_create does not exist!" - ): - ctx.fetchdf("select * from not_create") - - -def test_environment_statements_error_handling(tmp_path: Path): - model_a = """ - MODEL ( - name test_schema.a, - kind FULL, - ); - - SELECT 1 AS account_id - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - for path, defn in {"a.sql": model_a}.items(): - with open(models_dir / path, "w") as f: - f.write(defn) - - before_all = [ - "CREATE TABLE identical_table (physical_schema_name VARCHAR)", - "CREATE TABLE identical_table (physical_schema_name VARCHAR)", - ] - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=before_all, - ) - ctx = Context(paths=[tmp_path], config=config) - - expected_error_message = re.escape( - """An error occurred during execution of the following 'before_all' statement: - -CREATE TABLE identical_table (physical_schema_name TEXT) - -Catalog Error: Table with name "identical_table" already exists!""" - ) - - with pytest.raises(SQLMeshError, match=expected_error_message): - ctx.plan(auto_apply=True, no_prompts=True) - - after_all = [ - "@bad_macro()", - ] - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - after_all=after_all, - ) - ctx = Context(paths=[tmp_path], 
config=config) - - expected_error_message = re.escape( - """An error occurred during rendering of the 'after_all' statements: - -Failed to resolve macros for - -@bad_macro() - -Macro 'bad_macro' does not exist.""" - ) - - with pytest.raises(SQLMeshError, match=expected_error_message): - ctx.plan(auto_apply=True, no_prompts=True) - - -def test_before_all_after_all_execution_order(tmp_path: Path, mocker: MockerFixture): - model = """ - MODEL ( - name test_schema.model_that_depends_on_before_all, - kind FULL, - ); - - SELECT id, value FROM before_all_created_table - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - with open(models_dir / "model.sql", "w") as f: - f.write(model) - - # before_all statement that creates a table that the above model depends on - before_all_statement = ( - "CREATE TABLE IF NOT EXISTS before_all_created_table AS SELECT 1 AS id, 'test' AS value" - ) - - # after_all that depends on the model - after_all_statement = "CREATE TABLE IF NOT EXISTS after_all_created_table AS SELECT id, value FROM test_schema.model_that_depends_on_before_all" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=[before_all_statement], - after_all=[after_all_statement], - ) - - execute_calls: t.List[str] = [] - - original_duckdb_execute = DuckDBEngineAdapter.execute - - def track_duckdb_execute(self, expression, **kwargs): - sql = expression if isinstance(expression, str) else expression.sql(dialect="duckdb") - state_tables = [ - "_snapshots", - "_environments", - "_versions", - "_intervals", - "_auto_restatements", - "_environment_statements", - ] - - # to ignore the state queries - if not any(table in sql.lower() for table in state_tables): - execute_calls.append(sql) - - return original_duckdb_execute(self, expression, **kwargs) - - ctx = Context(paths=[tmp_path], config=config) - - # the plan would fail if the execution order ever changes and before_all statements dont execute first - ctx.plan(auto_apply=True, 
no_prompts=True) - - mocker.patch.object(DuckDBEngineAdapter, "execute", track_duckdb_execute) - - # run with the patched execute - ctx.run("prod", start="2023-01-01", end="2023-01-02") - - # validate explicitly that the first execute is for the before_all - assert "before_all_created_table" in execute_calls[0] - - # and that the last is the sole after all that depends on the model - assert "after_all_created_table" in execute_calls[-1] - - -@time_machine.travel("2025-03-08 00:00:00 UTC") -def test_tz(init_and_plan_context): - context, _ = init_and_plan_context("examples/sushi") - - model = context.get_model("sushi.waiter_revenue_by_day") - context.upsert_model( - SqlModel.parse_obj( - {**model.dict(), "cron_tz": "America/Los_Angeles", "start": "2025-03-07"} - ) - ) - - def assert_intervals(plan, intervals): - assert ( - next( - intervals.intervals - for intervals in plan.missing_intervals - if intervals.snapshot_id.name == model.fqn - ) - == intervals - ) - - plan = context.plan_builder("prod", skip_tests=True).build() - - # we have missing intervals but not waiter_revenue_by_day because it's not midnight pacific yet - assert plan.missing_intervals - - with pytest.raises(StopIteration): - assert_intervals(plan, []) - - # now we're ready 8AM UTC == midnight PST - with time_machine.travel("2025-03-08 08:00:00 UTC"): - plan = context.plan_builder("prod", skip_tests=True).build() - assert_intervals(plan, [(to_timestamp("2025-03-07"), to_timestamp("2025-03-08"))]) - - with time_machine.travel("2025-03-09 07:00:00 UTC"): - plan = context.plan_builder("prod", skip_tests=True).build() - - assert_intervals( - plan, - [ - (to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), - ], - ) - - with time_machine.travel("2025-03-09 08:00:00 UTC"): - plan = context.plan_builder("prod", skip_tests=True).build() - - assert_intervals( - plan, - [ - (to_timestamp("2025-03-07"), to_timestamp("2025-03-08")), - (to_timestamp("2025-03-08"), to_timestamp("2025-03-09")), - ], - ) - - 
context.apply(plan) - - plan = context.plan_builder("prod", skip_tests=True).build() - assert not plan.missing_intervals - - -def apply_to_environment( - context: Context, - environment: str, - choice: t.Optional[SnapshotChangeCategory] = None, - plan_validators: t.Optional[t.Iterable[t.Callable]] = None, - apply_validators: t.Optional[t.Iterable[t.Callable]] = None, - plan_start: t.Optional[TimeLike] = None, - allow_destructive_models: t.Optional[t.List[str]] = None, - enable_preview: bool = False, -): - plan_validators = plan_validators or [] - apply_validators = apply_validators or [] - - plan_builder = context.plan_builder( - environment, - start=plan_start or start(context) if environment != c.PROD else None, - forward_only=choice == SnapshotChangeCategory.FORWARD_ONLY, - include_unmodified=True, - allow_destructive_models=allow_destructive_models if allow_destructive_models else [], - enable_preview=enable_preview, - ) - if environment != c.PROD: - plan_builder.set_start(plan_start or start(context)) - - if choice: - if choice == SnapshotChangeCategory.FORWARD_ONLY: - # FORWARD_ONLY is deprecated, fallback to NON_BREAKING to keep the existing tests - choice = SnapshotChangeCategory.NON_BREAKING - plan_choice(plan_builder, choice) - for validator in plan_validators: - validator(context, plan_builder.build()) - - plan = plan_builder.build() - context.apply(plan) - - validate_apply_basics(context, environment, plan.snapshots.values(), plan.deployability_index) - for validator in apply_validators: - validator(context) - return plan - - -def change_data_type( - context: Context, model_name: str, old_type: DataType.Type, new_type: DataType.Type -) -> None: - model = context.get_model(model_name) - assert model is not None - - if isinstance(model, SqlModel): - query = model.query.copy() - data_types = query.find_all(DataType) - for data_type in data_types: - if data_type.this == old_type: - data_type.set("this", new_type) - context.upsert_model(model_name, 
query_=ParsableSql(sql=query.sql(dialect=model.dialect))) - elif model.columns_to_types_ is not None: - for k, v in model.columns_to_types_.items(): - if v.this == old_type: - model.columns_to_types_[k] = DataType.build(new_type) - context.upsert_model(model_name, columns=model.columns_to_types_) - - -def validate_plan_changes( - plan: Plan, - *, - added: t.Optional[t.Iterable[SnapshotId]] = None, - modified: t.Optional[t.Iterable[str]] = None, - removed: t.Optional[t.Iterable[SnapshotId]] = None, -) -> None: - added = added or [] - modified = modified or [] - removed = removed or [] - assert set(added) == plan.context_diff.added - assert set(modified) == set(plan.context_diff.modified_snapshots) - assert set(removed) == set(plan.context_diff.removed_snapshots) - - -def validate_versions_same( - model_names: t.List[str], - versions: t.Dict[str, str], - other_versions: t.Dict[str, str], -) -> None: - for name in model_names: - assert versions[name] == other_versions[name] - - -def validate_versions_different( - model_names: t.List[str], - versions: t.Dict[str, str], - other_versions: t.Dict[str, str], -) -> None: - for name in model_names: - assert versions[name] != other_versions[name] - - -def validate_apply_basics( - context: Context, - environment: str, - snapshots: t.Iterable[Snapshot], - deployability_index: t.Optional[DeployabilityIndex] = None, -) -> None: - validate_snapshots_in_state_sync(snapshots, context) - validate_state_sync_environment(snapshots, environment, context) - validate_tables(snapshots, context, deployability_index) - validate_environment_views(snapshots, environment, context, deployability_index) - - -def validate_snapshots_in_state_sync(snapshots: t.Iterable[Snapshot], context: Context) -> None: - snapshot_infos = map(to_snapshot_info, snapshots) - state_sync_table_infos = map( - to_snapshot_info, context.state_reader.get_snapshots(snapshots).values() - ) - assert set(snapshot_infos) == set(state_sync_table_infos) - - -def 
validate_state_sync_environment( - snapshots: t.Iterable[Snapshot], env: str, context: Context -) -> None: - environment = context.state_reader.get_environment(env) - assert environment - snapshot_infos = map(to_snapshot_info, snapshots) - environment_table_infos = map(to_snapshot_info, environment.snapshots) - assert set(snapshot_infos) == set(environment_table_infos) - - -def validate_tables( - snapshots: t.Iterable[Snapshot], - context: Context, - deployability_index: t.Optional[DeployabilityIndex] = None, -) -> None: - adapter = context.engine_adapter - deployability_index = deployability_index or DeployabilityIndex.all_deployable() - for snapshot in snapshots: - is_deployable = deployability_index.is_representative(snapshot) - if not snapshot.is_model or snapshot.is_external: - continue - table_should_exist = not snapshot.is_embedded - assert adapter.table_exists(snapshot.table_name(is_deployable)) == table_should_exist - if table_should_exist: - assert select_all(snapshot.table_name(is_deployable), adapter) - - -def validate_environment_views( - snapshots: t.Iterable[Snapshot], - environment: str, - context: Context, - deployability_index: t.Optional[DeployabilityIndex] = None, -) -> None: - adapter = context.engine_adapter - deployability_index = deployability_index or DeployabilityIndex.all_deployable() - for snapshot in snapshots: - is_deployable = deployability_index.is_representative(snapshot) - if not snapshot.is_model or snapshot.is_symbolic: - continue - view_name = snapshot.qualified_view_name.for_environment( - EnvironmentNamingInfo.from_environment_catalog_mapping( - context.config.environment_catalog_mapping, - name=environment, - suffix_target=context.config.environment_suffix_target, - ) - ) - - assert adapter.table_exists(view_name) - assert select_all(snapshot.table_name(is_deployable), adapter) == select_all( - view_name, adapter - ) - - -def select_all(table: str, adapter: EngineAdapter) -> t.Iterable: - return adapter.fetchall(f"select * 
from {table} order by 1") - - -def snapshots_to_versions(snapshots: t.Iterable[Snapshot]) -> t.Dict[str, str]: - return {snapshot.name: snapshot.version or "" for snapshot in snapshots} - - -def to_snapshot_info(snapshot: SnapshotInfoLike) -> SnapshotTableInfo: - return snapshot.table_info - - -def start(context: Context) -> TimeLike: - env = context.state_sync.get_environment("prod") - assert env - return env.start_at - - -def add_projection_to_model(model: SqlModel, literal: bool = True) -> SqlModel: - one_expr = exp.Literal.number(1).as_("one") if literal else exp.column("one") - kwargs = { - **model.dict(), - "query": model.query.select(one_expr), # type: ignore - } - return SqlModel.parse_obj(kwargs) - - -def test_plan_environment_statements_doesnt_cause_extra_diff(tmp_path: Path): - model_a = """ - MODEL ( - name test_schema.a, - kind FULL, - ); - - SELECT 1; - """ - - models_dir = tmp_path / "models" - models_dir.mkdir() - - (models_dir / "a.sql").write_text(model_a) - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - before_all=["select 1 as before_all"], - after_all=["select 2 as after_all"], - ) - ctx = Context(paths=[tmp_path], config=config) - - # first plan - should apply changes - assert ctx.plan(auto_apply=True, no_prompts=True).has_changes - - # second plan - nothing has changed so should report no changes - assert not ctx.plan(auto_apply=True, no_prompts=True).has_changes - - -def test_janitor_cleanup_order(mocker: MockerFixture, tmp_path: Path): - def setup_scenario(): - models_dir = tmp_path / "models" - - if not models_dir.exists(): - models_dir.mkdir() - - model1_path = models_dir / "model1.sql" - - with open(model1_path, "w") as f: - f.write("MODEL(name test.model1, kind FULL); SELECT 1 AS col") - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - ) - ctx = Context(paths=[tmp_path], config=config) - - ctx.plan("dev", no_prompts=True, auto_apply=True) - - model1_snapshot = 
ctx.get_snapshot("test.model1") - - # Delete the model file to cause a snapshot expiration - model1_path.unlink() - - ctx.load() - - ctx.plan("dev", no_prompts=True, auto_apply=True) - - # Invalidate the environment to cause an environment cleanup - ctx.invalidate_environment("dev") - - try: - ctx._run_janitor(ignore_ttl=True) - except: - pass - - return ctx, model1_snapshot - - # Case 1: Assume that the snapshot cleanup yields an error, the snapshot records - # should still exist in the state sync so the next janitor can retry - mocker.patch( - "sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup", - side_effect=Exception("snapshot cleanup error"), - ) - ctx, model1_snapshot = setup_scenario() - - # - Check that the snapshot record exists in the state sync - state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id]) - assert state_snapshot - - # - Run the janitor again, this time it should succeed - mocker.patch("sqlmesh.core.snapshot.evaluator.SnapshotEvaluator.cleanup") - ctx._run_janitor(ignore_ttl=True) - - # - Check that the snapshot record does not exist in the state sync anymore - state_snapshot = ctx.state_sync.state_sync.get_snapshots([model1_snapshot.snapshot_id]) - assert not state_snapshot - - # Case 2: Assume that the view cleanup yields an error, the enviroment - # record should still exist - mocker.patch( - "sqlmesh.core.context.cleanup_expired_views", side_effect=Exception("view cleanup error") - ) - ctx, model1_snapshot = setup_scenario() - - views = ctx.fetchdf("FROM duckdb_views() SELECT * EXCLUDE(sql) WHERE NOT internal") - assert views.empty - - # - Check that the environment record exists in the state sync - assert ctx.state_sync.get_environment("dev") - - # - Run the janitor again, this time it should succeed - mocker.patch("sqlmesh.core.context.cleanup_expired_views") - ctx._run_janitor(ignore_ttl=True) - - # - Check that the environment record does not exist in the state sync anymore - assert not 
ctx.state_sync.get_environment("dev") - - -@use_terminal_console -def test_destroy(copy_to_temp_path): - # Testing project with two gateways to verify cleanup is performed across engines - paths = copy_to_temp_path("tests/fixtures/multi_virtual_layer") - path = Path(paths[0]) - first_db_path = str(path / "db_1.db") - second_db_path = str(path / "db_2.db") - - config = Config( - gateways={ - "first": GatewayConfig( - connection=DuckDBConnectionConfig(database=first_db_path), - variables={"overriden_var": "gateway_1"}, - ), - "second": GatewayConfig( - connection=DuckDBConnectionConfig(database=second_db_path), - variables={"overriden_var": "gateway_2"}, - ), - }, - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - model_naming=NameInferenceConfig(infer_names=True), - default_gateway="first", - gateway_managed_virtual_layer=True, - variables={"overriden_var": "global", "global_one": 88}, - ) - - context = Context(paths=paths, config=config) - plan = context.plan_builder().build() - assert len(plan.new_snapshots) == 4 - context.apply(plan) - - # Confirm cache exists - cache_path = Path(path) / ".cache" - assert cache_path.exists() - assert len(list(cache_path.iterdir())) > 0 - - model = context.get_model("db_1.first_schema.model_one") - - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'c' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder().build() - context.apply(plan) - - state_environments = context.state_reader.get_environments() - state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) - - assert len(state_snapshots) == len(state_environments[0].snapshots) - - # Create dev environment with changed models - model = context.get_model("db_2.second_schema.model_one") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d' AS extra").sql(dialect=model.dialect) - ) - } - ) - ) - model = 
context.get_model("first_schema.model_two") - context.upsert_model( - model.copy( - update={ - "query_": ParsableSql( - sql=model.query.select("'d2' AS col").sql(dialect=model.dialect) - ) - } - ) - ) - plan = context.plan_builder("dev").build() - context.apply(plan) - - dev_environment = context.state_sync.get_environment("dev") - assert dev_environment is not None - - state_environments = context.state_reader.get_environments() - state_snapshots = context.state_reader.get_snapshots(context.snapshots.values()) - assert ( - len(state_snapshots) - == len(state_environments[0].snapshots) - == len(state_environments[1].snapshots) - ) - - # The state tables at this point should be able to be retrieved - state_tables = { - "_environments", - "_snapshots", - "_intervals", - "_auto_restatements", - "_environment_statements", - "_intervals", - "_versions", - } - for table_name in state_tables: - context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}") - - # The actual tables as well - context.engine_adapters["second"].fetchdf(f"SELECT * FROM db_2.second_schema.model_one") - context.engine_adapters["second"].fetchdf(f"SELECT * FROM db_2.second_schema.model_two") - context.fetchdf(f"SELECT * FROM db_1.first_schema.model_one") - context.fetchdf(f"SELECT * FROM db_1.first_schema.model_two") - - # Use the destroy command to remove all data objects and state - # Mock the console confirmation to automatically return True - with patch.object(context.console, "_confirm", return_value=True): - context._destroy() - - # Ensure all tables have been removed - for table_name in state_tables: - with pytest.raises( - Exception, match=f"Catalog Error: Table with name {table_name} does not exist!" - ): - context.fetchdf(f"SELECT * FROM db_1.sqlmesh.{table_name}") - - # Validate tables have been deleted as well - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_two does not exist!" 
- ): - context.fetchdf("SELECT * FROM db_1.first_schema.model_two") - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_one does not exist!" - ): - context.fetchdf("SELECT * FROM db_1.first_schema.model_one") - - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_two does not exist!" - ): - context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_two") - with pytest.raises( - Exception, match=r"Catalog Error: Table with name model_one does not exist!" - ): - context.engine_adapters["second"].fetchdf("SELECT * FROM db_2.second_schema.model_one") - - # Ensure the cache has been removed - assert not cache_path.exists() - - -@use_terminal_console -def test_audits_running_on_metadata_changes(tmp_path: Path): - def setup_senario(model_before: str, model_after: str): - models_dir = Path("models") - create_temp_file(tmp_path, models_dir / "test.sql", model_before) - - # Create first snapshot - context = Context(paths=tmp_path, config=Config()) - context.plan("prod", no_prompts=True, auto_apply=True) - - # Create second (metadata) snapshot - create_temp_file(tmp_path, models_dir / "test.sql", model_after) - context.load() - - with capture_output() as output: - with pytest.raises(PlanError): - context.plan("prod", no_prompts=True, auto_apply=True) - - assert 'Failed models\n\n "model"' in output.stdout - - return output - - # Ensure incorrect audits (bad data, incorrect definition etc) are evaluated immediately - output = setup_senario( - "MODEL (name model); SELECT NULL AS col", - "MODEL (name model, audits (not_null(columns=[col]))); SELECT NULL AS col", - ) - assert "'not_null' audit error: 1 row failed" in output.stdout - - output = setup_senario( - "MODEL (name model); SELECT NULL AS col", - "MODEL (name model, audits (not_null(columns=[this_col_does_not_exist]))); SELECT NULL AS col", - ) - assert ( - 'Binder Error: Referenced column "this_col_does_not_exist" not found in \nFROM clause!' 
- in output.stdout - ) - - -@pytest.mark.set_default_connection(disable=True) -def test_missing_connection_config(): - # This is testing the actual implementation of Config.get_connection - # To make writing tests easier, it's patched by the autouse fixture provide_sqlmesh_default_connection - # Case 1: No default_connection or gateways specified should raise a ConfigError - with pytest.raises(ConfigError): - ctx = Context(config=Config()) - - # Case 2: No connection specified in the gateway should raise a ConfigError - with pytest.raises(ConfigError): - ctx = Context(config=Config(gateways={"incorrect": GatewayConfig()})) - - # Case 3: Specifying a default_connection or connection in the gateway should work - ctx = Context(config=Config(default_connection=DuckDBConnectionConfig())) - ctx = Context( - config=Config(gateways={"default": GatewayConfig(connection=DuckDBConnectionConfig())}) - ) - - -@use_terminal_console -def test_render_path_instead_of_model(tmp_path: Path): - create_temp_file(tmp_path, Path("models/test.sql"), "MODEL (name test_model); SELECT 1 AS col") - ctx = Context(paths=tmp_path, config=Config()) - - # Case 1: Fail gracefully when the user is passing in a path instead of a model name - for test_model in ["models/test.sql", "models/test.py"]: - with pytest.raises( - SQLMeshError, - match="Resolving models by path is not supported, please pass in the model name instead.", - ): - ctx.render(test_model) - - # Case 2: Fail gracefully when the model name is not found - with pytest.raises(SQLMeshError, match="Cannot find model with name 'incorrect_model'"): - ctx.render("incorrect_model") - - # Case 3: Render the model successfully - assert ctx.render("test_model").sql() == 'SELECT 1 AS "col"' - - -@use_terminal_console -def test_plan_always_recreate_environment(tmp_path: Path): - def plan_with_output(ctx: Context, environment: str): - with patch.object(logger, "info") as mock_logger: - with capture_output() as output: - ctx.load() - 
ctx.plan(environment, no_prompts=True, auto_apply=True) - - # Facade logs info "Promoting environment {environment}" - assert mock_logger.call_args[0][1] == environment - - return output - - models_dir = tmp_path / "models" - - logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") - - create_temp_file( - tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" - ) - - config = Config(plan=PlanConfig(always_recreate_environment=True)) - ctx = Context(paths=[tmp_path], config=config) - - # Case 1: Neither prod nor dev exists, so dev is initialized - output = plan_with_output(ctx, "dev") - - assert """`dev` environment will be initialized""" in output.stdout - - # Case 2: Prod does not exist, so dev is updated - create_temp_file( - tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 5 AS col" - ) - - output = plan_with_output(ctx, "dev") - assert "`dev` environment will be initialized" in output.stdout - - # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod - output = plan_with_output(ctx, "prod") - assert "`prod` environment will be initialized" in output.stdout - - # Case 4: Dev is updated with a breaking change. 
Prod exists now so plan comparisons moving forward should be against prod - create_temp_file( - tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col" - ) - ctx.load() - - plan = ctx.plan_builder("dev").build() - - assert ( - next(iter(plan.context_diff.snapshots.values())).change_category - == SnapshotChangeCategory.BREAKING - ) - - output = plan_with_output(ctx, "dev") - assert "New environment `dev` will be created from `prod`" in output.stdout - assert "Differences from the `prod` environment" in output.stdout - - # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes - # so it's still classified as a breaking change - create_temp_file( - tmp_path, - models_dir / "a.sql", - "MODEL (name test.a, kind FULL, owner 'test'); SELECT 10 AS col", - ) - ctx.load() - - plan = ctx.plan_builder("dev").build() - - assert ( - next(iter(plan.context_diff.snapshots.values())).change_category - == SnapshotChangeCategory.BREAKING - ) - - output = plan_with_output(ctx, "dev") - assert "New environment `dev` will be created from `prod`" in output.stdout - assert "Differences from the `prod` environment" in output.stdout - - stdout_rstrip = "\n".join([line.rstrip() for line in output.stdout.split("\n")]) - assert ( - """MODEL ( - name test.a, -+ owner test, - kind FULL - ) - SELECT -- 5 AS col -+ 10 AS col""" - in stdout_rstrip - ) - - # Case 6: Ensure that target environment and create_from environment are not the same - output = plan_with_output(ctx, "prod") - assert not "New environment `prod` will be created from `prod`" in output.stdout - - # Case 7: Check that we can still run Context::diff() against any environment - for environment in ["dev", "prod"]: - context_diff = ctx._context_diff(environment) - assert context_diff.environment == environment - - -@time_machine.travel("2020-01-01 00:00:00 UTC") -def test_scd_type_2_full_restatement_no_start_date(init_and_plan_context: 
t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - # Initial product catalog of 3 products - raw_products = d.parse(""" - MODEL ( - name memory.store.raw_products, - kind FULL - ); - - SELECT * FROM VALUES - (101, 'Laptop Pro', 1299.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), - (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), - (103, 'Office Chair', 199.99, 'Furniture', '2020-01-01 00:00:00'::TIMESTAMP) - AS t(product_id, product_name, price, category, last_updated); - """) - - # SCD Type 2 model for product history tracking - product_history = d.parse(""" - MODEL ( - name memory.store.product_history, - kind SCD_TYPE_2_BY_TIME ( - unique_key product_id, - updated_at_name last_updated, - disable_restatement false - ), - owner catalog_team, - cron '0 */6 * * *', - grain product_id, - description 'Product catalog change history' - ); - - SELECT - product_id::INT AS product_id, - product_name::TEXT AS product_name, - price::DECIMAL(10,2) AS price, - category::TEXT AS category, - last_updated AS last_updated - FROM - memory.store.raw_products; - """) - - raw_products_model = load_sql_based_model(raw_products) - product_history_model = load_sql_based_model(product_history) - context.upsert_model(raw_products_model) - context.upsert_model(product_history_model) - - # Initial plan and apply - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - query = "SELECT product_id, product_name, price, category, last_updated, valid_from, valid_to FROM memory.store.product_history ORDER BY product_id, valid_from" - initial_data = context.engine_adapter.fetchdf(query) - - # Validate initial state of 3 products all active - assert len(initial_data) == 3 - assert initial_data["valid_to"].isna().all() - initial_product_names = set(initial_data["product_name"].tolist()) - assert initial_product_names == {"Laptop Pro", "Wireless Mouse", "Office Chair"} - - # Price 
update and category change - with time_machine.travel("2020-01-15 12:00:00 UTC"): - raw_products_v2 = d.parse(""" - MODEL ( - name memory.store.raw_products, - kind FULL - ); - - SELECT * FROM VALUES - (101, 'Laptop Pro', 1199.99, 'Electronics', '2020-01-15 00:00:00'::TIMESTAMP), - (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP), - (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP) - AS t(product_id, product_name, price, category, last_updated); - """) - raw_products_v2_model = load_sql_based_model(raw_products_v2) - context.upsert_model(raw_products_v2_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - context.run() - - data_after_first_change = context.engine_adapter.fetchdf(query) - - # Should have 5 records (3 original closed, 2 new activε, 1 unchanged) - assert len(data_after_first_change) == 5 - - # Second change - with time_machine.travel("2020-02-01 10:00:00 UTC"): - raw_products_v3 = d.parse(""" - MODEL ( - name memory.store.raw_products, - kind FULL - ); - - SELECT * FROM VALUES - (101, 'Laptop Pro Max', 1399.99, 'Electronics', '2020-02-01 00:00:00'::TIMESTAMP), - (103, 'Ergonomic Office Chair', 229.99, 'Office Furniture', '2020-01-15 00:00:00'::TIMESTAMP), - (102, 'Wireless Mouse', 49.99, 'Electronics', '2020-01-01 00:00:00'::TIMESTAMP) - AS t(product_id, product_name, price, category, last_updated); - """) - raw_products_v3_model = load_sql_based_model(raw_products_v3) - context.upsert_model(raw_products_v3_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - context.run() - data_after_second_change = context.engine_adapter.fetchdf(query) - assert len(data_after_second_change) == 6 - - # Store the current state before full restatement - data_before_full_restatement = data_after_second_change.copy() - - # Perform full restatement (no start date provided) - with 
time_machine.travel("2020-02-01 15:00:00 UTC"): - plan = context.plan_builder( - "prod", skip_tests=True, restate_models=["memory.store.product_history"] - ).build() - context.apply(plan) - data_after_full_restatement = context.engine_adapter.fetchdf(query) - assert len(data_after_full_restatement) == 3 - - # Check that all currently active products before restatement are still active after restatement - active_before = data_before_full_restatement[ - data_before_full_restatement["valid_to"].isna() - ] - active_after = data_after_full_restatement - assert set(active_before["product_id"]) == set(active_after["product_id"]) - - expected_products = { - 101: { - "product_name": "Laptop Pro Max", - "price": 1399.99, - "category": "Electronics", - "last_updated": "2020-02-01", - }, - 102: { - "product_name": "Wireless Mouse", - "price": 49.99, - "category": "Electronics", - "last_updated": "2020-01-01", - }, - 103: { - "product_name": "Ergonomic Office Chair", - "price": 229.99, - "category": "Office Furniture", - "last_updated": "2020-01-15", - }, - } - for _, row in data_after_full_restatement.iterrows(): - pid = row["product_id"] - assert pid in expected_products - expected = expected_products[pid] - assert row["product_name"] == expected["product_name"] - assert float(row["price"]) == expected["price"] - assert row["category"] == expected["category"] - - # valid_from should be the epoch, valid_to should be NaT - assert str(row["valid_from"]) == "1970-01-01 00:00:00" - assert pd.isna(row["valid_to"]) - - -def test_plan_evaluator_correlation_id(tmp_path: Path): - def _correlation_id_in_sqls(correlation_id: CorrelationId, mock_logger): - sqls = [call[0][0] for call in mock_logger.call_args_list] - return any(f"/* {correlation_id} */" in sql for sql in sqls) - - ctx = Context(paths=[tmp_path], config=Config()) - - # Case: Ensure that the correlation id (plan_id) is included in the SQL for each plan - for i in range(2): - create_temp_file( - tmp_path, - Path("models", 
"test.sql"), - f"MODEL (name test.a, kind FULL); SELECT {i} AS col", - ) - - with mock.patch("sqlmesh.core.engine_adapter.base.EngineAdapter._log_sql") as mock_logger: - ctx.load() - plan = ctx.plan(auto_apply=True, no_prompts=True) - - correlation_id = CorrelationId.from_plan_id(plan.plan_id) - assert str(correlation_id) == f"SQLMESH_PLAN: {plan.plan_id}" - - assert _correlation_id_in_sqls(correlation_id, mock_logger) - - -@time_machine.travel("2023-01-08 15:00:00 UTC") -def test_scd_type_2_regular_run_with_offset(init_and_plan_context: t.Callable): - context, plan = init_and_plan_context("examples/sushi") - context.apply(plan) - - raw_employee_status = d.parse(""" - MODEL ( - name memory.hr_system.raw_employee_status, - kind FULL - ); - - SELECT - 1001 AS employee_id, - 'engineering' AS department, - 'EMEA' AS region, - '2023-01-08 15:00:00 UTC' AS last_modified; - """) - - employee_history = d.parse(""" - MODEL ( - name memory.hr_system.employee_history, - kind SCD_TYPE_2_BY_TIME ( - unique_key employee_id, - updated_at_name last_modified, - disable_restatement false - ), - owner hr_analytics, - cron '0 7 * * *', - grain employee_id, - description 'Historical tracking of employee status changes' - ); - - SELECT - employee_id::INT AS employee_id, - department::TEXT AS department, - region::TEXT AS region, - last_modified AS last_modified - FROM - memory.hr_system.raw_employee_status; - """) - - raw_employee_status_model = load_sql_based_model(raw_employee_status) - employee_history_model = load_sql_based_model(employee_history) - context.upsert_model(raw_employee_status_model) - context.upsert_model(employee_history_model) - - # Initial plan and apply - plan = context.plan_builder("prod", skip_tests=True).build() - context.apply(plan) - - query = "SELECT employee_id, department, region, valid_from, valid_to FROM memory.hr_system.employee_history ORDER BY employee_id, valid_from" - initial_data = context.engine_adapter.fetchdf(query) - - assert len(initial_data) 
== 1 - assert initial_data["valid_to"].isna().all() - assert initial_data["department"].tolist() == ["engineering"] - assert initial_data["region"].tolist() == ["EMEA"] - - # Apply a future plan with source changes a few hours before the cron time of the SCD Type 2 model BUT on the same day - with time_machine.travel("2023-01-09 00:10:00 UTC"): - raw_employee_status_v2 = d.parse(""" - MODEL ( - name memory.hr_system.raw_employee_status, - kind FULL - ); - - SELECT - 1001 AS employee_id, - 'engineering' AS department, - 'AMER' AS region, - '2023-01-09 00:10:00 UTC' AS last_modified; - """) - raw_employee_status_v2_model = load_sql_based_model(raw_employee_status_v2) - context.upsert_model(raw_employee_status_v2_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - - # The 7th hour of the day the run is kicked off for the SCD Type 2 model - with time_machine.travel("2023-01-09 07:00:01 UTC"): - context.run() - data_after_change = context.engine_adapter.fetchdf(query) - - # Validate the SCD2 records for employee 1001 - assert len(data_after_change) == 2 - assert data_after_change.iloc[0]["employee_id"] == 1001 - assert data_after_change.iloc[0]["department"] == "engineering" - assert data_after_change.iloc[0]["region"] == "EMEA" - assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" - assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" - assert data_after_change.iloc[1]["employee_id"] == 1001 - assert data_after_change.iloc[1]["department"] == "engineering" - assert data_after_change.iloc[1]["region"] == "AMER" - assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" - assert pd.isna(data_after_change.iloc[1]["valid_to"]) - - # Update source model again a bit later on the same day - raw_employee_status_v2 = d.parse(""" - MODEL ( - name memory.hr_system.raw_employee_status, - kind FULL - ); - - SELECT - 1001 AS employee_id, - 'sales' AS 
department, - 'ANZ' AS region, - '2023-01-09 07:26:00 UTC' AS last_modified; - """) - raw_employee_status_v2_model = load_sql_based_model(raw_employee_status_v2) - context.upsert_model(raw_employee_status_v2_model) - context.plan( - auto_apply=True, no_prompts=True, categorizer_config=CategorizerConfig.all_full() - ) - - # A day later the run is kicked off for the SCD Type 2 model again - with time_machine.travel("2023-01-10 07:00:00 UTC"): - context.run() - data_after_change = context.engine_adapter.fetchdf(query) - - # Validate the SCD2 history for employee 1001 after second change with the historical records intact - assert len(data_after_change) == 3 - assert data_after_change.iloc[0]["employee_id"] == 1001 - assert data_after_change.iloc[0]["department"] == "engineering" - assert data_after_change.iloc[0]["region"] == "EMEA" - assert str(data_after_change.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" - assert str(data_after_change.iloc[0]["valid_to"]) == "2023-01-09 00:10:00" - assert data_after_change.iloc[1]["employee_id"] == 1001 - assert data_after_change.iloc[1]["department"] == "engineering" - assert data_after_change.iloc[1]["region"] == "AMER" - assert str(data_after_change.iloc[1]["valid_from"]) == "2023-01-09 00:10:00" - assert str(data_after_change.iloc[1]["valid_to"]) == "2023-01-09 07:26:00" - assert data_after_change.iloc[2]["employee_id"] == 1001 - assert data_after_change.iloc[2]["department"] == "sales" - assert data_after_change.iloc[2]["region"] == "ANZ" - assert str(data_after_change.iloc[2]["valid_from"]) == "2023-01-09 07:26:00" - assert pd.isna(data_after_change.iloc[2]["valid_to"]) - - # Now test restatement works (full restatement support currently) - with time_machine.travel("2023-01-10 07:38:00 UTC"): - plan = context.plan_builder( - "prod", - skip_tests=True, - restate_models=["memory.hr_system.employee_history"], - start="2023-01-09 00:10:00", - ).build() - context.apply(plan) - restated_data = 
context.engine_adapter.fetchdf(query) - - # Validate the SCD2 history after restatement has been wiped bar one - assert len(restated_data) == 1 - assert restated_data.iloc[0]["employee_id"] == 1001 - assert restated_data.iloc[0]["department"] == "sales" - assert restated_data.iloc[0]["region"] == "ANZ" - assert str(restated_data.iloc[0]["valid_from"]) == "1970-01-01 00:00:00" - assert pd.isna(restated_data.iloc[0]["valid_to"]) - - -def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path): - paths = copy_to_temp_path("examples/multi") - repo_1_path = paths[0] / "repo_1" - repo_2_path = paths[0] / "repo_2" - - # Add an extra gateway to repo_2's config - repo_2_config_path = repo_2_path / "config.yaml" - config_content = repo_2_config_path.read_text() - - modified_config = config_content.replace( - "default_gateway: local", - dedent(""" - extra: - connection: - type: duckdb - database: extra.duckdb - - default_gateway: local - """), - ) - - repo_2_config_path.write_text(modified_config) - - # Create context with both repos but using the repo_1 path first - context = Context( - paths=(repo_1_path, repo_2_path), - gateway="memory", - ) - - # Verify all gateways from both repos are present - gathered_gateways = context.engine_adapters.keys() - expected_gateways = {"local", "memory", "extra"} - assert gathered_gateways == expected_gateways - - -def test_physical_table_naming_strategy_table_only(copy_to_temp_path: t.Callable): - sushi_context = Context( - paths=copy_to_temp_path("examples/sushi"), - config="table_only_naming_config", - ) - - assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.TABLE_ONLY - sushi_context.plan(auto_apply=True) - - adapter = sushi_context.engine_adapter - - snapshot_tables = [ - dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) - for r in adapter.fetchall( - "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" - ) - ] - - 
assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) - - prod_env = sushi_context.state_reader.get_environment("prod") - assert prod_env - - prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) - - assert all( - s.table_naming_convention == TableNamingConvention.TABLE_ONLY - for s in prod_env_snapshots.values() - ) - - -def test_physical_table_naming_strategy_hash_md5(copy_to_temp_path: t.Callable): - sushi_context = Context( - paths=copy_to_temp_path("examples/sushi"), - config="hash_md5_naming_config", - ) - - assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.HASH_MD5 - sushi_context.plan(auto_apply=True) - - adapter = sushi_context.engine_adapter - - snapshot_tables = [ - dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) - for r in adapter.fetchall( - "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" - ) - ] - - assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) - assert all([t["table"].startswith("sqlmesh_md5") for t in snapshot_tables]) - - prod_env = sushi_context.state_reader.get_environment("prod") - assert prod_env - - prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) - - assert all( - s.table_naming_convention == TableNamingConvention.HASH_MD5 - for s in prod_env_snapshots.values() - ) - - -@pytest.mark.slow -def test_default_audits_applied_in_plan(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - - # Create a model with data that will pass the audits - create_temp_file( - tmp_path, - models_dir / "orders.sql", - dedent(""" - MODEL ( - name test.orders, - kind FULL - ); - - SELECT - 1 AS order_id, - 'customer_1' AS customer_id, - 100.50 AS amount, - '2024-01-01'::DATE AS order_date - UNION ALL - SELECT - 2 AS order_id, - 'customer_2' AS customer_id, - 200.75 AS amount, - '2024-01-02'::DATE AS order_date - """), 
- ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", - audits=[ - "not_null(columns := [order_id, customer_id])", - "unique_values(columns := [order_id])", - ], - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Create and apply plan, here audits should pass - plan = context.plan("prod", no_prompts=True) - context.apply(plan) - - # Verify model has the default audits - model = context.get_model("test.orders") - assert len(model.audits) == 2 - - audit_names = [audit[0] for audit in model.audits] - assert "not_null" in audit_names - assert "unique_values" in audit_names - - # Verify audit arguments are preserved - for audit_name, audit_args in model.audits: - if audit_name == "not_null": - assert "columns" in audit_args - columns = [col.name for col in audit_args["columns"].expressions] - assert "order_id" in columns - assert "customer_id" in columns - elif audit_name == "unique_values": - assert "columns" in audit_args - columns = [col.name for col in audit_args["columns"].expressions] - assert "order_id" in columns - - -@pytest.mark.slow -def test_default_audits_fail_on_bad_data(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - - # Create a model with data that violates NOT NULL constraint - create_temp_file( - tmp_path, - models_dir / "bad_orders.sql", - dedent(""" - MODEL ( - name test.bad_orders, - kind FULL - ); - - SELECT - 1 AS order_id, - NULL AS customer_id, -- This violates NOT NULL - 100.50 AS amount, - '2024-01-01'::DATE AS order_date - UNION ALL - SELECT - 2 AS order_id, - 'customer_2' AS customer_id, - 200.75 AS amount, - '2024-01-02'::DATE AS order_date - """), - ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", audits=["not_null(columns := [customer_id])"] - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Plan should fail due to audit failure - with pytest.raises(PlanError): - context.plan("prod", no_prompts=True, 
auto_apply=True) - - -@pytest.mark.slow -def test_default_audits_with_model_specific_audits(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - audits_dir = tmp_path / "audits" - audits_dir.mkdir(exist_ok=True) - - create_temp_file( - tmp_path, - audits_dir / "range_check.sql", - dedent(""" - AUDIT ( - name range_check - ); - - SELECT * FROM @this_model - WHERE @column < @min_value OR @column > @max_value - """), - ) - - # Create a model with its own audits in addition to defaults - create_temp_file( - tmp_path, - models_dir / "products.sql", - dedent(""" - MODEL ( - name test.products, - kind FULL, - audits ( - range_check(column := price, min_value := 0, max_value := 10000) - ) - ); - - SELECT - 1 AS product_id, - 'Widget' AS product_name, - 99.99 AS price - UNION ALL - SELECT - 2 AS product_id, - 'Gadget' AS product_name, - 149.99 AS price - """), - ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", - audits=[ - "not_null(columns := [product_id, product_name])", - "unique_values(columns := [product_id])", - ], - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Create and apply plan - plan = context.plan("prod", no_prompts=True) - context.apply(plan) - - # Verify model has both default and model-specific audits - model = context.get_model("test.products") - assert len(model.audits) == 3 - - audit_names = [audit[0] for audit in model.audits] - assert "not_null" in audit_names - assert "unique_values" in audit_names - assert "range_check" in audit_names - - # Verify audit execution order, default audits first then model-specific - assert model.audits[0][0] == "not_null" - assert model.audits[1][0] == "unique_values" - assert model.audits[2][0] == "range_check" - - -@pytest.mark.slow -def test_default_audits_with_custom_audit_definitions(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir(exist_ok=True) - audits_dir = tmp_path / "audits" - 
audits_dir.mkdir(exist_ok=True) - - # Create custom audit definition - create_temp_file( - tmp_path, - audits_dir / "positive_amount.sql", - dedent(""" - AUDIT ( - name positive_amount - ); - - SELECT * FROM @this_model - WHERE @column <= 0 - """), - ) - - # Create a model - create_temp_file( - tmp_path, - models_dir / "transactions.sql", - dedent(""" - MODEL ( - name test.transactions, - kind FULL - ); - - SELECT - 1 AS transaction_id, - 'TXN001' AS transaction_code, - 250.00 AS amount, - '2024-01-01'::DATE AS transaction_date - UNION ALL - SELECT - 2 AS transaction_id, - 'TXN002' AS transaction_code, - 150.00 AS amount, - '2024-01-02'::DATE AS transaction_date - """), - ) - - config = Config( - model_defaults=ModelDefaultsConfig( - dialect="duckdb", - audits=[ - "not_null(columns := [transaction_id, transaction_code])", - "unique_values(columns := [transaction_id])", - "positive_amount(column := amount)", - ], - ) - ) - - context = Context(paths=tmp_path, config=config) - - # Create and apply plan - plan = context.plan("prod", no_prompts=True) - context.apply(plan) - - # Verify model has all default audits including custom - model = context.get_model("test.transactions") - assert len(model.audits) == 3 - - audit_names = [audit[0] for audit in model.audits] - assert "not_null" in audit_names - assert "unique_values" in audit_names - assert "positive_amount" in audit_names - - # Verify custom audit arguments - for audit_name, audit_args in model.audits: - if audit_name == "positive_amount": - assert "column" in audit_args - assert audit_args["column"].name == "amount" - - -def test_incremental_by_time_model_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial 
model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table 
since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - assert updated_df["new_column"].dropna().tolist() == [3] - - with time_machine.travel("2023-01-11 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - CAST(4 AS STRING) as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 3 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - # The destructive change was ignored but this change is coercable and therefore we still return ints - assert 
updated_df["new_column"].dropna().tolist() == [3, 4] - - with time_machine.travel("2023-01-12 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - CAST(5 AS STRING) as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - # Make the change compatible since that means we will attempt and alter now that is considered additive - context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { - exp.DataType.build("INT"): {exp.DataType.build("STRING")} - } - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 4 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - # The change is now reflected since an additive alter could be performed - assert updated_df["new_column"].dropna().tolist() == ["3", "4", "5"] - - context.close() - - -def test_incremental_by_time_model_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - 
name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column to the source table - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("ALTER TABLE source_table ADD COLUMN new_column INT") - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - 
assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is removed since destructive is allowed - assert "name" not in updated_df.columns - # new_column is not added since additive is ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was applied - assert "name" not in updated_df.columns - # new_column is still not added since additive is ignored - assert "new_column" not in updated_df.columns - - with time_machine.travel("2023-01-11 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - CAST(1 AS STRING) as id, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { - exp.DataType.build("INT"): {exp.DataType.build("STRING")} - } - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 3 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - 
assert "name" not in updated_df.columns - # new_column is still not added since additive is ignored - assert "new_column" not in updated_df.columns - # The additive change was ignored since we set the change as compatible therefore - # instead of getting strings in the result we still return ints - assert updated_df["id"].tolist() == [1, 1, 1] - - with time_machine.travel("2023-01-12 00:00:00 UTC"): - updated_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change allow - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - CAST(1 AS STRING) as id, - 'other' as other_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(updated_model) - - context = Context(paths=[tmp_path], config=config) - # Make the change compatible since that means we will attempt and alter now that is considered additive - context.engine_adapter.SCHEMA_DIFFER_KWARGS["compatible_types"] = { - exp.DataType.build("INT"): {exp.DataType.build("STRING")} - } - context.plan("prod", auto_apply=True, no_prompts=True, run=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 4 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is now added since it is additive is now allowed - assert "new_column" in updated_df.columns - # The change is now reflected since an additive alter could be performed - assert updated_df["id"].dropna().tolist() == ["1", "1", "1", "1"] - - context.close() - - -def test_incremental_by_unique_key_model_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / 
"models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key id, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key id, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should 
still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - -def test_incremental_by_unique_key_model_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key id, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = 
Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_UNIQUE_KEY ( - unique_key id, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in 
updated_df.columns - assert "ds" in updated_df.columns - # name is still not in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - -def test_incremental_unmanaged_model_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_UNMANAGED( - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_UNMANAGED( - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - 
3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - -def test_incremental_unmanaged_model_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_UNMANAGED( - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as 
name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_UNMANAGED( - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 2 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = 
Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - -def test_scd_type_2_by_time_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_TIME ( - unique_key id, - updated_at_name ds, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_dt as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - 
context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_TIME ( - unique_key id, - updated_at_name ds, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 3 as new_column, - @start_dt as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - -def test_scd_type_2_by_time_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - 
default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_TIME ( - unique_key id, - updated_at_name ds, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_dt as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_TIME ( - unique_key id, - updated_at_name ds, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 3 as new_column, - @start_dt as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert 
"source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - -def test_scd_type_2_by_column_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_COLUMN ( - unique_key id, - columns [name], - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table 
VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_COLUMN ( - unique_key id, - columns [new_column], - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" 
in updated_df.columns - - context.close() - - -def test_scd_type_2_by_column_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_COLUMN ( - unique_key id, - columns [stable], - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - 'stable' as stable, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind SCD_TYPE_2_BY_COLUMN ( - unique_key id, - columns [stable], - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'stable2' as stable, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / 
"test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was ignored - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - -def test_incremental_partition_ignore_destructive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_PARTITION ( - on_destructive_change ignore - ), - partitioned_by [ds], - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - 
""" - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_PARTITION ( - on_destructive_change ignore - ), - partitioned_by [ds], - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = 
context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - - context.close() - - -def test_incremental_partition_ignore_additive_change(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_PARTITION ( - on_destructive_change allow, - on_additive_change ignore - ), - partitioned_by [ds], - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 'test_name' as name, - @start_ds as ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("CREATE TABLE source_table (source_id INT)") - context.engine_adapter.execute("INSERT INTO source_table VALUES (1)") - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - - context.close() - - # remove `name` column and add new column - 
initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_PARTITION ( - on_destructive_change allow, - on_additive_change ignore - ), - partitioned_by [ds], - start '2023-01-01', - cron '@daily' - ); - - SELECT - *, - 1 as id, - 3 as new_column, - @start_ds as ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True) - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - - assert len(updated_df) == 1 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.run() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "source_id" in initial_df.columns - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not still in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - - context.close() - - -def test_incremental_by_time_model_ignore_destructive_change_unit_test(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - test_dir = tmp_path / "tests" - test_dir.mkdir() - test_filepath = test_dir / "test_test_model.yaml" - - config = Config( - 
model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - name, - ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - initial_test = f""" - -test_test_model: - model: test_model - inputs: - source_table: - - id: 1 - name: 'test_name' - ds: '2025-01-01' - outputs: - query: - - id: 1 - name: 'test_name' - ds: '2025-01-01' -""" - - # Write initial test - test_filepath.write_text(initial_test) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute( - "CREATE TABLE source_table (id INT, name STRING, new_column INT, ds STRING)" - ) - context.engine_adapter.execute( - "INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" - ) - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - test_result = context.test() - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - # remove `name` column and add new column - initial_model = """ - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - new_column, - ds - FROM - source_table; 
- """ - (models_dir / "test_model.sql").write_text(initial_model) - - updated_test = f""" - - test_test_model: - model: test_model - inputs: - source_table: - - id: 1 - new_column: 3 - ds: '2025-01-01' - outputs: - query: - - id: 1 - new_column: 3 - ds: '2025-01-01' - """ - - # Write initial test - test_filepath.write_text(updated_test) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - test_result = context.test() - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 1 - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')") - context.run() - test_result = context.test() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 2 - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still in table since destructive was ignored - assert "name" in updated_df.columns - # new_column is added since it is additive and allowed - assert "new_column" in updated_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - -def test_incremental_by_time_model_ignore_additive_change_unit_test(tmp_path: Path): - models_dir = tmp_path / "models" - models_dir.mkdir() - 
data_dir = tmp_path / "data" - data_dir.mkdir() - data_filepath = data_dir / "test.duckdb" - test_dir = tmp_path / "tests" - test_dir.mkdir() - test_filepath = test_dir / "test_test_model.yaml" - - config = Config( - model_defaults=ModelDefaultsConfig(dialect="duckdb"), - default_connection=DuckDBConnectionConfig(database=str(data_filepath)), - ) - - # Initial model with 3 columns - initial_model = f""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - name, - ds - FROM - source_table; - """ - - # Write initial model - (models_dir / "test_model.sql").write_text(initial_model) - - initial_test = f""" - -test_test_model: - model: test_model - inputs: - source_table: - - id: 1 - name: 'test_name' - ds: '2025-01-01' - outputs: - query: - - id: 1 - name: 'test_name' - ds: '2025-01-01' -""" - - # Write initial test - test_filepath.write_text(initial_test) - - with time_machine.travel("2023-01-08 00:00:00 UTC"): - # Create context and apply initial model - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute( - "CREATE TABLE source_table (id INT, name STRING, new_column INT, ds STRING)" - ) - context.engine_adapter.execute( - "INSERT INTO source_table VALUES (1, 'test_name', NULL, '2023-01-01')" - ) - - # Apply initial plan and load data - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - test_result = context.test() - - # Verify initial data was loaded - initial_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(initial_df) == 1 - assert "id" in initial_df.columns - assert "name" in initial_df.columns - assert "ds" in initial_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - # remove `name` column and add new column - initial_model = 
""" - MODEL ( - name test_model, - kind INCREMENTAL_BY_TIME_RANGE ( - time_column ds, - forward_only true, - on_destructive_change allow, - on_additive_change ignore - ), - start '2023-01-01', - cron '@daily' - ); - - SELECT - id, - new_column, - ds - FROM - source_table; - """ - (models_dir / "test_model.sql").write_text(initial_model) - - # `new_column` is in the output since unit tests are based on the model definition that currently - # exists and doesn't take into account the historical changes to the table. Therefore `new_column` is - # not actually in the table but it is represented in the test - updated_test = f""" - test_test_model: - model: test_model - inputs: - source_table: - - id: 1 - new_column: 3 - ds: '2025-01-01' - outputs: - query: - - id: 1 - new_column: 3 - ds: '2025-01-01' - """ - - # Write initial test - test_filepath.write_text(updated_test) - - context = Context(paths=[tmp_path], config=config) - context.plan("prod", auto_apply=True, no_prompts=True, skip_tests=True) - test_result = context.test() - - # Verify data loading continued to work - # The existing data should still be there and new data should be loaded - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert len(updated_df) == 1 - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is not in table since destructive was ignored - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() - - with time_machine.travel("2023-01-10 00:00:00 UTC"): - context = Context(paths=[tmp_path], config=config) - context.engine_adapter.execute("INSERT INTO source_table VALUES (2, NULL, 3, '2023-01-09')") - context.run() - test_result = context.test() - updated_df = context.fetchdf('SELECT * FROM "default"."test_model"') - assert 
len(updated_df) == 2 - assert "id" in updated_df.columns - assert "ds" in updated_df.columns - # name is still not in table since destructive was allowed - assert "name" not in updated_df.columns - # new_column is not added since it is additive and ignored - assert "new_column" not in updated_df.columns - assert len(test_result.successes) == 1 - assert test_result.testsRun == len(test_result.successes) - - context.close() diff --git a/tests/core/test_janitor.py b/tests/core/test_janitor.py new file mode 100644 index 0000000000..e5e209f2cc --- /dev/null +++ b/tests/core/test_janitor.py @@ -0,0 +1,282 @@ +import typing as t +from unittest.mock import call + +import pytest +from pytest_mock.plugin import MockerFixture + +from sqlmesh.core.config import EnvironmentSuffixTarget +from sqlmesh.core import constants as c +from sqlmesh.core.dialect import parse_one, schema_ +from sqlmesh.core.engine_adapter import create_engine_adapter +from sqlmesh.core.environment import Environment +from sqlmesh.core.model import ( + ModelKindName, + SqlModel, +) +from sqlmesh.core.model.definition import ExternalModel +from sqlmesh.core.snapshot import ( + SnapshotChangeCategory, +) +from sqlmesh.core.state_sync import ( + EngineAdapterStateSync, +) +from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots +from sqlmesh.utils.date import now_timestamp +from sqlmesh.utils.errors import SQLMeshError + +pytestmark = pytest.mark.slow + + +@pytest.fixture +def state_sync(duck_conn, tmp_path): + state_sync = EngineAdapterStateSync( + create_engine_adapter(lambda: duck_conn, "duckdb"), + schema=c.SQLMESH, + cache_dir=tmp_path / c.CACHE, + ) + state_sync.migrate() + return state_sync + + +def test_cleanup_expired_views(mocker: MockerFixture, make_snapshot: t.Callable): + adapter = mocker.MagicMock() + adapter.dialect = None + snapshot_a = make_snapshot(SqlModel(name="catalog.schema.a", query=parse_one("select 1, ds"))) + 
snapshot_a.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot_b = make_snapshot(SqlModel(name="catalog.schema.b", query=parse_one("select 1, ds"))) + snapshot_b.categorize_as(SnapshotChangeCategory.BREAKING) + # Make sure that we don't drop schemas from external models + snapshot_external_model = make_snapshot( + ExternalModel(name="catalog.external_schema.external_table", kind=ModelKindName.EXTERNAL) + ) + snapshot_external_model.categorize_as(SnapshotChangeCategory.BREAKING) + schema_environment = Environment( + name="test_environment", + suffix_target=EnvironmentSuffixTarget.SCHEMA, + snapshots=[ + snapshot_a.table_info, + snapshot_b.table_info, + snapshot_external_model.table_info, + ], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="test_plan_id", + previous_plan_id="test_plan_id", + catalog_name_override="catalog_override", + ) + snapshot_c = make_snapshot(SqlModel(name="catalog.schema.c", query=parse_one("select 1, ds"))) + snapshot_c.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot_d = make_snapshot(SqlModel(name="catalog.schema.d", query=parse_one("select 1, ds"))) + snapshot_d.categorize_as(SnapshotChangeCategory.BREAKING) + table_environment = Environment( + name="test_environment", + suffix_target=EnvironmentSuffixTarget.TABLE, + snapshots=[ + snapshot_c.table_info, + snapshot_d.table_info, + snapshot_external_model.table_info, + ], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="test_plan_id", + previous_plan_id="test_plan_id", + catalog_name_override="catalog_override", + ) + cleanup_expired_views(adapter, {}, [schema_environment, table_environment]) + assert adapter.drop_schema.called + assert adapter.drop_view.called + assert adapter.drop_schema.call_args_list == [ + call( + schema_("schema__test_environment", "catalog_override"), + ignore_if_not_exists=True, + cascade=True, + ) + ] + assert sorted(adapter.drop_view.call_args_list) == [ + call("catalog_override.schema.c__test_environment", 
ignore_if_not_exists=True), + call("catalog_override.schema.d__test_environment", ignore_if_not_exists=True), + ] + + +@pytest.mark.parametrize( + "suffix_target", [EnvironmentSuffixTarget.SCHEMA, EnvironmentSuffixTarget.TABLE] +) +def test_cleanup_expired_environment_schema_warn_on_delete_failure( + mocker: MockerFixture, make_snapshot: t.Callable, suffix_target: EnvironmentSuffixTarget +): + adapter = mocker.MagicMock() + adapter.dialect = None + adapter.drop_schema.side_effect = Exception("Failed to drop the schema") + adapter.drop_view.side_effect = Exception("Failed to drop the view") + + snapshot = make_snapshot( + SqlModel(name="test_catalog.test_schema.test_model", query=parse_one("select 1, ds")) + ) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + schema_environment = Environment( + name="test_environment", + suffix_target=suffix_target, + snapshots=[snapshot.table_info], + start_at="2022-01-01", + end_at="2022-01-01", + plan_id="test_plan_id", + previous_plan_id="test_plan_id", + catalog_name_override="catalog_override", + ) + + with pytest.raises(SQLMeshError, match="Failed to drop the expired environment .*"): + cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=False) + + cleanup_expired_views(adapter, {}, [schema_environment], warn_on_delete_failure=True) + + if suffix_target == EnvironmentSuffixTarget.SCHEMA: + assert adapter.drop_schema.called + else: + assert adapter.drop_view.called + + +def test_delete_expired_snapshots_common_function_batching( + state_sync: EngineAdapterStateSync, make_snapshot: t.Callable, mocker: MockerFixture +): + """Test that the common delete_expired_snapshots function properly pages through batches and deletes them.""" + from sqlmesh.core.state_sync.common import ExpiredBatchRange, RowBoundary, LimitBoundary + from unittest.mock import MagicMock + + now_ts = now_timestamp() + + # Create 5 expired snapshots with different timestamps + snapshots = [] + for idx in range(5): + 
snapshot = make_snapshot( + SqlModel( + name=f"model_{idx}", + query=parse_one("select 1 as a, ds"), + ), + ) + snapshot.ttl = "in 10 seconds" + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot.updated_ts = now_ts - (20000 + idx * 1000) + snapshots.append(snapshot) + + state_sync.push_snapshots(snapshots) + + # Spy on get_expired_snapshots and delete_expired_snapshots methods + get_expired_spy = mocker.spy(state_sync, "get_expired_snapshots") + delete_expired_spy = mocker.spy(state_sync, "delete_expired_snapshots") + + # Mock snapshot evaluator + mock_evaluator = MagicMock() + mock_evaluator.cleanup = MagicMock() + + # Run delete_expired_snapshots with batch_size=2 + delete_expired_snapshots( + state_sync, + mock_evaluator, + current_ts=now_ts, + batch_size=2, + ) + + # Verify get_expired_snapshots was called the correct number of times: + # - 3 batches (2+2+1): each batch triggers 2 calls (one from iter_expired_snapshot_batches, one from delete_expired_snapshots) + # - Plus 1 final call that returns empty to exit the loop + # Total: 3 * 2 + 1 = 7 calls + assert get_expired_spy.call_count == 7 + + # Verify the progression of batch_range calls from the iter_expired_snapshot_batches loop + # (calls at indices 0, 2, 4, 6 are from iter_expired_snapshot_batches) + # (calls at indices 1, 3, 5 are from delete_expired_snapshots in facade.py) + calls = get_expired_spy.call_args_list + + # First call from iterator should have a batch_range starting from the beginning + first_call_kwargs = calls[0][1] + assert "batch_range" in first_call_kwargs + first_range = first_call_kwargs["batch_range"] + assert isinstance(first_range, ExpiredBatchRange) + assert isinstance(first_range.start, RowBoundary) + assert isinstance(first_range.end, LimitBoundary) + assert first_range.end.batch_size == 2 + assert first_range.start.updated_ts == 0 + assert first_range.start.name == "" + assert first_range.start.identifier == "" + + # Third call (second batch from iterator) 
should have a batch_range from the first batch's range + third_call_kwargs = calls[2][1] + assert "batch_range" in third_call_kwargs + second_range = third_call_kwargs["batch_range"] + assert isinstance(second_range, ExpiredBatchRange) + assert isinstance(second_range.start, RowBoundary) + assert isinstance(second_range.end, LimitBoundary) + assert second_range.end.batch_size == 2 + # Should have progressed from the first batch + assert second_range.start.updated_ts > 0 + assert second_range.start.name == '"model_3"' + + # Fifth call (third batch from iterator) should have a batch_range from the second batch's range + fifth_call_kwargs = calls[4][1] + assert "batch_range" in fifth_call_kwargs + third_range = fifth_call_kwargs["batch_range"] + assert isinstance(third_range, ExpiredBatchRange) + assert isinstance(third_range.start, RowBoundary) + assert isinstance(third_range.end, LimitBoundary) + assert third_range.end.batch_size == 2 + # Should have progressed from the second batch + assert third_range.start.updated_ts >= second_range.start.updated_ts + assert third_range.start.name == '"model_1"' + + # Seventh call (final call from iterator) should have a batch_range from the third batch's range + seventh_call_kwargs = calls[6][1] + assert "batch_range" in seventh_call_kwargs + fourth_range = seventh_call_kwargs["batch_range"] + assert isinstance(fourth_range, ExpiredBatchRange) + assert isinstance(fourth_range.start, RowBoundary) + assert isinstance(fourth_range.end, LimitBoundary) + assert fourth_range.end.batch_size == 2 + # Should have progressed from the third batch + assert fourth_range.start.updated_ts >= third_range.start.updated_ts + assert fourth_range.start.name == '"model_0"' + + # Verify delete_expired_snapshots was called 3 times (once per batch) + assert delete_expired_spy.call_count == 3 + + # Verify each delete call used a batch_range + delete_calls = delete_expired_spy.call_args_list + + # First call should have a batch_range matching the first 
batch + first_delete_kwargs = delete_calls[0][1] + assert "batch_range" in first_delete_kwargs + first_delete_range = first_delete_kwargs["batch_range"] + assert isinstance(first_delete_range, ExpiredBatchRange) + assert isinstance(first_delete_range.start, RowBoundary) + assert first_delete_range.start.updated_ts == 0 + assert isinstance(first_delete_range.end, RowBoundary) + assert first_delete_range.end.updated_ts == second_range.start.updated_ts + assert first_delete_range.end.name == second_range.start.name + assert first_delete_range.end.identifier == second_range.start.identifier + + second_delete_kwargs = delete_calls[1][1] + assert "batch_range" in second_delete_kwargs + second_delete_range = second_delete_kwargs["batch_range"] + assert isinstance(second_delete_range, ExpiredBatchRange) + assert isinstance(second_delete_range.start, RowBoundary) + assert second_delete_range.start.updated_ts == 0 + assert isinstance(second_delete_range.end, RowBoundary) + assert second_delete_range.end.updated_ts == third_range.start.updated_ts + assert second_delete_range.end.name == third_range.start.name + assert second_delete_range.end.identifier == third_range.start.identifier + + third_delete_kwargs = delete_calls[2][1] + assert "batch_range" in third_delete_kwargs + third_delete_range = third_delete_kwargs["batch_range"] + assert isinstance(third_delete_range, ExpiredBatchRange) + assert isinstance(third_delete_range.start, RowBoundary) + assert third_delete_range.start.updated_ts == 0 + assert isinstance(third_delete_range.end, RowBoundary) + assert third_delete_range.end.updated_ts == fourth_range.start.updated_ts + assert third_delete_range.end.name == fourth_range.start.name + assert third_delete_range.end.identifier == fourth_range.start.identifier + # Verify the cleanup method was called for each batch that had cleanup tasks + assert mock_evaluator.cleanup.call_count >= 1 + + # Verify all snapshots were deleted in the end + remaining = 
state_sync.get_snapshots(snapshots) + assert len(remaining) == 0 diff --git a/tests/core/test_macros.py b/tests/core/test_macros.py index 77d8fb84ae..e37a7ec05b 100644 --- a/tests/core/test_macros.py +++ b/tests/core/test_macros.py @@ -98,7 +98,7 @@ def test_select_macro(evaluator): @macro() def test_literal_type(evaluator, a: t.Literal["test_literal_a", "test_literal_b", 1, True]): - if isinstance(a, exp.Expression): + if isinstance(a, exp.Expr): raise SQLMeshError("Coercion failed") return f"'{a}'" @@ -292,6 +292,16 @@ def test_ast_correctness(macro_evaluator): "SELECT 'a' + a_z + 'c' + c_a, 'b' + b_z + 'c' + c_b", {"y": "c"}, ), + ( + """select @each(['a'], x -> @X)""", + "SELECT 'a'", + {}, + ), + ( + """select @each(['a'], X -> @x)""", + "SELECT 'a'", + {}, + ), ( '"is_@{x}"', '"is_b"', @@ -684,8 +694,8 @@ def test_macro_coercion(macro_evaluator: MacroEvaluator, assert_exp_eq): ) == (1, "2", (3.0,)) # Using exp.Expression will always return the input expression - assert coerce(parse_one("order", into=exp.Column), exp.Expression) == exp.column("order") - assert coerce(exp.Literal.string("OK"), exp.Expression) == exp.Literal.string("OK") + assert coerce(parse_one("order", into=exp.Column), exp.Expr) == exp.column("order") + assert coerce(exp.Literal.string("OK"), exp.Expr) == exp.Literal.string("OK") # Strict flag allows raising errors and is used when recursively coercing expressions # otherwise, in general, we want to be lenient and just warn the user when something is not possible @@ -920,12 +930,10 @@ def test_date_spine(assert_exp_eq, dialect, date_part): FLATTEN( INPUT => ARRAY_GENERATE_RANGE( 0, - ( - DATEDIFF( - {date_part.upper()}, - CAST('2022-01-01' AS DATE), - CAST('2024-12-31' AS DATE) - ) + 1 - 1 + DATEDIFF( + {date_part.upper()}, + CAST('2022-01-01' AS DATE), + CAST('2024-12-31' AS DATE) ) + 1 ) ) @@ -1112,7 +1120,9 @@ def test_macro_with_spaces(): for sql, expected in ( ("@x", '"a b"'), + ("@X", '"a b"'), ("@{x}", '"a b"'), + ("@{X}", '"a b"'), 
("a_@x", '"a_a b"'), ("a.@x", 'a."a b"'), ("@y", "'a b'"), @@ -1121,6 +1131,7 @@ def test_macro_with_spaces(): ("a.@{y}", 'a."a b"'), ("@z", 'a."b c"'), ("d.@z", 'd.a."b c"'), + ("@'test_@{X}_suffix'", "'test_a b_suffix'"), ): assert evaluator.transform(parse_one(sql)).sql() == expected diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 1511e37c53..9bdc976b56 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -1,6 +1,7 @@ # ruff: noqa: F811 import json import typing as t +import re from datetime import date, datetime from pathlib import Path from unittest.mock import patch, PropertyMock @@ -14,7 +15,7 @@ from sqlglot.schema import MappingSchema from sqlmesh.cli.project_init import init_example_project, ProjectTemplate from sqlmesh.core.environment import EnvironmentNamingInfo -from sqlmesh.core.model.kind import TimeColumn, ModelKindName +from sqlmesh.core.model.kind import TimeColumn, ModelKindName, SeedKind from sqlmesh import CustomMaterialization, CustomKind from pydantic import model_validator, ValidationError @@ -36,6 +37,7 @@ from sqlmesh.core.dialect import parse from sqlmesh.core.engine_adapter.base import MERGE_SOURCE_ALIAS, MERGE_TARGET_ALIAS from sqlmesh.core.engine_adapter.duckdb import DuckDBEngineAdapter +from sqlmesh.core.engine_adapter.shared import DataObjectType from sqlmesh.core.macros import MacroEvaluator, macro from sqlmesh.core.model import ( CustomKind, @@ -51,6 +53,8 @@ TimeColumn, ExternalKind, ViewKind, + EmbeddedKind, + SCDType2ByTimeKind, create_external_model, create_seed_model, create_sql_model, @@ -59,9 +63,9 @@ model, ) from sqlmesh.core.model.common import parse_expression -from sqlmesh.core.model.kind import ModelKindName, _model_kind_validator +from sqlmesh.core.model.kind import _ModelKind, ModelKindName, _model_kind_validator from sqlmesh.core.model.seed import CsvSettings -from sqlmesh.core.node import IntervalUnit, _Node +from sqlmesh.core.node import IntervalUnit, _Node, DbtNodeInfo 
from sqlmesh.core.signal import signal from sqlmesh.core.snapshot import Snapshot, SnapshotChangeCategory from sqlmesh.utils.date import TimeLike, to_datetime, to_ds, to_timestamp @@ -460,10 +464,10 @@ def test_model_qualification(tmp_path: Path): ctx.upsert_model(load_sql_based_model(expressions)) ctx.plan_builder("dev") - assert ( - """Column '"a"' could not be resolved for model '"db"."table"', the column may not exist or is ambiguous.""" - in mock_logger.call_args[0][0] - ) + warning_msg = mock_logger.call_args[0][0] + assert "ambiguousorinvalidcolumn:" in warning_msg + assert "could not be resolved" in warning_msg + assert "db.table" in warning_msg @use_terminal_console @@ -915,7 +919,7 @@ def test_json_serde(): assert ( SqlModel.parse_obj(model_json_parsed).render_query().sql("duckdb") - == 'SELECT REGEXP_MATCHES("x", "y") AS "c"' + == 'SELECT REGEXP_FULL_MATCH("x", "y") AS "c"' ) @@ -945,7 +949,7 @@ def test_scd_type_2_by_col_serde(): model_json_parsed = json.loads(model.json()) assert model_json_parsed["kind"]["dialect"] == "bigquery" assert model_json_parsed["kind"]["unique_key"] == ["`a`"] - assert model_json_parsed["kind"]["columns"] == "*" + assert model_json_parsed["kind"]["columns"] == ["*"] # Bigquery converts TIMESTAMP -> DATETIME assert model_json_parsed["kind"]["time_data_type"] == "DATETIME" @@ -1922,7 +1926,8 @@ def test_render_definition_with_defaults(): kind VIEW ( materialized FALSE ), - virtual_environment_mode 'full' + virtual_environment_mode 'full', + grants_target_layer 'virtual' ); {query} @@ -1935,6 +1940,90 @@ def test_render_definition_with_defaults(): ) == d.format_model_expressions(expected_expressions) +def test_render_definition_with_grants(): + from sqlmesh.core.model.meta import GrantsTargetLayer + + expressions = d.parse( + """ + MODEL ( + name test.grants_model, + kind FULL, + grants ( + 'select' = ['user1', 'user2'], + 'insert' = ['admin'], + 'roles/bigquery.dataViewer' = ['user:data_eng@mycompany.com'] + ), + 
grants_target_layer all, + ); + SELECT 1 as id + """ + ) + model = load_sql_based_model(expressions) + assert model.grants_target_layer == GrantsTargetLayer.ALL + assert model.grants == { + "select": ["user1", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": ["user:data_eng@mycompany.com"], + } + + rendered = model.render_definition(include_defaults=True) + rendered_text = d.format_model_expressions(rendered) + assert "grants_target_layer 'all'" in rendered_text + assert re.search( + r"grants\s*\(" + r"\s*'select'\s*=\s*ARRAY\('user1',\s*'user2'\)," + r"\s*'insert'\s*=\s*ARRAY\('admin'\)," + r"\s*'roles/bigquery.dataViewer'\s*=\s*ARRAY\('user:data_eng@mycompany.com'\)" + r"\s*\)", + rendered_text, + ) + + model_with_grants = create_sql_model( + name="test_grants_programmatic", + query=d.parse_one("SELECT 1 as id"), + grants={"select": ["user1", "user2"], "insert": ["admin"]}, + grants_target_layer=GrantsTargetLayer.ALL, + ) + assert model_with_grants.grants == {"select": ["user1", "user2"], "insert": ["admin"]} + assert model_with_grants.grants_target_layer == GrantsTargetLayer.ALL + rendered_text = d.format_model_expressions( + model_with_grants.render_definition(include_defaults=True) + ) + assert "grants_target_layer 'all'" in rendered_text + assert re.search( + r"grants\s*\(" + r"\s*'select'\s*=\s*ARRAY\('user1',\s*'user2'\)," + r"\s*'insert'\s*=\s*ARRAY\('admin'\)" + r"\s*\)", + rendered_text, + ) + + virtual_expressions = d.parse( + """ + MODEL ( + name test.virtual_grants_model, + kind FULL, + grants_target_layer virtual + ); + SELECT 1 as id + """ + ) + virtual_model = load_sql_based_model(virtual_expressions) + assert virtual_model.grants_target_layer == GrantsTargetLayer.VIRTUAL + + default_expressions = d.parse( + """ + MODEL ( + name test.default_grants_model, + kind FULL + ); + SELECT 1 as id + """ + ) + default_model = load_sql_based_model(default_expressions) + assert default_model.grants_target_layer == GrantsTargetLayer.VIRTUAL # 
default value + + def test_render_definition_partitioned_by(): # no parenthesis in definition, no parenthesis when rendered model = load_sql_based_model( @@ -2100,6 +2189,33 @@ def test_render_definition_with_virtual_update_statements(): ) +def test_render_definition_dbt_node_info(): + node_info = DbtNodeInfo(unique_id="model.db.table", name="table", fqn="db.table") + model = load_sql_based_model( + d.parse( + f""" + MODEL ( + name db.table, + kind FULL + ); + + select 1 as a; + """ + ), + dbt_node_info=node_info, + ) + + assert model.dbt_node_info + assert ( + model.render_definition()[0].sql(pretty=True) + == """MODEL ( + name db.table, + dbt_node_info (fqn := 'db.table', name := 'table', unique_id := 'model.db.table'), + kind FULL +)""" + ) + + def test_cron(): daily = _Node(name="x", cron="@daily") assert to_datetime(daily.cron_prev("2020-01-01")) == to_datetime("2019-12-31") @@ -2611,6 +2727,156 @@ def test_parse(assert_exp_eq): ) +def test_dialect_pattern(): + def make_test_sql(text: str) -> str: + return f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE( + time_column ds + ), + {text} + ); + + SELECT 1; + """ + + def assert_match(test_sql: str, expected_value: t.Optional[str] = "duckdb"): + match = d.DIALECT_PATTERN.search(test_sql) + + dialect_str: t.Optional[str] = None + if expected_value is not None: + assert match + dialect_str = match.group("dialect") + + assert dialect_str == expected_value + + # single-quoted dialect + assert_match( + make_test_sql( + """ + dialect 'duckdb', + description 'there's a dialect foo in here too!' + """ + ) + ) + + # bare dialect + assert_match( + make_test_sql( + """ + dialect duckdb, + description 'there's a dialect foo in here too!' + """ + ) + ) + + # double-quoted dialect (allowed in BQ) + assert_match( + make_test_sql( + """ + dialect "duckdb", + description 'there's a dialect foo in here too!' 
+ """ + ) + ) + + # no dialect specified, "dialect" in description + test_sql = make_test_sql( + """ + description 'there's a dialect foo in here too!' + """ + ) + + matches = list(d.DIALECT_PATTERN.finditer(test_sql)) + assert not matches + + # line comment between properties + assert_match( + make_test_sql( + """ + tag my_tag, -- comment + dialect duckdb + """ + ) + ) + + # block comment between properties + assert_match( + make_test_sql( + """ + tag my_tag, /* comment */ + dialect duckdb + """ + ) + ) + + # quoted empty dialect + assert_match( + make_test_sql( + """ + dialect '', + tag my_tag + """ + ), + None, + ) + + # double-quoted empty dialect + assert_match( + make_test_sql( + """ + dialect "", + tag my_tag + """ + ), + None, + ) + + # trailing comment after dialect value + assert_match( + make_test_sql( + """ + dialect duckdb -- trailing comment + """ + ) + ) + + # dialect value isn't terminated by ',' or ')' + test_sql = make_test_sql( + """ + dialect duckdb -- trailing comment + tag my_tag + """ + ) + + matches = list(d.DIALECT_PATTERN.finditer(test_sql)) + assert not matches + + # dialect first + assert_match( + """ + MODEL( + dialect duckdb, + name my_name + ); + """ + ) + + # full parse + sql = """ + MODEL ( + name test_model, + description 'this text mentions dialect foo but is not a property' + ); + + SELECT 1; + """ + expressions = d.parse(sql, default_dialect="duckdb") + model = load_sql_based_model(expressions) + assert model.dialect == "" + + CONST = "bar" @@ -2898,7 +3164,15 @@ def my_model_2(context): # no warning with valid kind dict with patch.object(get_console(), "log_warning") as mock_logger: - @model("kind_valid_dict", kind=dict(name=ModelKindName.FULL), columns={'"COL"': "int"}) + @model( + "kind_valid_dict", + kind=dict( + name=ModelKindName.INCREMENTAL_BY_TIME_RANGE, + time_column="ds", + auto_restatement_cron="@hourly", + ), + columns={'"ds"': "date", '"COL"': "int"}, + ) def my_model(context): pass @@ -2907,11 +3181,33 @@ def 
my_model(context): path=Path("."), ) - assert isinstance(python_model.kind, FullKind) + assert isinstance(python_model.kind, IncrementalByTimeRangeKind) assert not mock_logger.call_args +def test_python_model_decorator_auto_restatement_cron() -> None: + @model( + "auto_restatement_model", + cron="@daily", + kind=dict( + name=ModelKindName.INCREMENTAL_BY_TIME_RANGE, + time_column="ds", + auto_restatement_cron="@hourly", + ), + columns={'"ds"': "date", '"COL"': "int"}, + ) + def my_model(context): + pass + + python_model = model.get_registry()["auto_restatement_model"].model( + module_path=Path("."), + path=Path("."), + ) + + assert python_model.auto_restatement_cron == "@hourly" + + def test_python_model_decorator_col_descriptions() -> None: # `columns` and `column_descriptions` column names are different cases, but name normalization makes both lower @model("col_descriptions", columns={"col": "int"}, column_descriptions={"COL": "a column"}) @@ -3354,7 +3650,7 @@ def test_model_ctas_query(): assert ( load_sql_based_model(expressions, dialect="bigquery").ctas_query().sql() - == 'WITH RECURSIVE "a" AS (SELECT * FROM (SELECT * FROM (SELECT * FROM "x" AS "x" WHERE FALSE) AS "_q_0" WHERE FALSE) AS "_q_1" WHERE FALSE), "b" AS (SELECT * FROM "a" AS "a" WHERE FALSE UNION ALL SELECT * FROM "a" AS "a" WHERE FALSE) SELECT * FROM "b" AS "b" WHERE FALSE LIMIT 0' + == 'WITH RECURSIVE "a" AS (SELECT * FROM (SELECT * FROM (SELECT * FROM "x" AS "x" WHERE FALSE) AS "_0" WHERE FALSE) AS "_1" WHERE FALSE), "b" AS (SELECT * FROM "a" AS "a" WHERE FALSE UNION ALL SELECT * FROM "a" AS "a" WHERE FALSE) SELECT * FROM "b" AS "b" WHERE FALSE LIMIT 0' ) expressions = d.parse( @@ -3375,7 +3671,7 @@ def test_model_ctas_query(): assert ( load_sql_based_model(expressions, dialect="bigquery").ctas_query().sql() - == 'WITH RECURSIVE "a" AS (WITH "nested_a" AS (SELECT * FROM (SELECT * FROM (SELECT * FROM "x" AS "x" WHERE FALSE) AS "_q_0" WHERE FALSE) AS "_q_1" WHERE FALSE) SELECT * FROM "nested_a" AS 
"nested_a" WHERE FALSE), "b" AS (SELECT * FROM "a" AS "a" WHERE FALSE UNION ALL SELECT * FROM "a" AS "a" WHERE FALSE) SELECT * FROM "b" AS "b" WHERE FALSE LIMIT 0' + == 'WITH RECURSIVE "a" AS (WITH "nested_a" AS (SELECT * FROM (SELECT * FROM (SELECT * FROM "x" AS "x" WHERE FALSE) AS "_0" WHERE FALSE) AS "_1" WHERE FALSE) SELECT * FROM "nested_a" AS "nested_a" WHERE FALSE), "b" AS (SELECT * FROM "a" AS "a" WHERE FALSE UNION ALL SELECT * FROM "a" AS "a" WHERE FALSE) SELECT * FROM "b" AS "b" WHERE FALSE LIMIT 0' ) @@ -4699,7 +4995,7 @@ def test_model_session_properties(sushi_context): ) ) assert model.session_properties == { - "query_label": parse_one("[('key1', 'value1'), ('key2', 'value2')]") + "query_label": parse_one("[('key1', 'value1'), ('key2', 'value2')]", dialect="bigquery") } model = load_sql_based_model( @@ -5281,7 +5577,7 @@ def scd_type_2_model(context, **kwargs): '["col1"]', [exp.to_column("col1", quoted=True)], ), - ("*", exp.Star()), + ("*", [exp.Star()]), ], ) def test_check_column_variants(input_columns, expected_columns): @@ -5715,7 +6011,7 @@ def test_when_matched_normalization() -> None: assert isinstance(model.kind, IncrementalByUniqueKeyKind) assert isinstance(model.kind.when_matched, exp.Whens) first_expression = model.kind.when_matched.expressions[0] - assert isinstance(first_expression, exp.Expression) + assert isinstance(first_expression, exp.Expr) assert ( first_expression.sql(dialect="snowflake") == 'WHEN MATCHED THEN UPDATE SET "__MERGE_TARGET__"."KEY_A" = "__MERGE_SOURCE__"."KEY_A", "__MERGE_TARGET__"."KEY_B" = "__MERGE_SOURCE__"."KEY_B"' @@ -5743,7 +6039,7 @@ def test_when_matched_normalization() -> None: assert isinstance(model.kind, IncrementalByUniqueKeyKind) assert isinstance(model.kind.when_matched, exp.Whens) first_expression = model.kind.when_matched.expressions[0] - assert isinstance(first_expression, exp.Expression) + assert isinstance(first_expression, exp.Expr) assert ( first_expression.sql(dialect="snowflake") == 'WHEN 
MATCHED THEN UPDATE SET "__MERGE_TARGET__"."kEy_A" = "__MERGE_SOURCE__"."kEy_A", "__MERGE_TARGET__"."kEY_b" = "__MERGE_SOURCE__"."KEY_B"' @@ -6151,7 +6447,7 @@ def test_end_no_start(): def test_variables(): @macro() - def test_macro_var(evaluator) -> exp.Expression: + def test_macro_var(evaluator) -> exp.Expr: return exp.convert(evaluator.var("TEST_VAR_D") + 10) expressions = parse( @@ -6650,7 +6946,7 @@ def test_unrendered_macros_sql_model(mocker: MockerFixture) -> None: # merge_filter will stay unrendered as well assert model.unique_key[0] == exp.column("a", quoted=True) assert ( - t.cast(exp.Expression, model.merge_filter).sql() + t.cast(exp.Expr, model.merge_filter).sql() == '"__MERGE_SOURCE__"."id" > 0 AND "__MERGE_TARGET__"."updated_at" < @end_ds AND "__MERGE_SOURCE__"."updated_at" > @start_ds AND @merge_filter_var' ) @@ -6853,7 +7149,7 @@ def test_gateway_macro() -> None: assert model.render_query_or_raise().sql() == "SELECT 'in_memory' AS \"gateway\"" @macro() - def macro_uses_gateway(evaluator) -> exp.Expression: + def macro_uses_gateway(evaluator) -> exp.Expr: return exp.convert(evaluator.gateway + "_from_macro") model = load_sql_based_model( @@ -7437,6 +7733,107 @@ def test_forward_only_on_destructive_change_config() -> None: assert context_model.on_destructive_change.is_allow +def test_batch_concurrency_config() -> None: + # No batch_concurrency default for incremental models + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column c + ), + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + context.upsert_model(model) + context_model = context.get_model("memory.db.table") + assert context_model.batch_concurrency is None + + # batch_concurrency specified in model defaults applies to incremental models + config 
= Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5)) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column c + ), + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + context.upsert_model(model) + context_model = context.get_model("memory.db.table") + assert context_model.batch_concurrency == 5 + + # batch_concurrency specified in model definition overrides default + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5)) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column c, + batch_concurrency 10 + ), + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + context.upsert_model(model) + context_model = context.get_model("memory.db.table") + assert context_model.batch_concurrency == 10 + + # batch_concurrency default does not apply to non-incremental models + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5)) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind FULL, + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + context.upsert_model(model) + context_model = context.get_model("memory.db.table") + assert context_model.batch_concurrency is None + + # batch_concurrency default does not apply to INCREMENTAL_BY_UNIQUE_KEY models + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb", batch_concurrency=5)) + context = Context(config=config) + + expressions = d.parse( + """ + MODEL ( + name memory.db.table, + kind INCREMENTAL_BY_UNIQUE_KEY ( + unique_key a + 
), + ); + SELECT a, b, c FROM source_table; + """ + ) + model = load_sql_based_model(expressions, defaults=config.model_defaults.dict()) + context.upsert_model(model) + context_model = context.get_model("memory.db.table") + assert context_model.batch_concurrency == 1 + + def test_model_meta_on_additive_change_property() -> None: """Test that ModelMeta has on_additive_change property that works like on_destructive_change.""" from sqlmesh.core.model.kind import IncrementalByTimeRangeKind, OnAdditiveChange @@ -8113,7 +8510,7 @@ def test_model_kind_to_expression(): .kind.to_expression() .sql() == """SCD_TYPE_2_BY_COLUMN ( -columns *, +columns (*), execution_time_as_valid_from FALSE, unique_key ("a", "b"), valid_from_name "valid_from", @@ -8332,7 +8729,7 @@ def test_merge_filter_macro(): def predicate( evaluator: MacroEvaluator, cluster_column: exp.Column, - ) -> exp.Expression: + ) -> exp.Expr: return parse_one(f"source.{cluster_column} > dateadd(day, -7, target.{cluster_column})") expressions = d.parse( @@ -9377,9 +9774,9 @@ def test_model_blueprinting(tmp_path: Path) -> None: model_defaults=ModelDefaultsConfig(dialect="duckdb"), ) - blueprint_sql = tmp_path / "macros" / "identity_macro.py" - blueprint_sql.parent.mkdir(parents=True, exist_ok=True) - blueprint_sql.write_text( + identity_macro = tmp_path / "macros" / "identity_macro.py" + identity_macro.parent.mkdir(parents=True, exist_ok=True) + identity_macro.write_text( """from sqlmesh import macro @macro() @@ -9507,7 +9904,7 @@ def entrypoint(evaluator): {"customer": SqlValue(sql="customer1"), "customer_field": SqlValue(sql="'bar'")} ) - assert t.cast(exp.Expression, customer1_model.render_query()).sql() == ( + assert t.cast(exp.Expr, customer1_model.render_query()).sql() == ( """SELECT 'bar' AS "foo", "bar" AS "foo2", 'bar' AS "foo3" FROM "db"."customer1"."my_source" AS "my_source\"""" ) @@ -9520,7 +9917,7 @@ def entrypoint(evaluator): {"customer": SqlValue(sql="customer2"), "customer_field": SqlValue(sql="qux")} ) 
- assert t.cast(exp.Expression, customer2_model.render_query()).sql() == ( + assert t.cast(exp.Expr, customer2_model.render_query()).sql() == ( '''SELECT "qux" AS "foo", "qux" AS "foo2", "qux" AS "foo3" FROM "db"."customer2"."my_source" AS "my_source"''' ) @@ -10306,12 +10703,12 @@ def m4_non_metadata_references_v6(evaluator): query_with_vars = macro_evaluator.transform( parse_one("SELECT " + ", ".join(f"@v{var}, @VAR('v{var}')" for var in [1, 2, 3, 6])) ) - assert t.cast(exp.Expression, query_with_vars).sql() == "SELECT 1, 1, 2, 2, 3, 3, 6, 6" + assert t.cast(exp.Expr, query_with_vars).sql() == "SELECT 1, 1, 2, 2, 3, 3, 6, 6" query_with_blueprint_vars = macro_evaluator.transform( parse_one("SELECT " + ", ".join(f"@v{var}, @BLUEPRINT_VAR('v{var}')" for var in [4, 5])) ) - assert t.cast(exp.Expression, query_with_blueprint_vars).sql() == "SELECT 4, 4, 5, 5" + assert t.cast(exp.Expr, query_with_blueprint_vars).sql() == "SELECT 4, 4, 5, 5" def test_variable_mentioned_in_both_metadata_and_non_metadata_macro(tmp_path: Path) -> None: @@ -11623,3 +12020,492 @@ def test_use_original_sql(): assert model.query_.sql == "SELECT 1 AS one, 2 AS two" assert model.pre_statements_[0].sql == "CREATE TABLE pre (a INT)" assert model.post_statements_[0].sql == "CREATE TABLE post (b INT)" + + +def test_case_sensitive_macro_locals(tmp_path: Path) -> None: + init_example_project(tmp_path, engine_type="duckdb", template=ProjectTemplate.EMPTY) + + db_path = str(tmp_path / "db.db") + db_connection = DuckDBConnectionConfig(database=db_path) + + config = Config( + gateways={"gw": GatewayConfig(connection=db_connection)}, + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + ) + + macro_file = tmp_path / "macros" / "some_macro_with_globals.py" + macro_file.parent.mkdir(parents=True, exist_ok=True) + macro_file.write_text( + """from sqlmesh import macro + +x = 1 +X = 2 + +@macro() +def my_macro(evaluator): + assert evaluator.locals.get("x") == 1 + assert evaluator.locals.get("X") == 2 + + 
return x + X +""" + ) + test_model = tmp_path / "models" / "test_model.sql" + test_model.parent.mkdir(parents=True, exist_ok=True) + test_model.write_text("MODEL (name test_model, kind FULL); SELECT @my_macro() AS c") + + context = Context(paths=tmp_path, config=config) + model = context.get_model("test_model", raise_if_missing=True) + + assert model.render_query_or_raise().sql() == 'SELECT 3 AS "c"' + + +def test_grants(): + expressions = d.parse(""" + MODEL ( + name test.table, + kind FULL, + grants ( + 'select' = ['user1', 123, admin_role, 'user2'], + 'insert' = 'admin', + 'roles/bigquery.dataViewer' = ["group:data_eng@company.com", 'user:someone@company.com'], + 'update' = 'admin' + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model(expressions) + assert model.grants == { + "select": ["user1", "123", "admin_role", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": ["group:data_eng@company.com", "user:someone@company.com"], + "update": ["admin"], + } + + model = create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind="FULL", + grants={ + "select": ["user1", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": "user:data_eng@company.com", + }, + ) + assert model.grants == { + "select": ["user1", "user2"], + "insert": ["admin"], + "roles/bigquery.dataViewer": ["user:data_eng@company.com"], + } + + +@pytest.mark.parametrize( + "kind", + [ + "FULL", + "VIEW", + SeedKind(path="test.csv"), + IncrementalByTimeRangeKind(time_column="ds"), + IncrementalByUniqueKeyKind(unique_key="id"), + ], +) +def test_grants_valid_model_kinds(kind: t.Union[str, _ModelKind]): + model = create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind=kind, + grants={"select": ["user1", "user2"], "insert": ["admin_user"]}, + ) + assert model.grants == {"select": ["user1", "user2"], "insert": ["admin_user"]} + + +@pytest.mark.parametrize( + "kind", + [ + "EXTERNAL", + "EMBEDDED", + ], +) +def test_grants_invalid_model_kind_errors(kind: 
str): + with pytest.raises(ValidationError, match=rf".*grants cannot be set for {kind}.*"): + create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind=kind, + grants={"select": ["user1"], "insert": ["admin_user"]}, + ) + + +def test_model_kind_supports_grants(): + assert FullKind().supports_grants is True + assert ViewKind().supports_grants is True + assert IncrementalByTimeRangeKind(time_column="ds").supports_grants is True + assert IncrementalByUniqueKeyKind(unique_key=["id"]).supports_grants is True + assert SCDType2ByTimeKind(unique_key=["id"]).supports_grants is True + + assert EmbeddedKind().supports_grants is False + assert ExternalKind().supports_grants is False + + +def test_grants_validation_no_grants(): + model = create_sql_model("db.table", parse_one("SELECT 1 AS id"), kind="FULL") + assert model.grants is None + + +def test_grants_validation_empty_grantees(): + model = create_sql_model( + "db.table", parse_one("SELECT 1 AS id"), kind="FULL", grants={"select": []} + ) + assert model.grants == {"select": []} + + +def test_grants_single_value_conversions(): + expressions = d.parse(f""" + MODEL ( + name test.nested_arrays, + kind FULL, + grants ( + 'select' = "user1", update = user2 + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model(expressions) + assert model.grants == {"select": ["user1"], "update": ["user2"]} + + model = create_sql_model( + "db.table", + parse_one("SELECT 1 AS id"), + kind="FULL", + grants={"select": "user1", "insert": 123}, + ) + assert model.grants == {"select": ["user1"], "insert": ["123"]} + + +@pytest.mark.parametrize( + "grantees", + [ + "('user1', ('user2', 'user3'), 'user4')", + "('user1', ['user2', 'user3'], user4)", + "['user1', ['user2', user3], 'user4']", + "[user1, ('user2', \"user3\"), 'user4']", + ], +) +def test_grants_array_flattening(grantees: str): + expressions = d.parse(f""" + MODEL ( + name test.nested_arrays, + kind FULL, + grants ( + 'select' = {grantees} + ) + ); + SELECT 1 as id + """) 
+ model = load_sql_based_model(expressions) + assert model.grants == {"select": ["user1", "user2", "user3", "user4"]} + + +def test_grants_macro_var_resolved(): + expressions = d.parse(""" + MODEL ( + name test.macro_grants, + kind FULL, + grants ( + 'select' = @VAR('readers'), + 'insert' = @VAR('writers') + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model( + expressions, variables={"readers": ["user1", "user2"], "writers": "admin"} + ) + assert model.grants == { + "select": ["user1", "user2"], + "insert": ["admin"], + } + + +def test_grants_macro_var_in_array_flattening(): + expressions = d.parse(""" + MODEL ( + name test.macro_in_array, + kind FULL, + grants ( + 'select' = ['user1', @VAR('admins'), 'user3'] + ) + ); + SELECT 1 as id + """) + + model = load_sql_based_model(expressions, variables={"admins": ["admin1", "admin2"]}) + assert model.grants == {"select": ["user1", "admin1", "admin2", "user3"]} + + model2 = load_sql_based_model(expressions, variables={"admins": "super_admin"}) + assert model2.grants == {"select": ["user1", "super_admin", "user3"]} + + +def test_grants_dynamic_permission_names(): + expressions = d.parse(""" + MODEL ( + name test.dynamic_keys, + kind FULL, + grants ( + @VAR('read_perm') = ['user1', 'user2'], + @VAR('write_perm') = ['admin'] + ) + ); + SELECT 1 as id + """) + model = load_sql_based_model( + expressions, variables={"read_perm": "select", "write_perm": "insert"} + ) + assert model.grants == {"select": ["user1", "user2"], "insert": ["admin"]} + + +def test_grants_unresolved_macro_errors(): + expressions1 = d.parse(""" + MODEL (name test.bad1, kind FULL, grants ('select' = @VAR('undefined'))); + SELECT 1 as id + """) + with pytest.raises(ConfigError, match=r"Invalid grants configuration for 'select': NULL value"): + load_sql_based_model(expressions1) + + expressions2 = d.parse(""" + MODEL (name test.bad2, kind FULL, grants (@VAR('undefined') = ['user'])); + SELECT 1 as id + """) + with pytest.raises(ConfigError, 
match=r"Invalid grants configuration.*NULL value"): + load_sql_based_model(expressions2) + + expressions3 = d.parse(""" + MODEL (name test.bad3, kind FULL, grants ('select' = ['user', @VAR('undefined')])); + SELECT 1 as id + """) + with pytest.raises(ConfigError, match=r"Invalid grants configuration for 'select': NULL value"): + load_sql_based_model(expressions3) + + +def test_grants_empty_values(): + model1 = create_sql_model( + "db.table", parse_one("SELECT 1 AS id"), kind="FULL", grants={"select": []} + ) + assert model1.grants == {"select": []} + + model2 = create_sql_model("db.table", parse_one("SELECT 1 AS id"), kind="FULL") + assert model2.grants is None + + +@pytest.mark.parametrize( + "kind, expected", + [ + ("VIEW", DataObjectType.VIEW), + ("FULL", DataObjectType.TABLE), + ("MANAGED", DataObjectType.MANAGED_TABLE), + (ViewKind(materialized=True), DataObjectType.MATERIALIZED_VIEW), + ], +) +def test_grants_table_type(kind: t.Union[str, _ModelKind], expected: DataObjectType): + model = create_sql_model("test_table", parse_one("SELECT 1 as id"), kind=kind) + assert model.grants_table_type == expected + + +def test_model_macro_using_locals_called_from_jinja(assert_exp_eq) -> None: + @macro() + def execution_date(evaluator): + return f"""'{evaluator.locals.get("execution_date")}'""" + + expressions = d.parse( + """ + MODEL (name db.table); + + JINJA_QUERY_BEGIN; + SELECT {{ execution_date() }} AS col; + JINJA_END; + """ + ) + model = load_sql_based_model(expressions) + assert_exp_eq(model.render_query(), '''SELECT '1970-01-01' AS "col"''') + + +def test_audits_in_embedded_model(): + expression = d.parse( + """ + MODEL ( + name test.embedded_with_audits, + kind EMBEDDED, + audits (not_null (columns := (id))) + ); + + SELECT 1 AS id, 'A' as value + """ + ) + with pytest.raises(ConfigError, match="Audits are not supported for embedded models"): + load_sql_based_model(expression).validate_definition() + + +def 
test_default_catalog_not_leaked_to_unsupported_gateway(): + """ + Regression test for https://github.com/SQLMesh/sqlmesh/issues/5748 + + When a model targets a gateway that is NOT in default_catalog_per_gateway, + the global default_catalog should be cleared (set to None) instead of + leaking through from the default gateway. + """ + from sqlglot import parse + + expressions = parse( + """ + MODEL ( + name my_schema.my_model, + kind FULL, + gateway clickhouse_gw, + dialect clickhouse, + ); + + SELECT 1 AS id + """, + read="clickhouse", + ) + + default_catalog_per_gateway = { + "default_gw": "example_catalog", + } + + models = load_sql_based_models( + expressions, + get_variables=lambda gw: {}, + dialect="clickhouse", + default_catalog_per_gateway=default_catalog_per_gateway, + default_catalog="example_catalog", + ) + + assert len(models) == 1 + model = models[0] + + assert not model.catalog, ( + f"Default gateway catalog leaked into catalog-unsupported gateway model. " + f"Expected no catalog, got: {model.catalog}" + ) + assert "example_catalog" not in model.fqn, ( + f"Default gateway catalog found in model FQN: {model.fqn}" + ) + + +def test_default_catalog_still_applied_to_supported_gateway(): + """ + Control test: when a model targets a gateway that IS in default_catalog_per_gateway, + the catalog should still be correctly applied. 
+ """ + from sqlglot import parse + + expressions = parse( + """ + MODEL ( + name my_schema.my_model, + kind FULL, + gateway other_duckdb, + ); + + SELECT 1 AS id + """, + read="duckdb", + ) + + default_catalog_per_gateway = { + "default_gw": "example_catalog", + "other_duckdb": "other_db", + } + + models = load_sql_based_models( + expressions, + get_variables=lambda gw: {}, + dialect="duckdb", + default_catalog_per_gateway=default_catalog_per_gateway, + default_catalog="example_catalog", + ) + + assert len(models) == 1 + model = models[0] + + assert model.catalog == "other_db", f"Expected catalog 'other_db', got: {model.catalog}" + + +def test_no_gateway_uses_global_default_catalog(): + """ + Control test: when a model does NOT specify a gateway, the global + default_catalog should still be applied as before. + """ + from sqlglot import parse + + expressions = parse( + """ + MODEL ( + name my_schema.my_model, + kind FULL, + ); + + SELECT 1 AS id + """, + read="duckdb", + ) + + model = load_sql_based_model( + expressions, + default_catalog="example_catalog", + dialect="duckdb", + ) + + assert model.catalog == "example_catalog" + + +def test_blueprint_catalog_not_cross_contaminated(): + """ + When blueprints iterate over different gateways, the catalog from one + blueprint iteration should not leak into the next. A ClickHouse blueprint + setting default_catalog to None should not prevent the following blueprint + from getting its correct catalog. 
+ """ + from sqlglot import parse + + expressions = parse( + """ + MODEL ( + name @{blueprint}.my_model, + kind FULL, + gateway @{gw}, + blueprints ( + (blueprint := ch_schema, gw := clickhouse_gw), + (blueprint := db_schema, gw := default_gw), + ), + ); + + SELECT 1 AS id + """, + read="duckdb", + ) + + default_catalog_per_gateway = { + "default_gw": "example_catalog", + } + + models = load_sql_based_models( + expressions, + get_variables=lambda gw: {}, + dialect="duckdb", + default_catalog_per_gateway=default_catalog_per_gateway, + default_catalog="example_catalog", + ) + + assert len(models) == 2 + + ch_model = next(m for m in models if "ch_schema" in m.fqn) + db_model = next(m for m in models if "db_schema" in m.fqn) + + assert not ch_model.catalog, ( + f"Catalog leaked into ClickHouse blueprint. Got: {ch_model.catalog}" + ) + + assert db_model.catalog == "example_catalog", ( + f"Catalog lost for DuckDB blueprint after ClickHouse iteration. Got: {db_model.catalog}" + ) diff --git a/tests/core/test_plan.py b/tests/core/test_plan.py index c9c19376d9..590cda01ec 100644 --- a/tests/core/test_plan.py +++ b/tests/core/test_plan.py @@ -26,7 +26,7 @@ SqlModel, ModelKindName, ) -from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange +from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange, ViewKind from sqlmesh.core.model.seed import Seed from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals from sqlmesh.core.snapshot import ( @@ -826,6 +826,7 @@ def test_missing_intervals_lookback(make_snapshot, mocker: MockerFixture): indirectly_modified={}, deployability_index=DeployabilityIndex.all_deployable(), restatements={}, + restate_all_snapshots=False, end_bounded=False, ensure_finalized_snapshots=False, start_override_per_model=None, @@ -1074,36 +1075,6 @@ def test_restate_missing_model(make_snapshot, mocker: MockerFixture): PlanBuilder(context_diff, restate_models=["missing"]).build() -def 
test_new_snapshots_with_restatements(make_snapshot, mocker: MockerFixture): - snapshot_a = make_snapshot(SqlModel(name="a", query=parse_one("select 1, ds"))) - - context_diff = ContextDiff( - environment="test_environment", - is_new_environment=True, - is_unfinalized_environment=False, - normalize_environment_name=True, - create_from="prod", - create_from_env_exists=True, - added=set(), - removed_snapshots={}, - modified_snapshots={}, - snapshots={snapshot_a.snapshot_id: snapshot_a}, - new_snapshots={snapshot_a.snapshot_id: snapshot_a}, - previous_plan_id=None, - previously_promoted_snapshot_ids=set(), - previous_finalized_snapshots=None, - previous_gateway_managed_virtual_layer=False, - gateway_managed_virtual_layer=False, - environment_statements=[], - ) - - with pytest.raises( - PlanError, - match=r"Model changes and restatements can't be a part of the same plan.*", - ): - PlanBuilder(context_diff, restate_models=["a"]).build() - - def test_end_validation(make_snapshot, mocker: MockerFixture): snapshot_a = make_snapshot( SqlModel( @@ -1218,6 +1189,66 @@ def test_forward_only_plan_seed_models(make_snapshot, mocker: MockerFixture): assert not snapshot_a_updated.is_forward_only +def test_seed_model_metadata_change_no_missing_intervals( + make_snapshot: t.Callable[..., Snapshot], +): + snapshot_a = make_snapshot( + SeedModel( + name="a", + kind=SeedKind(path="./path/to/seed"), + seed=Seed(content="content"), + column_hashes={"col": "hash1"}, + depends_on=set(), + ) + ) + snapshot_a.categorize_as(SnapshotChangeCategory.BREAKING) + snapshot_a.add_interval("2022-01-01", now()) + + snapshot_a_metadata_updated = make_snapshot( + SeedModel( + name="a", + kind=SeedKind(path="./path/to/seed"), + seed=Seed(content="content"), + column_hashes={"col": "hash1"}, + depends_on=set(), + description="foo", + ) + ) + snapshot_a_metadata_updated.previous_versions = snapshot_a.all_versions + assert snapshot_a_metadata_updated.version is None + assert 
snapshot_a_metadata_updated.change_category is None + + context_diff = ContextDiff( + environment="prod", + is_new_environment=True, + is_unfinalized_environment=False, + normalize_environment_name=True, + create_from="prod", + create_from_env_exists=True, + added=set(), + removed_snapshots={}, + modified_snapshots={ + snapshot_a_metadata_updated.name: (snapshot_a_metadata_updated, snapshot_a) + }, + snapshots={snapshot_a_metadata_updated.snapshot_id: snapshot_a_metadata_updated}, + new_snapshots={snapshot_a_metadata_updated.snapshot_id: snapshot_a}, + previous_plan_id=None, + previously_promoted_snapshot_ids=set(), + previous_finalized_snapshots=None, + previous_gateway_managed_virtual_layer=False, + gateway_managed_virtual_layer=False, + environment_statements=[], + ) + + plan = PlanBuilder(context_diff).build() + assert snapshot_a_metadata_updated.change_category == SnapshotChangeCategory.METADATA + assert not snapshot_a_metadata_updated.is_forward_only + assert not plan.missing_intervals # plan should have no missing intervals + assert ( + snapshot_a_metadata_updated.intervals == snapshot_a.intervals + ) # intervals should have been copied + + def test_start_inference(make_snapshot, mocker: MockerFixture): snapshot_a = make_snapshot( SqlModel(name="a", query=parse_one("select 1, ds"), start="2022-01-01") @@ -1764,7 +1795,7 @@ def test_forward_only_models_model_kind_changed(make_snapshot, mocker: MockerFix ) def test_forward_only_models_model_kind_changed_to_incremental_by_time_range( make_snapshot, - partitioned_by: t.List[exp.Expression], + partitioned_by: t.List[exp.Expr], expected_forward_only: bool, ): snapshot = make_snapshot( @@ -4131,3 +4162,143 @@ def test_plan_ignore_cron_flag(make_snapshot): ], ) ] + + +def test_indirect_change_to_materialized_view_is_breaking(make_snapshot): + snapshot_a_old = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a"), + kind=ViewKind(materialized=True), + ) + ) + 
snapshot_a_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_b_old = make_snapshot( + SqlModel( + name="b", + query=parse_one("select col_a from a"), + kind=ViewKind(materialized=True), + ), + nodes={'"a"': snapshot_a_old.model}, + ) + snapshot_b_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_a_new = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a, 2 as col_b"), + kind=ViewKind(materialized=True), + ) + ) + + snapshot_a_new.previous_versions = snapshot_a_old.all_versions + + snapshot_b_new = make_snapshot( + snapshot_b_old.model, + nodes={'"a"': snapshot_a_new.model}, + ) + snapshot_b_new.previous_versions = snapshot_b_old.all_versions + + context_diff = ContextDiff( + environment="test_environment", + is_new_environment=True, + is_unfinalized_environment=False, + normalize_environment_name=True, + create_from="prod", + create_from_env_exists=True, + added=set(), + removed_snapshots={}, + modified_snapshots={ + snapshot_a_new.name: (snapshot_a_new, snapshot_a_old), + snapshot_b_new.name: (snapshot_b_new, snapshot_b_old), + }, + snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + new_snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + previous_plan_id=None, + previously_promoted_snapshot_ids=set(), + previous_finalized_snapshots=None, + previous_gateway_managed_virtual_layer=False, + gateway_managed_virtual_layer=False, + environment_statements=[], + ) + + PlanBuilder(context_diff, forward_only=False).build() + + assert snapshot_b_new.change_category == SnapshotChangeCategory.INDIRECT_BREAKING + + +def test_forward_only_indirect_change_to_materialized_view(make_snapshot): + snapshot_a_old = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a"), + ) + ) + snapshot_a_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_b_old = make_snapshot( + SqlModel( + 
name="b", + query=parse_one("select col_a from a"), + kind=ViewKind(materialized=True), + ), + nodes={'"a"': snapshot_a_old.model}, + ) + snapshot_b_old.categorize_as(SnapshotChangeCategory.BREAKING) + + snapshot_a_new = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1 as col_a, 2 as col_b"), + ) + ) + + snapshot_a_new.previous_versions = snapshot_a_old.all_versions + + snapshot_b_new = make_snapshot( + snapshot_b_old.model, + nodes={'"a"': snapshot_a_new.model}, + ) + snapshot_b_new.previous_versions = snapshot_b_old.all_versions + + context_diff = ContextDiff( + environment="test_environment", + is_new_environment=True, + is_unfinalized_environment=False, + normalize_environment_name=True, + create_from="prod", + create_from_env_exists=True, + added=set(), + removed_snapshots={}, + modified_snapshots={ + snapshot_a_new.name: (snapshot_a_new, snapshot_a_old), + snapshot_b_new.name: (snapshot_b_new, snapshot_b_old), + }, + snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + new_snapshots={ + snapshot_a_new.snapshot_id: snapshot_a_new, + snapshot_b_new.snapshot_id: snapshot_b_new, + }, + previous_plan_id=None, + previously_promoted_snapshot_ids=set(), + previous_finalized_snapshots=None, + previous_gateway_managed_virtual_layer=False, + gateway_managed_virtual_layer=False, + environment_statements=[], + ) + + PlanBuilder(context_diff, forward_only=True).build() + + # Forward-only indirect changes to MVs should not always be classified as indirect breaking. + # Instead, we want to preserve the standard categorization. 
+ assert snapshot_b_new.change_category == SnapshotChangeCategory.INDIRECT_NON_BREAKING diff --git a/tests/core/test_plan_stages.py b/tests/core/test_plan_stages.py index 744c7d18bf..f93a8a4780 100644 --- a/tests/core/test_plan_stages.py +++ b/tests/core/test_plan_stages.py @@ -6,6 +6,7 @@ from sqlmesh.core.config import EnvironmentSuffixTarget from sqlmesh.core.config.common import VirtualEnvironmentMode from sqlmesh.core.model import SqlModel, ModelKindName +from sqlmesh.core.plan.common import SnapshotIntervalClearRequest from sqlmesh.core.plan.definition import EvaluatablePlan from sqlmesh.core.plan.stages import ( build_plan_stages, @@ -23,11 +24,13 @@ FinalizeEnvironmentStage, UnpauseStage, ) +from sqlmesh.core.plan.explainer import ExplainableRestatementStage from sqlmesh.core.snapshot.definition import ( SnapshotChangeCategory, DeployabilityIndex, Snapshot, SnapshotId, + SnapshotIdLike, ) from sqlmesh.core.state_sync import StateReader from sqlmesh.core.environment import Environment, EnvironmentStatements @@ -103,6 +106,7 @@ def test_build_plan_stages_basic( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -212,6 +216,7 @@ def test_build_plan_stages_with_before_all_and_after_all( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -322,6 +327,7 @@ def test_build_plan_stages_select_models( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -423,6 +429,7 @@ def test_build_plan_stages_basic_no_backfill( skip_backfill=skip_backfill, empty_backfill=empty_backfill, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -499,15 +506,29 @@ def 
test_build_plan_stages_basic_no_backfill( assert isinstance(stages[7], FinalizeEnvironmentStage) -def test_build_plan_stages_restatement( +def test_build_plan_stages_restatement_prod_only( snapshot_a: Snapshot, snapshot_b: Snapshot, mocker: MockerFixture ) -> None: + """ + Scenario: + - Prod restatement triggered in a project with no dev environments + + Expected Outcome: + - Plan still contains a RestatementStage in case a dev environment was + created during restatement + """ + # Mock state reader to return existing snapshots and environment state_reader = mocker.Mock(spec=StateReader) state_reader.get_snapshots.return_value = { snapshot_a.snapshot_id: snapshot_a, snapshot_b.snapshot_id: snapshot_b, } + state_reader.get_snapshots_by_names.return_value = { + snapshot_a.id_and_version, + snapshot_b.id_and_version, + } + existing_environment = Environment( name="prod", snapshots=[snapshot_a.table_info, snapshot_b.table_info], @@ -518,7 +539,9 @@ def test_build_plan_stages_restatement( promoted_snapshot_ids=[snapshot_a.snapshot_id, snapshot_b.snapshot_id], finalized_ts=to_timestamp("2023-01-02"), ) + state_reader.get_environment.return_value = existing_environment + state_reader.get_environments_summary.return_value = [existing_environment.summary] environment = Environment( name="prod", @@ -543,6 +566,7 @@ def test_build_plan_stages_restatement( '"a"': (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), '"b"': (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), }, + restate_all_snapshots=True, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -577,17 +601,165 @@ def test_build_plan_stages_restatement( snapshot_b.snapshot_id, } - # Verify RestatementStage - restatement_stage = stages[1] + # Verify BackfillStage + backfill_stage = stages[1] + assert isinstance(backfill_stage, BackfillStage) + assert len(backfill_stage.snapshot_to_intervals) == 2 + assert backfill_stage.deployability_index == DeployabilityIndex.all_deployable() + 
expected_backfill_interval = [(to_timestamp("2023-01-01"), to_timestamp("2023-01-02"))] + for intervals in backfill_stage.snapshot_to_intervals.values(): + assert intervals == expected_backfill_interval + + # Verify RestatementStage exists but is empty + restatement_stage = stages[2] assert isinstance(restatement_stage, RestatementStage) - assert len(restatement_stage.snapshot_intervals) == 2 - expected_interval = (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")) - for snapshot_info, interval in restatement_stage.snapshot_intervals.items(): - assert interval == expected_interval - assert snapshot_info.name in ('"a"', '"b"') + restatement_stage = ExplainableRestatementStage.from_restatement_stage( + restatement_stage, state_reader, plan + ) + assert not restatement_stage.snapshot_intervals_to_clear + + # Verify EnvironmentRecordUpdateStage + assert isinstance(stages[3], EnvironmentRecordUpdateStage) + + # Verify FinalizeEnvironmentStage + assert isinstance(stages[4], FinalizeEnvironmentStage) + + +def test_build_plan_stages_restatement_prod_identifies_dev_intervals( + snapshot_a: Snapshot, + snapshot_b: Snapshot, + make_snapshot: t.Callable[..., Snapshot], + mocker: MockerFixture, +) -> None: + """ + Scenario: + - Prod restatement triggered in a project with a dev environment + - The dev environment contains a different physical version of the affected model + + Expected Outcome: + - Plan contains a RestatementStage that highlights the affected dev version + """ + # Dev version of snapshot_a, same name but different version + snapshot_a_dev = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1, changed, ds"), + kind=dict(name=ModelKindName.INCREMENTAL_BY_TIME_RANGE, time_column="ds"), + ) + ) + snapshot_a_dev.categorize_as(SnapshotChangeCategory.BREAKING) + assert snapshot_a_dev.snapshot_id != snapshot_a.snapshot_id + assert snapshot_a_dev.table_info != snapshot_a.table_info + + # Mock state reader to return existing snapshots and environment + 
state_reader = mocker.Mock(spec=StateReader) + snapshots_in_state = { + snapshot_a.snapshot_id: snapshot_a, + snapshot_b.snapshot_id: snapshot_b, + snapshot_a_dev.snapshot_id: snapshot_a_dev, + } + + def _get_snapshots(snapshot_ids: t.Iterable[SnapshotIdLike]): + return { + k: v + for k, v in snapshots_in_state.items() + if k in {s.snapshot_id for s in snapshot_ids} + } + + state_reader.get_snapshots.side_effect = _get_snapshots + state_reader.get_snapshots_by_names.return_value = set() + + existing_prod_environment = Environment( + name="prod", + snapshots=[snapshot_a.table_info, snapshot_b.table_info], + start_at="2023-01-01", + end_at="2023-01-02", + plan_id="previous_plan", + previous_plan_id=None, + promoted_snapshot_ids=[snapshot_a.snapshot_id, snapshot_b.snapshot_id], + finalized_ts=to_timestamp("2023-01-02"), + ) + + # dev has new version of snapshot_a but same version of snapshot_b + existing_dev_environment = Environment( + name="dev", + snapshots=[snapshot_a_dev.table_info, snapshot_b.table_info], + start_at="2023-01-01", + end_at="2023-01-02", + plan_id="previous_plan", + previous_plan_id=None, + promoted_snapshot_ids=[snapshot_a_dev.snapshot_id, snapshot_b.snapshot_id], + finalized_ts=to_timestamp("2023-01-02"), + ) + + state_reader.get_environment.side_effect = ( + lambda name: existing_dev_environment if name == "dev" else existing_prod_environment + ) + state_reader.get_environments_summary.return_value = [ + existing_prod_environment.summary, + existing_dev_environment.summary, + ] + + environment = Environment( + name="prod", + snapshots=[snapshot_a.table_info, snapshot_b.table_info], + start_at="2023-01-01", + end_at="2023-01-02", + plan_id="test_plan", + previous_plan_id="previous_plan", + promoted_snapshot_ids=[snapshot_a.snapshot_id, snapshot_b.snapshot_id], + ) + + # Create evaluatable plan with restatements + plan = EvaluatablePlan( + start="2023-01-01", + end="2023-01-02", + new_snapshots=[], # No new snapshots + environment=environment, + 
no_gaps=False, + skip_backfill=False, + empty_backfill=False, + restatements={ + '"a"': (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + '"b"': (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + }, + restate_all_snapshots=True, + is_dev=False, + allow_destructive_models=set(), + allow_additive_models=set(), + forward_only=False, + end_bounded=False, + ensure_finalized_snapshots=False, + ignore_cron=False, + directly_modified_snapshots=[], # No changes + indirectly_modified_snapshots={}, # No changes + metadata_updated_snapshots=[], + removed_snapshots=[], + requires_backfill=True, + models_to_backfill=None, + execution_time="2023-01-02", + disabled_restatement_models=set(), + environment_statements=None, + user_provided_flags=None, + ) + + # Build plan stages + stages = build_plan_stages(plan, state_reader, None) + + # Verify stages + assert len(stages) == 5 + + # Verify PhysicalLayerSchemaCreationStage + physical_stage = stages[0] + assert isinstance(physical_stage, PhysicalLayerSchemaCreationStage) + assert len(physical_stage.snapshots) == 2 + assert {s.snapshot_id for s in physical_stage.snapshots} == { + snapshot_a.snapshot_id, + snapshot_b.snapshot_id, + } # Verify BackfillStage - backfill_stage = stages[2] + backfill_stage = stages[1] assert isinstance(backfill_stage, BackfillStage) assert len(backfill_stage.snapshot_to_intervals) == 2 assert backfill_stage.deployability_index == DeployabilityIndex.all_deployable() @@ -595,6 +767,25 @@ def test_build_plan_stages_restatement( for intervals in backfill_stage.snapshot_to_intervals.values(): assert intervals == expected_backfill_interval + # Verify RestatementStage + restatement_stage = stages[2] + assert isinstance(restatement_stage, RestatementStage) + restatement_stage = ExplainableRestatementStage.from_restatement_stage( + restatement_stage, state_reader, plan + ) + + # note: we only clear the intervals from state for "a" in dev, we leave prod alone + assert 
restatement_stage.snapshot_intervals_to_clear + assert len(restatement_stage.snapshot_intervals_to_clear) == 1 + snapshot_name, clear_requests = list(restatement_stage.snapshot_intervals_to_clear.items())[0] + assert snapshot_name == '"a"' + assert len(clear_requests) == 1 + clear_request = clear_requests[0] + assert isinstance(clear_request, SnapshotIntervalClearRequest) + assert clear_request.snapshot_id == snapshot_a_dev.snapshot_id + assert clear_request.snapshot == snapshot_a_dev.id_and_version + assert clear_request.interval == (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")) + # Verify EnvironmentRecordUpdateStage assert isinstance(stages[3], EnvironmentRecordUpdateStage) @@ -602,6 +793,156 @@ def test_build_plan_stages_restatement( assert isinstance(stages[4], FinalizeEnvironmentStage) +def test_build_plan_stages_restatement_dev_does_not_clear_intervals( + snapshot_a: Snapshot, + snapshot_b: Snapshot, + make_snapshot: t.Callable[..., Snapshot], + mocker: MockerFixture, +) -> None: + """ + Scenario: + - Restatement triggered against the dev environment + + Expected Outcome: + - BackfillStage only touches models in that dev environment + - Plan does not contain a RestatementStage because making changes in dev doesnt mean we need + to clear intervals from other environments + """ + # Dev version of snapshot_a, same name but different version + snapshot_a_dev = make_snapshot( + SqlModel( + name="a", + query=parse_one("select 1, changed, ds"), + kind=dict(name=ModelKindName.INCREMENTAL_BY_TIME_RANGE, time_column="ds"), + ) + ) + snapshot_a_dev.categorize_as(SnapshotChangeCategory.BREAKING) + assert snapshot_a_dev.snapshot_id != snapshot_a.snapshot_id + assert snapshot_a_dev.table_info != snapshot_a.table_info + + # Mock state reader to return existing snapshots and environment + state_reader = mocker.Mock(spec=StateReader) + snapshots_in_state = { + snapshot_a.snapshot_id: snapshot_a, + snapshot_b.snapshot_id: snapshot_b, + snapshot_a_dev.snapshot_id: 
snapshot_a_dev, + } + state_reader.get_snapshots.side_effect = lambda snapshot_info_like: { + k: v + for k, v in snapshots_in_state.items() + if k in [sil.snapshot_id for sil in snapshot_info_like] + } + + # prod has snapshot_a, snapshot_b + existing_prod_environment = Environment( + name="prod", + snapshots=[snapshot_a.table_info, snapshot_b.table_info], + start_at="2023-01-01", + end_at="2023-01-02", + plan_id="previous_prod_plan", + previous_plan_id=None, + promoted_snapshot_ids=[snapshot_a.snapshot_id, snapshot_b.snapshot_id], + finalized_ts=to_timestamp("2023-01-02"), + ) + + # dev has new version of snapshot_a + existing_dev_environment = Environment( + name="dev", + snapshots=[snapshot_a_dev.table_info], + start_at="2023-01-01", + end_at="2023-01-02", + plan_id="previous_dev_plan", + previous_plan_id=None, + promoted_snapshot_ids=[snapshot_a_dev.snapshot_id], + finalized_ts=to_timestamp("2023-01-02"), + ) + + state_reader.get_environment.side_effect = ( + lambda name: existing_dev_environment if name == "dev" else existing_prod_environment + ) + state_reader.get_environments_summary.return_value = [ + existing_prod_environment.summary, + existing_dev_environment.summary, + ] + + environment = Environment( + name="dev", + snapshots=[snapshot_a_dev.table_info], + start_at="2023-01-01", + end_at="2023-01-02", + plan_id="test_plan", + previous_plan_id="previous_dev_plan", + promoted_snapshot_ids=[snapshot_a_dev.snapshot_id], + ) + + # Create evaluatable plan with restatements + plan = EvaluatablePlan( + start="2023-01-01", + end="2023-01-02", + new_snapshots=[], # No new snapshots + environment=environment, + no_gaps=False, + skip_backfill=False, + empty_backfill=False, + restatements={ + '"a"': (to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + }, + restate_all_snapshots=False, + is_dev=True, + allow_destructive_models=set(), + allow_additive_models=set(), + forward_only=False, + end_bounded=False, + ensure_finalized_snapshots=False, + 
ignore_cron=False, + directly_modified_snapshots=[], # No changes + indirectly_modified_snapshots={}, # No changes + metadata_updated_snapshots=[], + removed_snapshots=[], + requires_backfill=True, + models_to_backfill=None, + execution_time="2023-01-02", + disabled_restatement_models=set(), + environment_statements=None, + user_provided_flags=None, + ) + + # Build plan stages + stages = build_plan_stages(plan, state_reader, None) + + # Verify stages + assert len(stages) == 5 + + # Verify no RestatementStage + assert not any(s for s in stages if isinstance(s, RestatementStage)) + + # Verify PhysicalLayerSchemaCreationStage + physical_stage = stages[0] + assert isinstance(physical_stage, PhysicalLayerSchemaCreationStage) + assert len(physical_stage.snapshots) == 1 + assert {s.snapshot_id for s in physical_stage.snapshots} == { + snapshot_a_dev.snapshot_id, + } + + # Verify BackfillStage + backfill_stage = stages[1] + assert isinstance(backfill_stage, BackfillStage) + assert len(backfill_stage.snapshot_to_intervals) == 1 + assert backfill_stage.deployability_index == DeployabilityIndex.all_deployable() + backfill_snapshot, backfill_intervals = list(backfill_stage.snapshot_to_intervals.items())[0] + assert backfill_snapshot.snapshot_id == snapshot_a_dev.snapshot_id + assert backfill_intervals == [(to_timestamp("2023-01-01"), to_timestamp("2023-01-02"))] + + # Verify EnvironmentRecordUpdateStage + assert isinstance(stages[2], EnvironmentRecordUpdateStage) + + # Verify VirtualLayerUpdateStage (all non-prod plans get this regardless) + assert isinstance(stages[3], VirtualLayerUpdateStage) + + # Verify FinalizeEnvironmentStage + assert isinstance(stages[4], FinalizeEnvironmentStage) + + def test_build_plan_stages_forward_only( snapshot_a: Snapshot, snapshot_b: Snapshot, make_snapshot, mocker: MockerFixture ) -> None: @@ -654,6 +995,7 @@ def test_build_plan_stages_forward_only( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, 
is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -782,6 +1124,7 @@ def test_build_plan_stages_forward_only_dev( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=True, allow_destructive_models=set(), allow_additive_models=set(), @@ -907,6 +1250,7 @@ def _get_snapshots(snapshot_ids: t.List[SnapshotId]) -> t.Dict[SnapshotId, Snaps skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=True, allow_destructive_models=set(), allow_additive_models=set(), @@ -1044,6 +1388,7 @@ def test_build_plan_stages_forward_only_ensure_finalized_snapshots( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -1120,6 +1465,7 @@ def test_build_plan_stages_removed_model( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -1203,6 +1549,7 @@ def test_build_plan_stages_environment_suffix_target_changed( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=True, allow_destructive_models=set(), allow_additive_models=set(), @@ -1302,6 +1649,7 @@ def test_build_plan_stages_indirect_non_breaking_view_migration( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -1327,16 +1675,17 @@ def test_build_plan_stages_indirect_non_breaking_view_migration( stages = build_plan_stages(plan, state_reader, None) # Verify stages - assert len(stages) == 8 + assert len(stages) == 9 assert isinstance(stages[0], CreateSnapshotRecordsStage) assert isinstance(stages[1], PhysicalLayerSchemaCreationStage) assert isinstance(stages[2], BackfillStage) assert isinstance(stages[3], EnvironmentRecordUpdateStage) - 
assert isinstance(stages[4], UnpauseStage) - assert isinstance(stages[5], BackfillStage) - assert isinstance(stages[6], VirtualLayerUpdateStage) - assert isinstance(stages[7], FinalizeEnvironmentStage) + assert isinstance(stages[4], MigrateSchemasStage) + assert isinstance(stages[5], UnpauseStage) + assert isinstance(stages[6], BackfillStage) + assert isinstance(stages[7], VirtualLayerUpdateStage) + assert isinstance(stages[8], FinalizeEnvironmentStage) def test_build_plan_stages_virtual_environment_mode_filtering( @@ -1388,6 +1737,7 @@ def test_build_plan_stages_virtual_environment_mode_filtering( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=True, allow_destructive_models=set(), allow_additive_models=set(), @@ -1441,6 +1791,7 @@ def test_build_plan_stages_virtual_environment_mode_filtering( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -1504,6 +1855,7 @@ def test_build_plan_stages_virtual_environment_mode_filtering( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -1578,6 +1930,7 @@ def test_build_plan_stages_virtual_environment_mode_no_updates( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -1642,6 +1995,7 @@ def test_adjust_intervals_new_forward_only_dev_intervals( skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=True, # Dev environment allow_destructive_models=set(), allow_additive_models=set(), @@ -1686,6 +2040,7 @@ def test_adjust_intervals_restatement_removal( state_reader.refresh_snapshot_intervals = mocker.Mock() state_reader.get_snapshots.return_value = {} state_reader.get_environment.return_value 
= None + state_reader.get_environments_summary.return_value = [] environment = Environment( snapshots=[snapshot_a.table_info, snapshot_b.table_info], @@ -1710,6 +2065,7 @@ def test_adjust_intervals_restatement_removal( skip_backfill=False, empty_backfill=False, restatements=restatements, + restate_all_snapshots=True, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), @@ -1738,8 +2094,6 @@ def test_adjust_intervals_restatement_removal( restatement_stages = [stage for stage in stages if isinstance(stage, RestatementStage)] assert len(restatement_stages) == 1 - restatement_stage = restatement_stages[0] - assert len(restatement_stage.snapshot_intervals) == 2 backfill_stages = [stage for stage in stages if isinstance(stage, BackfillStage)] assert len(backfill_stages) == 1 @@ -1803,6 +2157,7 @@ def test_adjust_intervals_should_force_rebuild(make_snapshot, mocker: MockerFixt skip_backfill=False, empty_backfill=False, restatements={}, + restate_all_snapshots=False, is_dev=False, allow_destructive_models=set(), allow_additive_models=set(), diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index 71803f58a4..cd32d2451d 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -1126,3 +1126,90 @@ def test_dag_multiple_chain_transitive_deps(mocker: MockerFixture, make_snapshot ) }, } + + +def test_dag_upstream_dependency_caching_with_complex_diamond(mocker: MockerFixture, make_snapshot): + r""" + Test that the upstream dependency caching correctly handles a complex diamond dependency graph. + + Dependency graph: + A (has intervals) + / \ + B C (no intervals - transitive) + / \ / \ + D E F (no intervals - transitive) + \ / \ / + G H (has intervals - selected) + + This creates multiple paths from G and H to A. Without caching, A's dependencies would be + computed multiple times (once for each path). With caching, they should be computed once + and reused. 
+ """ + snapshots = {} + + for name in ["a", "b", "c", "d", "e", "f", "g", "h"]: + snapshots[name] = make_snapshot(SqlModel(name=name, query=parse_one("SELECT 1 as id"))) + snapshots[name].categorize_as(SnapshotChangeCategory.BREAKING) + + # A is the root + snapshots["b"] = snapshots["b"].model_copy(update={"parents": (snapshots["a"].snapshot_id,)}) + snapshots["c"] = snapshots["c"].model_copy(update={"parents": (snapshots["a"].snapshot_id,)}) + + # Middle layer: D, E, F depend on B and/or C + snapshots["d"] = snapshots["d"].model_copy(update={"parents": (snapshots["b"].snapshot_id,)}) + snapshots["e"] = snapshots["e"].model_copy( + update={"parents": (snapshots["b"].snapshot_id, snapshots["c"].snapshot_id)} + ) + snapshots["f"] = snapshots["f"].model_copy(update={"parents": (snapshots["c"].snapshot_id,)}) + + # Bottom layer: G and H depend on D/E and E/F respectively + snapshots["g"] = snapshots["g"].model_copy( + update={"parents": (snapshots["d"].snapshot_id, snapshots["e"].snapshot_id)} + ) + snapshots["h"] = snapshots["h"].model_copy( + update={"parents": (snapshots["e"].snapshot_id, snapshots["f"].snapshot_id)} + ) + + scheduler = Scheduler( + snapshots=list(snapshots.values()), + snapshot_evaluator=mocker.Mock(), + state_sync=mocker.Mock(), + default_catalog=None, + ) + + batched_intervals = { + snapshots["a"]: [(to_timestamp("2023-01-01"), to_timestamp("2023-01-02"))], + snapshots["g"]: [(to_timestamp("2023-01-01"), to_timestamp("2023-01-02"))], + snapshots["h"]: [(to_timestamp("2023-01-01"), to_timestamp("2023-01-02"))], + } + + full_dag = snapshots_to_dag(snapshots.values()) + dag = scheduler._dag(batched_intervals, snapshot_dag=full_dag) + + # Verify the DAG structure: + # 1. A should be evaluated first (no dependencies) + # 2. Both G and H should depend on A (through transitive dependencies) + # 3. 
Transitive nodes (B, C, D, E, F) should not appear as separate evaluation nodes + expected_a_node = EvaluateNode( + snapshot_name='"a"', + interval=(to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + batch_index=0, + ) + + expected_g_node = EvaluateNode( + snapshot_name='"g"', + interval=(to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + batch_index=0, + ) + + expected_h_node = EvaluateNode( + snapshot_name='"h"', + interval=(to_timestamp("2023-01-01"), to_timestamp("2023-01-02")), + batch_index=0, + ) + + assert dag.graph == { + expected_a_node: set(), + expected_g_node: {expected_a_node}, + expected_h_node: {expected_a_node}, + } diff --git a/tests/core/test_selector_dbt.py b/tests/core/test_selector_dbt.py new file mode 100644 index 0000000000..112c5740ac --- /dev/null +++ b/tests/core/test_selector_dbt.py @@ -0,0 +1,63 @@ +import typing as t +import pytest +from pytest_mock import MockerFixture +from sqlglot import exp +from sqlmesh.core.model.kind import SeedKind, ExternalKind, FullKind +from sqlmesh.core.model.seed import Seed +from sqlmesh.core.model.definition import SqlModel, SeedModel, ExternalModel +from sqlmesh.core.audit.definition import StandaloneAudit +from sqlmesh.core.snapshot.definition import Node +from sqlmesh.core.selector import DbtSelector +from sqlmesh.core.selector import parse, ResourceType +from sqlmesh.utils.errors import SQLMeshError +import sqlmesh.core.dialect as d +from sqlmesh.utils import UniqueKeyDict + + +def test_parse_resource_type(): + assert parse("resource_type:foo") == ResourceType(this=exp.Var(this="foo")) + + +@pytest.mark.parametrize( + "resource_type,expected", + [ + ("model", {'"test"."normal_model"'}), + ("seed", {'"test"."seed_model"'}), + ("test", {'"test"."standalone_audit"'}), + ("source", {'"external"."model"'}), + ], +) +def test_expand_model_selections_resource_type( + mocker: MockerFixture, resource_type: str, expected: t.Set[str] +): + models: t.Dict[str, Node] = { + '"test"."normal_model"': 
SqlModel( + name="test.normal_model", + kind=FullKind(), + query=d.parse_one("SELECT 'normal_model' AS what"), + ), + '"test"."seed_model"': SeedModel( + name="test.seed_model", kind=SeedKind(path="/tmp/foo"), seed=Seed(content="id,name") + ), + '"test"."standalone_audit"': StandaloneAudit( + name="test.standalone_audit", query=d.parse_one("SELECT 'standalone_audit' AS what") + ), + '"external"."model"': ExternalModel(name="external.model", kind=ExternalKind()), + } + + selector = DbtSelector(state_reader=mocker.Mock(), models=UniqueKeyDict("models")) + + assert selector.expand_model_selections([f"resource_type:{resource_type}"], models) == expected + + +def test_unsupported_resource_type(mocker: MockerFixture): + selector = DbtSelector(state_reader=mocker.Mock(), models=UniqueKeyDict("models")) + + models: t.Dict[str, Node] = { + '"test"."normal_model"': SqlModel( + name="test.normal_model", query=d.parse_one("SELECT 'normal_model' AS what") + ), + } + + with pytest.raises(SQLMeshError, match="Unsupported"): + selector.expand_model_selections(["resource_type:analysis"], models) diff --git a/tests/core/test_selector.py b/tests/core/test_selector_native.py similarity index 87% rename from tests/core/test_selector.py rename to tests/core/test_selector_native.py index 80b9ef691e..5889efadda 100644 --- a/tests/core/test_selector.py +++ b/tests/core/test_selector_native.py @@ -6,16 +6,18 @@ import pytest from pytest_mock.plugin import MockerFixture +import subprocess from sqlmesh.core import dialect as d from sqlmesh.core.audit import StandaloneAudit from sqlmesh.core.environment import Environment from sqlmesh.core.model import Model, SqlModel from sqlmesh.core.model.common import ParsableSql -from sqlmesh.core.selector import Selector +from sqlmesh.core.selector import NativeSelector from sqlmesh.core.snapshot import SnapshotChangeCategory from sqlmesh.utils import UniqueKeyDict from sqlmesh.utils.date import now_timestamp +from sqlmesh.utils.git import GitClient 
@pytest.mark.parametrize( @@ -88,7 +90,7 @@ def test_select_models(mocker: MockerFixture, make_snapshot, default_catalog: t. local_models[modified_model_v2.fqn] = modified_model_v2.copy( update={"mapping_schema": added_model_schema} ) - selector = Selector(state_reader_mock, local_models, default_catalog=default_catalog) + selector = NativeSelector(state_reader_mock, local_models, default_catalog=default_catalog) _assert_models_equal( selector.select_models(["db.added_model"], env_name), @@ -243,7 +245,7 @@ def test_select_models_expired_environment(mocker: MockerFixture, make_snapshot) local_models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") local_models[modified_model_v2.fqn] = modified_model_v2 - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) _assert_models_equal( selector.select_models(["*.modified_model"], env_name, fallback_env_name="prod"), @@ -305,7 +307,7 @@ def test_select_change_schema(mocker: MockerFixture, make_snapshot): local_child = child.copy(update={"mapping_schema": {'"db"': {'"parent"': {"b": "INT"}}}}) local_models[local_child.fqn] = local_child - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) selected = selector.select_models(["db.parent"], env_name) assert selected[local_child.fqn].render_query() != child.render_query() @@ -339,7 +341,7 @@ def test_select_models_missing_env(mocker: MockerFixture, make_snapshot): local_models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") local_models[model.fqn] = model - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) assert selector.select_models([model.name], "missing_env").keys() == {model.fqn} assert not selector.select_models(["missing"], "missing_env") @@ -563,7 +565,7 @@ def test_expand_model_selections( ) models[model.fqn] = model - selector = Selector(mocker.Mock(), models) + selector = 
NativeSelector(mocker.Mock(), models) assert selector.expand_model_selections(selections) == output @@ -576,7 +578,7 @@ def test_model_selection_normalized(mocker: MockerFixture, make_snapshot): dialect="bigquery", ) models[model.fqn] = model - selector = Selector(mocker.Mock(), models, dialect="bigquery") + selector = NativeSelector(mocker.Mock(), models, dialect="bigquery") assert selector.expand_model_selections(["db.test_Model"]) == {'"db"."test_Model"'} @@ -624,7 +626,7 @@ def test_expand_git_selection( git_client_mock.list_uncommitted_changed_files.return_value = [] git_client_mock.list_committed_changed_files.return_value = [model_a._path, model_c._path] - selector = Selector(mocker.Mock(), models) + selector = NativeSelector(mocker.Mock(), models) selector._git_client = git_client_mock assert selector.expand_model_selections(expressions) == expected_fqns @@ -634,6 +636,92 @@ def test_expand_git_selection( git_client_mock.list_untracked_files.assert_called_once() +def test_expand_git_selection_integration(tmp_path: Path, mocker: MockerFixture): + repo_path = tmp_path / "test_repo" + repo_path.mkdir() + subprocess.run(["git", "init", "-b", "main"], cwd=repo_path, check=True, capture_output=True) + + models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") + model_a_path = repo_path / "model_a.sql" + model_a_path.write_text("SELECT 1 AS a") + model_a = SqlModel(name="test_model_a", query=d.parse_one("SELECT 1 AS a")) + model_a._path = model_a_path + models[model_a.fqn] = model_a + + model_b_path = repo_path / "model_b.sql" + model_b_path.write_text("SELECT 2 AS b") + model_b = SqlModel(name="test_model_b", query=d.parse_one("SELECT 2 AS b")) + model_b._path = model_b_path + models[model_b.fqn] = model_b + + subprocess.run(["git", "add", "."], cwd=repo_path, check=True, capture_output=True) + subprocess.run( + [ + "git", + "-c", + "user.name=Max", + "-c", + "user.email=max@rb.com", + "commit", + "-m", + "Initial commit", + ], + cwd=repo_path, + check=True, + 
capture_output=True, + ) + + # no changes should select nothing + git_client = GitClient(repo_path) + selector = NativeSelector(mocker.Mock(), models) + selector._git_client = git_client + assert selector.expand_model_selections([f"git:main"]) == set() + + # modify A but dont stage it, should be only selected + model_a_path.write_text("SELECT 10 AS a") + assert selector.expand_model_selections([f"git:main"]) == {'"test_model_a"'} + + # stage model A, should still select it + subprocess.run(["git", "add", "model_a.sql"], cwd=repo_path, check=True, capture_output=True) + assert selector.expand_model_selections([f"git:main"]) == {'"test_model_a"'} + + # now add unstaged change to B and both should be selected + model_b_path.write_text("SELECT 20 AS b") + assert selector.expand_model_selections([f"git:main"]) == { + '"test_model_a"', + '"test_model_b"', + } + + subprocess.run( + ["git", "checkout", "-b", "dev"], + cwd=repo_path, + check=True, + capture_output=True, + ) + + subprocess.run( + [ + "git", + "-c", + "user.name=Max", + "-c", + "user.email=max@rb.com", + "commit", + "-m", + "Update model_a", + ], + cwd=repo_path, + check=True, + capture_output=True, + ) + + # now A is committed in the dev branch and B unstaged but should both be selected + assert selector.expand_model_selections([f"git:main"]) == { + '"test_model_a"', + '"test_model_b"', + } + + def test_select_models_with_external_parent(mocker: MockerFixture): default_catalog = "test_catalog" added_model = SqlModel( @@ -658,7 +746,7 @@ def test_select_models_with_external_parent(mocker: MockerFixture): local_models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") local_models[added_model.fqn] = added_model - selector = Selector(state_reader_mock, local_models, default_catalog=default_catalog) + selector = NativeSelector(state_reader_mock, local_models, default_catalog=default_catalog) expanded_selections = selector.expand_model_selections(["+*added_model*"]) assert expanded_selections == 
{added_model.fqn} @@ -699,7 +787,7 @@ def test_select_models_local_tags_take_precedence_over_remote( local_models[local_existing.fqn] = local_existing local_models[local_new.fqn] = local_new - selector = Selector(state_reader_mock, local_models) + selector = NativeSelector(state_reader_mock, local_models) selected = selector.select_models(["tag:a"], env_name) diff --git a/tests/core/test_snapshot.py b/tests/core/test_snapshot.py index c769991b86..1acc6cc265 100644 --- a/tests/core/test_snapshot.py +++ b/tests/core/test_snapshot.py @@ -168,6 +168,7 @@ def test_json(snapshot: Snapshot): "enabled": True, "extract_dependencies_from_query": True, "virtual_environment_mode": "full", + "grants_target_layer": "virtual", }, "name": '"name"', "parents": [{"name": '"parent"."tbl"', "identifier": snapshot.parents[0].identifier}], @@ -181,6 +182,36 @@ def test_json(snapshot: Snapshot): } +def test_json_with_grants(make_snapshot: t.Callable): + from sqlmesh.core.model.meta import GrantsTargetLayer + + model = SqlModel( + name="name", + kind=dict(time_column="ds", batch_size=30, name=ModelKindName.INCREMENTAL_BY_TIME_RANGE), + owner="owner", + dialect="spark", + cron="1 0 * * *", + start="2020-01-01", + query=parse_one("SELECT @EACH([1, 2], x -> x), ds FROM parent.tbl"), + grants={"SELECT": ["role1", "role2"], "INSERT": ["role3"]}, + grants_target_layer=GrantsTargetLayer.VIRTUAL, + ) + snapshot = make_snapshot(model) + + json_str = snapshot.json() + json_data = json.loads(json_str) + assert ( + json_data["node"]["grants"] + == "('SELECT' = ARRAY('role1', 'role2'), 'INSERT' = ARRAY('role3'))" + ) + assert json_data["node"]["grants_target_layer"] == "virtual" + + reparsed_snapshot = Snapshot.model_validate_json(json_str) + assert isinstance(reparsed_snapshot.node, SqlModel) + assert reparsed_snapshot.node.grants == {"SELECT": ["role1", "role2"], "INSERT": ["role3"]} + assert reparsed_snapshot.node.grants_target_layer == GrantsTargetLayer.VIRTUAL + + def 
test_json_custom_materialization(make_snapshot: t.Callable): model = SqlModel( name="name", @@ -954,7 +985,7 @@ def test_fingerprint(model: Model, parent_model: Model): original_fingerprint = SnapshotFingerprint( data_hash="2406542604", - metadata_hash="3341445192", + metadata_hash="1056339358", ) assert fingerprint == original_fingerprint @@ -1014,8 +1045,8 @@ def test_fingerprint_seed_model(): ) expected_fingerprint = SnapshotFingerprint( - data_hash="1586624913", - metadata_hash="2315134974", + data_hash="2112858704", + metadata_hash="2674364560", ) model = load_sql_based_model(expressions, path=Path("./examples/sushi/models/test_model.sql")) @@ -1054,7 +1085,7 @@ def test_fingerprint_jinja_macros(model: Model): ) original_fingerprint = SnapshotFingerprint( data_hash="93332825", - metadata_hash="3341445192", + metadata_hash="1056339358", ) fingerprint = fingerprint_from_node(model, nodes={}) @@ -1131,6 +1162,40 @@ def test_fingerprint_virtual_properties(model: Model, parent_model: Model): assert updated_fingerprint.data_hash == fingerprint.data_hash +def test_fingerprint_grants(model: Model, parent_model: Model): + from sqlmesh.core.model.meta import GrantsTargetLayer + + original_model = deepcopy(model) + fingerprint = fingerprint_from_node(model, nodes={}) + + updated_model = SqlModel( + **original_model.dict(), + grants={"SELECT": ["role1", "role2"]}, + ) + updated_fingerprint = fingerprint_from_node(updated_model, nodes={}) + + assert updated_fingerprint != fingerprint + assert updated_fingerprint.metadata_hash != fingerprint.metadata_hash + assert updated_fingerprint.data_hash == fingerprint.data_hash + + different_grants_model = SqlModel( + **original_model.dict(), + grants={"SELECT": ["role3"], "INSERT": ["role4"]}, + ) + different_grants_fingerprint = fingerprint_from_node(different_grants_model, nodes={}) + + assert different_grants_fingerprint.metadata_hash != updated_fingerprint.metadata_hash + assert different_grants_fingerprint.metadata_hash != 
fingerprint.metadata_hash + + target_layer_model = SqlModel( + **{**original_model.dict(), "grants_target_layer": GrantsTargetLayer.PHYSICAL}, + grants={"SELECT": ["role1", "role2"]}, + ) + target_layer_fingerprint = fingerprint_from_node(target_layer_model, nodes={}) + + assert target_layer_fingerprint.metadata_hash != updated_fingerprint.metadata_hash + + def test_tableinfo_equality(): snapshot_a = SnapshotTableInfo( name="test_schema.a", diff --git a/tests/core/test_snapshot_evaluator.py b/tests/core/test_snapshot_evaluator.py index 6a39f600de..f3fae15e8a 100644 --- a/tests/core/test_snapshot_evaluator.py +++ b/tests/core/test_snapshot_evaluator.py @@ -3,6 +3,7 @@ from typing_extensions import Self from unittest.mock import call, patch, Mock +import contextlib import re import logging import pytest @@ -41,8 +42,10 @@ load_sql_based_model, ExternalModel, model, + create_sql_model, ) from sqlmesh.core.model.kind import OnDestructiveChange, ExternalKind, OnAdditiveChange +from sqlmesh.core.model.meta import GrantsTargetLayer from sqlmesh.core.node import IntervalUnit from sqlmesh.core.snapshot import ( DeployabilityIndex, @@ -55,7 +58,19 @@ SnapshotTableCleanupTask, ) from sqlmesh.core.snapshot.definition import to_view_mapping -from sqlmesh.core.snapshot.evaluator import CustomMaterialization, SnapshotCreationFailedError +from sqlmesh.core.snapshot.evaluator import ( + CustomMaterialization, + EngineManagedStrategy, + FullRefreshStrategy, + IncrementalByPartitionStrategy, + IncrementalByTimeRangeStrategy, + IncrementalByUniqueKeyStrategy, + IncrementalUnmanagedStrategy, + MaterializableStrategy, + SCDType2Strategy, + SnapshotCreationFailedError, + ViewStrategy, +) from sqlmesh.utils.concurrency import NodeExecutionFailedError from sqlmesh.utils.date import to_timestamp from sqlmesh.utils.errors import ( @@ -436,10 +451,14 @@ def create_and_cleanup(name: str, dev_table_only: bool): snapshot.categorize_as(SnapshotChangeCategory.BREAKING, forward_only=True) 
snapshot.version = "test_version" + on_cleanup_mock = mocker.Mock() + evaluator.promote([snapshot], EnvironmentNamingInfo(name="test_env")) evaluator.cleanup( - [SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=dev_table_only)] + [SnapshotTableCleanupTask(snapshot=snapshot.table_info, dev_table_only=dev_table_only)], + on_complete=on_cleanup_mock, ) + assert on_cleanup_mock.call_count == 1 if dev_table_only else 2 return snapshot snapshot = create_and_cleanup("catalog.test_schema.test_model", True) @@ -611,6 +630,39 @@ def create_and_cleanup_external_model(name: str, dev_table_only: bool): adapter_mock.drop_table.assert_not_called() +def test_cleanup_symbolic_and_audit_snapshots_no_callback( + mocker: MockerFixture, adapter_mock, make_snapshot +): + evaluator = SnapshotEvaluator(adapter_mock) + on_complete_mock = mocker.Mock() + + # Test external model + external_model = ExternalModel( + name="test_schema.external_model", + kind=ExternalKind(), + ) + external_snapshot = make_snapshot(external_model) + external_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + # Test standalone audit + audit = StandaloneAudit(name="test_audit", query=parse_one("SELECT NULL LIMIT 0")) + audit_snapshot = make_snapshot(audit) + audit_snapshot.categorize_as(SnapshotChangeCategory.NON_BREAKING) + + evaluator.cleanup( + [ + SnapshotTableCleanupTask(snapshot=external_snapshot.table_info, dev_table_only=False), + SnapshotTableCleanupTask(snapshot=audit_snapshot.table_info, dev_table_only=False), + ], + on_complete=on_complete_mock, + ) + + # Verify that no physical tables were attempted to be dropped + adapter_mock.drop_table.assert_not_called() + adapter_mock.get_data_object.assert_not_called() + on_complete_mock.assert_not_called() + + @pytest.mark.parametrize("view_exists", [True, False]) def test_evaluate_materialized_view( mocker: MockerFixture, adapter_mock, make_snapshot, view_exists: bool @@ -851,9 +903,59 @@ def test_create_prod_table_exists(mocker: 
MockerFixture, adapter_mock, make_snap { f"test_schema__test_model__{snapshot.version}", }, + safe_to_cache=True, ) +def test_pre_hook_forward_only_clone( + mocker: MockerFixture, make_mocked_engine_adapter, make_snapshot +): + """ + Verifies that pre-statements are executed when creating a clone of a forward-only model. + """ + pre_statement = """CREATE TEMPORARY FUNCTION "example_udf"("x" BIGINT) AS ("x" + 1)""" + model = load_sql_based_model( + parse( # type: ignore + f""" + MODEL ( + name test_schema.test_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column ds + ) + ); + + {pre_statement}; + + SELECT a::int, ds::string FROM tbl; + """ + ), + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING, forward_only=True) + snapshot.previous_versions = snapshot.all_versions + + adapter = make_mocked_engine_adapter(EngineAdapter) + adapter.with_settings = lambda **kwargs: adapter + adapter.table_exists = lambda _: True # type: ignore + adapter.SUPPORTS_CLONING = True + mocker.patch.object( + adapter, + "get_data_objects", + return_value=[], + ) + mocker.patch.object( + adapter, + "get_alter_operations", + return_value=[], + ) + + evaluator = SnapshotEvaluator(adapter) + + evaluator.create([snapshot], {}, deployability_index=DeployabilityIndex.none_deployable()) + adapter.cursor.execute.assert_any_call(pre_statement) + + def test_create_only_dev_table_exists(mocker: MockerFixture, adapter_mock, make_snapshot): model = load_sql_based_model( parse( # type: ignore @@ -888,6 +990,7 @@ def test_create_only_dev_table_exists(mocker: MockerFixture, adapter_mock, make_ { f"test_schema__test_model__{snapshot.version}__dev", }, + safe_to_cache=True, ) @@ -937,6 +1040,7 @@ def test_create_new_forward_only_model(mocker: MockerFixture, adapter_mock, make { f"test_schema__test_model__{snapshot.dev_version}__dev", }, + safe_to_cache=True, ) @@ -1027,6 +1131,7 @@ def test_create_tables_exist( 
adapter_mock.get_data_objects.assert_called_once_with( schema_("sqlmesh__db"), {table_name}, + safe_to_cache=True, ) adapter_mock.create_schema.assert_not_called() adapter_mock.create_table.assert_not_called() @@ -1064,6 +1169,7 @@ def test_create_prod_table_exists_forward_only(mocker: MockerFixture, adapter_mo { f"test_schema__test_model__{snapshot.version}", }, + safe_to_cache=True, ) adapter_mock.create_table.assert_not_called() @@ -1255,9 +1361,11 @@ def test_promote_deployable(mocker: MockerFixture, make_snapshot): { f"test_schema__test_model__{snapshot.version}", }, + safe_to_cache=True, ) adapter_mock.create_table.assert_not_called() + adapter_mock.get_data_objects.return_value = [] evaluator.promote([snapshot], EnvironmentNamingInfo(name="test_env")) adapter_mock.create_schema.assert_called_once_with(to_schema("test_schema__test_env")) @@ -1310,7 +1418,7 @@ def columns(table_name): "get_data_objects", return_value=[ DataObject( - schema="test_schema", + schema="sqlmesh__test_schema", name=f"test_schema__test_model__{snapshot.version}", type="table", ) @@ -1392,7 +1500,7 @@ def test_migrate_view( "sqlmesh.core.engine_adapter.base.EngineAdapter.get_data_objects", return_value=[ DataObject( - schema="test_schema", + schema="sqlmesh__test_schema", name=f"test_schema__test_model__{snapshot.version}", type="view", ) @@ -1402,7 +1510,13 @@ def test_migrate_view( evaluator = SnapshotEvaluator(adapter) evaluator.migrate([snapshot], {}) - adapter.cursor.execute.assert_not_called() + adapter.cursor.execute.assert_has_calls( + [ + call( + f'CREATE OR REPLACE VIEW "sqlmesh__test_schema"."test_schema__test_model__{snapshot.version}" ("c", "a") AS SELECT "c" AS "c", "a" AS "a" FROM "tbl" AS "tbl"' + ), + ] + ) def test_migrate_snapshot_data_object_type_mismatch( @@ -1630,6 +1744,49 @@ def python_func(**kwargs): assert adapter_mock.insert_overwrite_by_time_partition.call_args[0][1].to_dict() == output_dict +def test_snapshot_evaluator_yield_empty_pd(adapter_mock, 
make_snapshot): + adapter_mock.is_pyspark_df.return_value = False + adapter_mock.INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.INSERT_OVERWRITE + adapter_mock.try_get_df = lambda x: x + evaluator = SnapshotEvaluator(adapter_mock) + + snapshot = make_snapshot( + PythonModel( + name="db.model", + entrypoint="python_func", + kind=IncrementalByTimeRangeKind(time_column=TimeColumn(column="ds", format="%Y-%m-%d")), + columns={ + "a": "INT", + "ds": "STRING", + }, + python_env={ + "python_func": Executable( + name="python_func", + alias="python_func", + path="test_snapshot_evaluator.py", + payload="""def python_func(**kwargs): + yield from ()""", + ) + }, + ) + ) + + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + evaluator.create([snapshot], {}) + + # This should not raise a TypeError from reduce() with empty sequence + evaluator.evaluate( + snapshot, + start="2023-01-01", + end="2023-01-09", + execution_time="2023-01-09", + snapshots={}, + ) + + # When there are no dataframes to process, insert_overwrite_by_time_partition should not be called + adapter_mock.insert_overwrite_by_time_partition.assert_not_called() + + def test_create_clone_in_dev(mocker: MockerFixture, adapter_mock, make_snapshot): adapter_mock.SUPPORTS_CLONING = True adapter_mock.get_alter_operations.return_value = [] @@ -1672,7 +1829,6 @@ def test_create_clone_in_dev(mocker: MockerFixture, adapter_mock, make_snapshot) adapter_mock.clone_table.assert_called_once_with( f"sqlmesh__test_schema.test_schema__test_model__{snapshot.dev_version}__dev", f"sqlmesh__test_schema.test_schema__test_model__{snapshot.version}", - replace=True, rendered_physical_properties={}, ) @@ -1695,7 +1851,7 @@ def test_drop_clone_in_dev_when_migration_fails(mocker: MockerFixture, adapter_m adapter_mock.get_alter_operations.return_value = [] evaluator = SnapshotEvaluator(adapter_mock) - adapter_mock.alter_table.side_effect = Exception("Migration failed") + adapter_mock.alter_table.side_effect = 
DestructiveChangeError("Migration failed") model = load_sql_based_model( parse( # type: ignore @@ -1722,7 +1878,6 @@ def test_drop_clone_in_dev_when_migration_fails(mocker: MockerFixture, adapter_m adapter_mock.clone_table.assert_called_once_with( f"sqlmesh__test_schema.test_schema__test_model__{snapshot.version}__dev", f"sqlmesh__test_schema.test_schema__test_model__{snapshot.version}", - replace=True, rendered_physical_properties={}, ) @@ -1838,7 +1993,7 @@ def columns(table_name): "sqlmesh.core.engine_adapter.base.EngineAdapter.get_data_objects", return_value=[ DataObject( - schema="test_schema", + schema="sqlmesh__test_schema", name=f"test_schema__test_model__{snapshot.version}", type=DataObjectType.TABLE, ) @@ -1925,7 +2080,7 @@ def columns(table_name): "sqlmesh.core.engine_adapter.base.EngineAdapter.get_data_objects", return_value=[ DataObject( - schema="test_schema", + schema="sqlmesh__test_schema", name=f"test_schema__test_model__{snapshot.version}", type=DataObjectType.TABLE, ) @@ -1966,6 +2121,69 @@ def columns(table_name): ) +def test_temp_table_includes_schema_for_ignore_changes( + mocker: MockerFixture, + make_snapshot, + make_mocked_engine_adapter, +): + """Test that temp table creation includes the physical schema when on_destructive_change or on_additive_change is IGNORE.""" + # Create a model with on_destructive_change=IGNORE to trigger temp table creation + model = SqlModel( + name="test_schema.test_model", + kind=IncrementalByTimeRangeKind( + time_column="ds", on_destructive_change=OnDestructiveChange.IGNORE + ), + query=parse_one("SELECT c, a FROM tbl WHERE ds BETWEEN @start_ds and @end_ds"), + ) + snapshot = make_snapshot(model, version="1") + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + # Set up the mocked adapter + adapter = make_mocked_engine_adapter(EngineAdapter) + adapter.with_settings = lambda **kwargs: adapter # type: ignore + adapter.table_exists = lambda _: True # type: ignore + + # Mock columns method to return 
existing columns + def columns(table_name): + return { + "c": exp.DataType.build("int"), + "a": exp.DataType.build("int"), + "ds": exp.DataType.build("timestamp"), + } + + adapter.columns = columns # type: ignore + + # Create a mock for the temp_table context manager + temp_table_name_captured = None + + @contextlib.contextmanager + def mock_temp_table(query_or_df, name="diff", **kwargs): + nonlocal temp_table_name_captured + temp_table_name_captured = exp.to_table(name) + # Return a table that temp_table would normally return + yield exp.table_("__temp_diff_12345", db=temp_table_name_captured.db) + + adapter.temp_table = mock_temp_table # type: ignore + adapter.insert_append = lambda *args, **kwargs: None # type: ignore + + evaluator = SnapshotEvaluator(adapter) + + # Call the append method which will trigger _get_target_and_source_columns + evaluator.evaluate( + snapshot, + start="2020-01-01", + end="2020-01-02", + execution_time="2020-01-02", + snapshots={}, + ) + + # Verify that temp_table was called with a name that includes the schema + assert temp_table_name_captured is not None + assert temp_table_name_captured.name == "diff" + assert temp_table_name_captured.db == model.physical_schema + assert str(temp_table_name_captured.db) == "sqlmesh__test_schema" + + def test_forward_only_snapshot_for_added_model(mocker: MockerFixture, adapter_mock, make_snapshot): adapter_mock.SUPPORTS_CLONING = False evaluator = SnapshotEvaluator(adapter_mock) @@ -2379,7 +2597,7 @@ def test_insert_into_scd_type_2_by_column( target_columns_to_types=table_columns, table_format=None, unique_key=[exp.to_column("id", quoted=True)], - check_columns=exp.Star(), + check_columns=[exp.Star()], valid_from_col=exp.column("valid_from", quoted=True), valid_to_col=exp.column("valid_to", quoted=True), execution_time="2020-01-02", @@ -3121,11 +3339,11 @@ def test_create_post_statements_use_non_deployable_table( evaluator.create([snapshot], {}, DeployabilityIndex.none_deployable()) call_args = 
adapter_mock.execute.call_args_list - pre_calls = call_args[0][0][0] + pre_calls = call_args[1][0][0] assert len(pre_calls) == 1 assert pre_calls[0].sql(dialect="postgres") == expected_call - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert post_calls[0].sql(dialect="postgres") == expected_call @@ -3183,11 +3401,11 @@ def model_with_statements(context, **kwargs): expected_call = f'CREATE INDEX IF NOT EXISTS "idx" ON "sqlmesh__db"."db__test_model__{snapshot.version}__dev" /* db.test_model */("id")' call_args = adapter_mock.execute.call_args_list - pre_calls = call_args[0][0][0] + pre_calls = call_args[1][0][0] assert len(pre_calls) == 1 assert pre_calls[0].sql(dialect="postgres") == expected_call - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert post_calls[0].sql(dialect="postgres") == expected_call @@ -3245,14 +3463,14 @@ def create_log_table(evaluator, view_name): ) call_args = adapter_mock.execute.call_args_list - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert ( post_calls[0].sql(dialect="postgres") == f'CREATE INDEX IF NOT EXISTS "test_idx" ON "sqlmesh__test_schema"."test_schema__test_model__{snapshot.version}__dev" /* test_schema.test_model */("a")' ) - on_virtual_update_calls = call_args[2][0][0] + on_virtual_update_calls = call_args[4][0][0] assert ( on_virtual_update_calls[0].sql(dialect="postgres") == 'GRANT SELECT ON VIEW "test_schema__test_env"."test_model" /* test_schema.test_model */ TO ROLE "admin"' @@ -3330,7 +3548,7 @@ def model_with_statements(context, **kwargs): ) call_args = adapter_mock.execute.call_args_list - on_virtual_update_call = call_args[2][0][0][0] + on_virtual_update_call = call_args[4][0][0][0] assert ( on_virtual_update_call.sql(dialect="postgres") == 'CREATE INDEX IF NOT EXISTS "idx" ON "db"."test_model_3" /* db.test_model_3 */("id")' @@ -3465,7 +3683,7 @@ def 
test_custom_materialization_strategy_with_custom_properties(adapter_mock, ma custom_insert_kind = None class TestCustomKind(CustomKind): - _primary_key: t.List[exp.Expression] # type: ignore[no-untyped-def] + _primary_key: t.List[exp.Expr] # type: ignore[no-untyped-def] @model_validator(mode="after") def _validate_model(self) -> Self: @@ -3477,7 +3695,7 @@ def _validate_model(self) -> Self: return self @property - def primary_key(self) -> t.List[exp.Expression]: + def primary_key(self) -> t.List[exp.Expr]: return self._primary_key class TestCustomMaterializationStrategy(CustomMaterialization[TestCustomKind]): @@ -3842,7 +4060,7 @@ def test_migrate_snapshot(snapshot: Snapshot, mocker: MockerFixture, adapter_moc adapter_mock.get_data_objects.return_value = [ DataObject( - schema="test_schema", + schema="sqlmesh__db", name=f"db__model__{new_snapshot.version}", type=DataObjectType.TABLE, ) @@ -3902,6 +4120,63 @@ def test_migrate_snapshot(snapshot: Snapshot, mocker: MockerFixture, adapter_moc ) +def test_migrate_only_processes_target_snapshots( + mocker: MockerFixture, adapter_mock, make_snapshot +): + evaluator = SnapshotEvaluator(adapter_mock) + + target_model = SqlModel( + name="test_schema.target_model", + kind=FullKind(), + query=parse_one("SELECT 1 AS a"), + ) + extra_model = SqlModel( + name="test_schema.extra_model", + kind=FullKind(), + query=parse_one("SELECT 1 AS a"), + ) + + target_snapshot = make_snapshot(target_model) + extra_snapshot = make_snapshot(extra_model) + target_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + extra_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + target_snapshots = [target_snapshot] + snapshots = { + target_snapshot.snapshot_id: target_snapshot, + extra_snapshot.snapshot_id: extra_snapshot, + } + + mocker.patch.object( + evaluator, + "_get_data_objects", + return_value={target_snapshot.snapshot_id: mocker.Mock()}, + ) + migrate_mock = mocker.patch.object(evaluator, "_migrate_snapshot") + + def 
apply_side_effect(snapshot_iterable, fn, *_args, **_kwargs): + for snapshot in snapshot_iterable: + fn(snapshot) + return ([], []) + + apply_mock = mocker.patch( + "sqlmesh.core.snapshot.evaluator.concurrent_apply_to_snapshots", + side_effect=apply_side_effect, + ) + + evaluator.migrate(target_snapshots=target_snapshots, snapshots=snapshots) + + assert apply_mock.call_count == 1 + called_snapshots = list(apply_mock.call_args.args[0]) + assert called_snapshots == target_snapshots + + migrate_mock.assert_called_once() + called_snapshot, snapshots_by_name, *_ = migrate_mock.call_args.args + assert called_snapshot is target_snapshot + assert target_snapshot.name in snapshots_by_name + assert extra_snapshot.name in snapshots_by_name + + def test_migrate_managed(adapter_mock, make_snapshot, mocker: MockerFixture): evaluator = SnapshotEvaluator(adapter_mock) @@ -3923,7 +4198,7 @@ def test_migrate_managed(adapter_mock, make_snapshot, mocker: MockerFixture): adapter_mock.get_data_objects.return_value = [ DataObject( - schema="test_schema", + schema="sqlmesh__test_schema", name=f"test_schema__test_model__{snapshot.version}", type=DataObjectType.MANAGED_TABLE, ) @@ -4019,11 +4294,11 @@ def test_multiple_engine_creation(snapshot: Snapshot, adapters, make_snapshot): assert view_args[1][0][0] == "test_schema__test_env.test_model" call_args = engine_adapters["secondary"].execute.call_args_list - pre_calls = call_args[0][0][0] + pre_calls = call_args[1][0][0] assert len(pre_calls) == 1 assert pre_calls[0].sql(dialect="postgres") == expected_call - post_calls = call_args[1][0][0] + post_calls = call_args[2][0][0] assert len(post_calls) == 1 assert post_calls[0].sql(dialect="postgres") == expected_call @@ -4041,18 +4316,20 @@ def test_multiple_engine_promotion(mocker: MockerFixture, adapter_mock, make_sna connection_mock.cursor.return_value = cursor_mock adapter = EngineAdapter(lambda: connection_mock, "") adapter.with_settings = lambda **kwargs: adapter # type: ignore + 
adapter._get_data_objects = lambda *args, **kwargs: [] # type: ignore engine_adapters = {"default": adapter_mock, "secondary": adapter} def columns(table_name): return { "a": exp.DataType.build("int"), + "ds": exp.DataType.build("timestamp"), } adapter.columns = columns # type: ignore model = SqlModel( name="test_schema.test_model", - kind=IncrementalByTimeRangeKind(time_column="a"), + kind=IncrementalByTimeRangeKind(time_column="ds"), gateway="secondary", query=parse_one("SELECT a FROM tbl WHERE ds BETWEEN @start_ds and @end_ds"), ) @@ -4075,10 +4352,10 @@ def columns(table_name): cursor_mock.execute.assert_has_calls( [ call( - f'DELETE FROM "sqlmesh__test_schema"."test_schema__test_model__{snapshot.version}" WHERE "a" BETWEEN 2020-01-01 00:00:00+00:00 AND 2020-01-02 23:59:59.999999+00:00' + f'DELETE FROM "sqlmesh__test_schema"."test_schema__test_model__{snapshot.version}" WHERE "ds" BETWEEN CAST(\'2020-01-01 00:00:00\' AS TIMESTAMP) AND CAST(\'2020-01-02 23:59:59.999999\' AS TIMESTAMP)' ), call( - f'INSERT INTO "sqlmesh__test_schema"."test_schema__test_model__{snapshot.version}" ("a") SELECT "a" FROM (SELECT "a" AS "a" FROM "tbl" AS "tbl" WHERE "ds" BETWEEN \'2020-01-01\' AND \'2020-01-02\') AS "_subquery" WHERE "a" BETWEEN 2020-01-01 00:00:00+00:00 AND 2020-01-02 23:59:59.999999+00:00' + f'INSERT INTO "sqlmesh__test_schema"."test_schema__test_model__{snapshot.version}" ("a", "ds") SELECT "a", "ds" FROM (SELECT "a" AS "a" FROM "tbl" AS "tbl" WHERE "ds" BETWEEN \'2020-01-01\' AND \'2020-01-02\') AS "_subquery" WHERE "ds" BETWEEN CAST(\'2020-01-01 00:00:00\' AS TIMESTAMP) AND CAST(\'2020-01-02 23:59:59.999999\' AS TIMESTAMP)' ), ] ) @@ -4148,12 +4425,12 @@ def columns(table_name): "sqlmesh.core.engine_adapter.base.EngineAdapter.get_data_objects", return_value=[ DataObject( - schema="test_schema", + schema="sqlmesh__test_schema", name=f"test_schema__test_model__{snapshot_1.version}", type=DataObjectType.TABLE, ), DataObject( - schema="test_schema", + 
schema="sqlmesh__test_schema", name=f"test_schema__test_model_2__{snapshot_2.version}", type=DataObjectType.TABLE, ), @@ -4290,7 +4567,7 @@ def model_with_statements(context, **kwargs): # For the pre/post statements verify the model-specific gateway was used engine_adapters["default"].execute.assert_called_once() - assert len(engine_adapters["secondary"].execute.call_args_list) == 2 + assert len(engine_adapters["secondary"].execute.call_args_list) == 4 # Validate that the get_catalog_type method was called only on the secondary engine from the macro evaluator engine_adapters["default"].get_catalog_type.assert_not_called() @@ -4627,3 +4904,600 @@ def test_wap_publish_failure(adapter_mock: Mock, make_snapshot: t.Callable[..., # Execute audit with WAP ID and expect it to raise the exception with pytest.raises(Exception, match="WAP publish failed"): evaluator.audit(snapshot, snapshots={}, wap_id=wap_id) + + +def test_properties_are_preserved_in_both_create_statements( + adapter_mock: Mock, make_snapshot: t.Callable[..., Snapshot] +) -> None: + # the below mocks are needed to create a situation + # where we trigger two create statements during evaluation + transaction_mock = Mock() + transaction_mock.__enter__ = Mock() + transaction_mock.__exit__ = Mock() + session_mock = Mock() + session_mock.__enter__ = Mock() + session_mock.__exit__ = Mock() + adapter_mock = Mock() + adapter_mock.transaction.return_value = transaction_mock + adapter_mock.session.return_value = session_mock + adapter_mock.dialect = "trino" + adapter_mock.HAS_VIEW_BINDING = False + adapter_mock.wap_supported.return_value = False + adapter_mock.get_data_objects.return_value = [] + adapter_mock.with_settings.return_value = adapter_mock + adapter_mock.table_exists.return_value = False + + props = [] + + def mutate_view_properties(*args, **kwargs): + view_props = kwargs.get("view_properties") + if isinstance(view_props, dict): + props.append(view_props["creatable_type"].sql()) + # simulate view pop + 
view_props.pop("creatable_type") + return None + + adapter_mock.create_view.side_effect = mutate_view_properties + + evaluator = SnapshotEvaluator(adapter_mock) + + # create a view model with SECURITY INVOKER physical property + # AND self referenctial to trigger two create statements + model = load_sql_based_model( + parse( # type: ignore + """ + MODEL ( + name test_schema.security_view, + kind VIEW, + physical_properties ( + 'creatable_type' = 'SECURITY INVOKER' + ) + ); + + SELECT 1 as col from test_schema.security_view; + """ + ), + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + evaluator.evaluate( + snapshot, + start="2024-01-01", + end="2024-01-02", + execution_time="2024-01-02", + snapshots={}, + ) + + # Verify create_view was called twice + assert adapter_mock.create_view.call_count == 2 + first_call = adapter_mock.create_view.call_args_list[0] + second_call = adapter_mock.create_view.call_args_list[1] + + # First call should be CREATE VIEW (replace=False) second CREATE OR REPLACE VIEW (replace=True) + assert first_call.kwargs.get("replace") == False + assert second_call.kwargs.get("replace") == True + + # Both calls should have view_properties with security invoker + assert props == ["'SECURITY INVOKER'", "'SECURITY INVOKER'"] + + +def _create_grants_test_model( + grants=None, kind="FULL", grants_target_layer=None, virtual_environment_mode=None +): + if kind == "SEED": + from sqlmesh.core.model.definition import create_seed_model + from sqlmesh.core.model.kind import SeedKind + import tempfile + import os + + # Create a temporary CSV file for the test + temp_csv = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) + temp_csv.write("id,name\n1,test\n2,test2\n") + temp_csv.flush() + temp_csv.close() + + seed_kind_config = {"name": "SEED", "path": temp_csv.name} + seed_kind = SeedKind(**seed_kind_config) + + kwargs = {} + if grants is not None: + kwargs["grants"] = grants + if 
grants_target_layer is not None: + kwargs["grants_target_layer"] = grants_target_layer + + model = create_seed_model("test_model", seed_kind, **kwargs) + + # Clean up the temporary file + os.unlink(temp_csv.name) + + return model + + # Handle regular SQL models + kwargs = { + "kind": kind, + } + if grants is not None: + kwargs["grants"] = grants + if grants_target_layer is not None: + kwargs["grants_target_layer"] = grants_target_layer + if virtual_environment_mode is not None: + kwargs["virtual_environment_mode"] = virtual_environment_mode + + # Add column annotations for non-SEED models to ensure table creation + if kind != "SEED": + kwargs["columns"] = { + "id": "INT", + "ds": "DATE", + "updated_at": "TIMESTAMP", + } + + # Add required fields for specific model kinds + if kind == "INCREMENTAL_BY_TIME_RANGE": + kwargs["kind"] = {"name": "INCREMENTAL_BY_TIME_RANGE", "time_column": "ds"} + elif kind == "INCREMENTAL_BY_PARTITION": + kwargs["kind"] = {"name": "INCREMENTAL_BY_PARTITION"} + kwargs["partitioned_by"] = ["ds"] # This goes on the model, not the kind + elif kind == "INCREMENTAL_BY_UNIQUE_KEY": + kwargs["kind"] = {"name": "INCREMENTAL_BY_UNIQUE_KEY", "unique_key": ["id"]} + elif kind == "INCREMENTAL_UNMANAGED": + kwargs["kind"] = {"name": "INCREMENTAL_UNMANAGED"} + elif kind == "SCD_TYPE_2": + kwargs["kind"] = { + "name": "SCD_TYPE_2", + "unique_key": ["id"], + "updated_at_name": "updated_at", + } + + return create_sql_model( + "test_model", + parse_one("SELECT 1 as id, CURRENT_DATE as ds, CURRENT_TIMESTAMP as updated_at"), + **kwargs, + ) + + +@pytest.mark.parametrize( + "target_layer,apply_layer,expected_call_count", + [ + (GrantsTargetLayer.ALL, GrantsTargetLayer.PHYSICAL, 1), + (GrantsTargetLayer.ALL, GrantsTargetLayer.VIRTUAL, 1), + (GrantsTargetLayer.PHYSICAL, GrantsTargetLayer.PHYSICAL, 1), + (GrantsTargetLayer.PHYSICAL, GrantsTargetLayer.VIRTUAL, 0), + (GrantsTargetLayer.VIRTUAL, GrantsTargetLayer.PHYSICAL, 0), + (GrantsTargetLayer.VIRTUAL, 
GrantsTargetLayer.VIRTUAL, 1), + ], +) +def test_apply_grants_target_layer( + target_layer: GrantsTargetLayer, + apply_layer: GrantsTargetLayer, + expected_call_count: int, + adapter_mock: Mock, + mocker: MockerFixture, +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + strategy = ViewStrategy(adapter_mock) + + model = _create_grants_test_model( + grants={"select": ["user1"]}, grants_target_layer=target_layer + ) + + strategy._apply_grants(model, "test_table", apply_layer) + + if expected_call_count > 0: + assert sync_grants_mock.call_count == expected_call_count + else: + sync_grants_mock.assert_not_called() + + +@pytest.mark.parametrize( + "model_kind_name", + [ + "FULL", + "INCREMENTAL_BY_TIME_RANGE", + "SEED", + "MANAGED", + "SCD_TYPE_2", + "VIEW", + ], +) +def test_grants_create_model_kind( + model_kind_name: str, + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + grants = {"select": ["user1"]} + model = _create_grants_test_model( + grants=grants, kind=model_kind_name, grants_target_layer=GrantsTargetLayer.ALL + ) + snapshot = make_snapshot(model) + + evaluator = SnapshotEvaluator(adapter_mock) + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + evaluator.create([snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == grants + + +@pytest.mark.parametrize( + "target_layer", + [ + GrantsTargetLayer.PHYSICAL, + GrantsTargetLayer.VIRTUAL, + GrantsTargetLayer.ALL, + ], +) +def test_grants_target_layer( + target_layer: GrantsTargetLayer, + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + 
evaluator = SnapshotEvaluator(adapter_mock) + + grants = {"select": ["user1"]} + model = create_sql_model( + "test_schema.test_model", + parse_one("SELECT 1 as id"), + kind="FULL", + grants=grants, + grants_target_layer=target_layer, + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator.create([snapshot], {}) + if target_layer == GrantsTargetLayer.VIRTUAL: + assert sync_grants_mock.call_count == 0 + else: + assert sync_grants_mock.call_count == 1 + assert sync_grants_mock.call_args[0][1] == grants + sync_grants_mock.reset_mock() + evaluator.promote([snapshot], EnvironmentNamingInfo(name="prod")) + if target_layer == GrantsTargetLayer.VIRTUAL: + assert sync_grants_mock.call_count == 1 + elif target_layer == GrantsTargetLayer.PHYSICAL: + # Physical layer: no grants applied during promotion (already applied during create) + assert sync_grants_mock.call_count == 0 + else: # target_layer == GrantsTargetLayer.ALL + # All layers: only virtual grants applied during promotion (physical already done in create) + assert sync_grants_mock.call_count == 1 + + +def test_grants_update( + adapter_mock: Mock, mocker: MockerFixture, make_snapshot: t.Callable[..., Snapshot] +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + + model = create_sql_model( + "test_schema.test_model", + parse_one("SELECT 1 as id"), + kind="FULL", + grants={"select": ["user1"]}, + grants_target_layer=GrantsTargetLayer.ALL, + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + evaluator.create([snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user1"]} + + # Update model query AND change grants + updated_model_dict = model.dict() + updated_model_dict["query"] = parse_one("SELECT 1 as id, 2 as value") + 
updated_model_dict["grants"] = {"select": ["user2", "user3"], "insert": ["admin"]} + updated_model = SqlModel.parse_obj(updated_model_dict) + + new_snapshot = make_snapshot(updated_model) + new_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + sync_grants_mock.reset_mock() + evaluator.create([new_snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user2", "user3"], "insert": ["admin"]} + + # Update model query AND remove grants + updated_model_dict = model.dict() + updated_model_dict["query"] = parse_one("SELECT 1 as id, 'updated' as status") + updated_model_dict["grants"] = {} + updated_model = SqlModel.parse_obj(updated_model_dict) + + new_snapshot = make_snapshot(updated_model) + new_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + sync_grants_mock.reset_mock() + evaluator.create([new_snapshot], {}) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {} + + +def test_grants_create_and_evaluate( + adapter_mock: Mock, mocker: MockerFixture, make_snapshot: t.Callable[..., Snapshot] +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + + model = load_sql_based_model( + parse( # type: ignore + """ + MODEL ( + name test_schema.test_model, + kind INCREMENTAL_BY_TIME_RANGE (time_column ds), + grants ( + 'select' = ['reader1', 'reader2'], + 'insert' = ['writer'] + ), + grants_target_layer 'all' + ); + SELECT ds::DATE, value::INT FROM source WHERE ds BETWEEN @start_ds AND @end_ds; + """ + ) + ) + + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator.create([snapshot], {}) + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == { + "select": ["reader1", "reader2"], + "insert": ["writer"], + } + + sync_grants_mock.reset_mock() + evaluator.evaluate( + snapshot, 
start="2020-01-01", end="2020-01-02", execution_time="2020-01-02", snapshots={} + ) + # Evaluate should not reapply grants + sync_grants_mock.assert_not_called() + + +@pytest.mark.parametrize( + "strategy_class", + [ + EngineManagedStrategy, + FullRefreshStrategy, + IncrementalByTimeRangeStrategy, + IncrementalByPartitionStrategy, + IncrementalUnmanagedStrategy, + IncrementalByUniqueKeyStrategy, + SCDType2Strategy, + # SeedStrategy excluded because seeds do not support migrations + ], +) +def test_grants_materializable_strategy_migrate( + strategy_class: t.Type[MaterializableStrategy], + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + adapter_mock.get_alter_operations.return_value = [] + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + strategy = strategy_class(adapter_mock) + grants = {"select": ["user1"]} + model = _create_grants_test_model(grants=grants, grants_target_layer=GrantsTargetLayer.ALL) + snapshot = make_snapshot(model) + + strategy.migrate( + "target_table", + "source_table", + snapshot, + ignore_destructive=False, + ignore_additive=False, + allow_destructive_snapshots=set(), + allow_additive_snapshots=set(), + ) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == grants + + +def test_grants_clone_snapshot_in_dev( + adapter_mock: Mock, mocker: MockerFixture, make_snapshot: t.Callable[..., Snapshot] +): + adapter_mock.SUPPORTS_CLONING = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + grants = {"select": ["user1", "user2"]} + model = _create_grants_test_model(grants=grants, grants_target_layer=GrantsTargetLayer.ALL) + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator._clone_snapshot_in_dev( + snapshot, {}, DeployabilityIndex.all_deployable(), {}, {}, set(), set() + ) + + 
sync_grants_mock.assert_called_once() + assert ( + sync_grants_mock.call_args[0][0].sql() + == f"sqlmesh__default.test_model__{snapshot.version}__dev" + ) + assert sync_grants_mock.call_args[0][1] == grants + + +@pytest.mark.parametrize( + "model_kind_name", + [ + "INCREMENTAL_BY_TIME_RANGE", + "SEED", + ], +) +def test_grants_evaluator_insert_without_replace_query_for_model( + model_kind_name: str, + adapter_mock: Mock, + mocker: MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + adapter_mock.table_exists.return_value = False # Table doesn't exist + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + evaluator = SnapshotEvaluator(adapter_mock) + + grants = {"select": ["reader1", "reader2"]} + model = _create_grants_test_model( + grants=grants, kind=model_kind_name, grants_target_layer=GrantsTargetLayer.ALL + ) + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + evaluator.evaluate( + snapshot, + start="2023-01-01", + end="2023-01-01", + execution_time="2023-01-01", + snapshots={}, + ) + + # Grants are applied during the table creation phase, not during insert + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == grants + + sync_grants_mock.reset_mock() + adapter_mock.table_exists.return_value = True + snapshot.add_interval("2023-01-01", "2023-01-01") + evaluator.evaluate( + snapshot, + start="2023-01-02", # Different date from existing interval + end="2023-01-02", + execution_time="2023-01-02", + snapshots={}, + ) + + # Should not apply grants since it's not the first insert + sync_grants_mock.assert_not_called() + + +@pytest.mark.parametrize( + "model_kind_name", + [ + "INCREMENTAL_BY_PARTITION", + "INCREMENTAL_BY_UNIQUE_KEY", + "INCREMENTAL_UNMANAGED", + "FULL", + "SCD_TYPE_2", + ], +) +def test_grants_evaluator_insert_with_replace_query_for_model( + model_kind_name: str, + adapter_mock: Mock, + mocker: 
MockerFixture, + make_snapshot: t.Callable[..., Snapshot], +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + adapter_mock.table_exists.return_value = False # Table doesn't exist + adapter_mock.columns.return_value = { + "id": exp.DataType.build("int"), + "ds": exp.DataType.build("date"), + } + + evaluator = SnapshotEvaluator(adapter_mock) + + grants = {"select": ["user1"]} + model = _create_grants_test_model( + grants=grants, kind=model_kind_name, grants_target_layer=GrantsTargetLayer.ALL + ) + snapshot = make_snapshot(model) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + # Now evaluate the snapshot (this should apply grants during first insert) + evaluator.evaluate( + snapshot, + start="2023-01-01", + end="2023-01-01", + execution_time="2023-01-01", + snapshots={}, + ) + + # Should be called twice more during evaluate: once creating table, + # once during first insert with _replace_query_for_model() + assert sync_grants_mock.call_count == 2 + assert sync_grants_mock.call_args[0][1] == grants + + sync_grants_mock.reset_mock() + adapter_mock.table_exists.return_value = True + snapshot.add_interval("2023-01-01", "2023-01-01") + evaluator.evaluate( + snapshot, + start="2023-01-02", # Different date from existing interval + end="2023-01-02", + execution_time="2023-01-02", + snapshots={}, + ) + + if model_kind_name in ("FULL", "SCD_TYPE_2"): + # Full refresh and SCD_TYPE_2 always recreate the table, so grants are always applied + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == grants + else: + # Should not apply grants since it's not the first insert + sync_grants_mock.assert_not_called() + + +@pytest.mark.parametrize( + "model_grants_target_layer", + [ + GrantsTargetLayer.ALL, + GrantsTargetLayer.VIRTUAL, + GrantsTargetLayer.PHYSICAL, + ], +) +def test_grants_in_production_with_dev_only_vde( + adapter_mock: Mock, + mocker: MockerFixture, + 
make_snapshot: t.Callable[..., Snapshot], + model_grants_target_layer: GrantsTargetLayer, +): + adapter_mock.SUPPORTS_GRANTS = True + sync_grants_mock = mocker.patch.object(adapter_mock, "sync_grants_config") + + from sqlmesh.core.model.meta import VirtualEnvironmentMode, GrantsTargetLayer + from sqlmesh.core.snapshot.definition import DeployabilityIndex + + model_virtual_grants = _create_grants_test_model( + grants={"select": ["user1"], "insert": ["role1"]}, + grants_target_layer=model_grants_target_layer, + virtual_environment_mode=VirtualEnvironmentMode.DEV_ONLY, + ) + + snapshot = make_snapshot(model_virtual_grants) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + evaluator = SnapshotEvaluator(adapter_mock) + # create will apply grants to physical layer tables + deployability_index = DeployabilityIndex.all_deployable() + evaluator.create([snapshot], {}, deployability_index=deployability_index) + + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user1"], "insert": ["role1"]} + + # Non-deployable (dev) env + sync_grants_mock.reset_mock() + deployability_index = DeployabilityIndex.none_deployable() + evaluator.create([snapshot], {}, deployability_index=deployability_index) + if model_grants_target_layer == GrantsTargetLayer.VIRTUAL: + sync_grants_mock.assert_not_called() + else: + # Should still apply grants to physical table when target layer is ALL or PHYSICAL + sync_grants_mock.assert_called_once() + assert sync_grants_mock.call_args[0][1] == {"select": ["user1"], "insert": ["role1"]} diff --git a/tests/core/test_table_diff.py b/tests/core/test_table_diff.py index 839cbb415e..c2e293e4c2 100644 --- a/tests/core/test_table_diff.py +++ b/tests/core/test_table_diff.py @@ -144,7 +144,7 @@ def test_data_diff(sushi_context_fixed_date, capsys, caplog): assert row_diff.t_sample.shape == (1, 6) -def test_data_diff_decimals(sushi_context_fixed_date): +def test_data_diff_decimals_on_float(sushi_context_fixed_date): 
engine_adapter = sushi_context_fixed_date.engine_adapter engine_adapter.ctas( @@ -230,6 +230,49 @@ def test_data_diff_decimals(sushi_context_fixed_date): assert stripped_output == stripped_expected +def test_data_diff_decimals_on_numeric(): + engine_adapter = DuckDBConnectionConfig().create_engine_adapter() + + columns_to_types = { + "key": exp.DataType.build("int"), + "value": exp.DataType.build("decimal(10,5)"), + } + + engine_adapter.create_table("src", columns_to_types) + engine_adapter.create_table("target", columns_to_types) + + src_records = [ + (1, "25.12344"), + (2, "25.1234"), + (3, "25.124"), + (4, "25.14"), + (5, "25.4"), + ] + + target_records = [ + (1, "25.12343"), + (2, "25.1233"), + (3, "25.123"), + (4, "25.13"), + (5, "25.3"), + ] + + src_df = pd.DataFrame(data=src_records, columns=columns_to_types.keys()) + target_df = pd.DataFrame(data=target_records, columns=columns_to_types.keys()) + + engine_adapter.insert_append("src", src_df) + engine_adapter.insert_append("target", target_df) + + for decimals in range(5, 0, -1): + table_diff = TableDiff( + adapter=engine_adapter, source="src", target="target", on=["key"], decimals=decimals + ) + diff = table_diff.row_diff() + + assert diff.full_match_count == 5 - decimals + assert diff.partial_match_count + diff.full_match_count == 5 + + def test_grain_check(sushi_context_fixed_date): expressions = d.parse( """ @@ -344,7 +387,7 @@ def test_generated_sql(sushi_context_fixed_date: Context, mocker: MockerFixture) ), ) - query_sql = 'CREATE TABLE IF NOT EXISTS "memory"."sqlmesh_temp_test"."__temp_diff_abcdefgh" AS WITH "__source" AS (SELECT "s"."key", "s"."value", "s"."key" AS "__sqlmesh_join_key" FROM "table_diff_source" AS "s"), "__target" AS (SELECT "t"."key", "t"."value", "t"."key" AS "__sqlmesh_join_key" FROM "table_diff_target" AS "t"), "__stats" AS (SELECT "s"."key" AS "s__key", "s"."value" AS "s__value", "s"."__sqlmesh_join_key" AS "s____sqlmesh_join_key", "t"."key" AS "t__key", "t"."value" AS 
"t__value", "t"."__sqlmesh_join_key" AS "t____sqlmesh_join_key", CASE WHEN NOT "s"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "s_exists", CASE WHEN NOT "t"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "t_exists", CASE WHEN "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key" THEN 1 ELSE 0 END AS "row_joined", CASE WHEN "s"."key" IS NULL AND "t"."key" IS NULL THEN 1 ELSE 0 END AS "null_grain", CASE WHEN "s"."key" = "t"."key" THEN 1 WHEN ("s"."key" IS NULL) AND ("t"."key" IS NULL) THEN 1 WHEN ("s"."key" IS NULL) OR ("t"."key" IS NULL) THEN 0 ELSE 0 END AS "key_matches", CASE WHEN ROUND("s"."value", 3) = ROUND("t"."value", 3) THEN 1 WHEN ("s"."value" IS NULL) AND ("t"."value" IS NULL) THEN 1 WHEN ("s"."value" IS NULL) OR ("t"."value" IS NULL) THEN 0 ELSE 0 END AS "value_matches" FROM "__source" AS "s" FULL JOIN "__target" AS "t" ON "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key") SELECT *, CASE WHEN "key_matches" = 1 AND "value_matches" = 1 THEN 1 ELSE 0 END AS "row_full_match" FROM "__stats"' + query_sql = 'CREATE TABLE IF NOT EXISTS "memory"."sqlmesh_temp_test"."__temp_diff_abcdefgh" AS WITH "__source" AS (SELECT "s"."key", "s"."value", "s"."key" AS "__sqlmesh_join_key" FROM "table_diff_source" AS "s"), "__target" AS (SELECT "t"."key", "t"."value", "t"."key" AS "__sqlmesh_join_key" FROM "table_diff_target" AS "t"), "__stats" AS (SELECT "s"."key" AS "s__key", "s"."value" AS "s__value", "s"."__sqlmesh_join_key" AS "s____sqlmesh_join_key", "t"."key" AS "t__key", "t"."value" AS "t__value", "t"."__sqlmesh_join_key" AS "t____sqlmesh_join_key", CASE WHEN NOT "s"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "s_exists", CASE WHEN NOT "t"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "t_exists", CASE WHEN "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key" THEN 1 ELSE 0 END AS "row_joined", CASE WHEN "s"."key" IS NULL AND "t"."key" IS NULL THEN 1 ELSE 0 END AS "null_grain", CASE WHEN "s"."key" = "t"."key" THEN 1 WHEN ("s"."key" IS NULL) AND ("t"."key" 
IS NULL) THEN 1 WHEN ("s"."key" IS NULL) OR ("t"."key" IS NULL) THEN 0 ELSE 0 END AS "key_matches", CASE WHEN CAST(CAST("s"."value" AS DOUBLE) AS DECIMAL(38, 3)) = CAST(CAST("t"."value" AS DOUBLE) AS DECIMAL(38, 3)) THEN 1 WHEN ("s"."value" IS NULL) AND ("t"."value" IS NULL) THEN 1 WHEN ("s"."value" IS NULL) OR ("t"."value" IS NULL) THEN 0 ELSE 0 END AS "value_matches" FROM "__source" AS "s" FULL JOIN "__target" AS "t" ON "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key") SELECT *, CASE WHEN "key_matches" = 1 AND "value_matches" = 1 THEN 1 ELSE 0 END AS "row_full_match" FROM "__stats"' summary_query_sql = 'SELECT SUM("s_exists") AS "s_count", SUM("t_exists") AS "t_count", SUM("row_joined") AS "join_count", SUM("null_grain") AS "null_grain_count", SUM("row_full_match") AS "full_match_count", SUM("key_matches") AS "key_matches", SUM("value_matches") AS "value_matches", COUNT(DISTINCT ("s____sqlmesh_join_key")) AS "distinct_count_s", COUNT(DISTINCT ("t____sqlmesh_join_key")) AS "distinct_count_t" FROM "memory"."sqlmesh_temp_test"."__temp_diff_abcdefgh"' compare_sql = 'SELECT ROUND(100 * (CAST(SUM("key_matches") AS DECIMAL) / COUNT("key_matches")), 9) AS "key_matches", ROUND(100 * (CAST(SUM("value_matches") AS DECIMAL) / COUNT("value_matches")), 9) AS "value_matches" FROM "memory"."sqlmesh_temp_test"."__temp_diff_abcdefgh" WHERE "row_joined" = 1' sample_query_sql = 'WITH "source_only" AS (SELECT \'source_only\' AS "__sqlmesh_sample_type", "s__key", "s__value", "s____sqlmesh_join_key", "t__key", "t__value", "t____sqlmesh_join_key" FROM "memory"."sqlmesh_temp_test"."__temp_diff_abcdefgh" WHERE "s_exists" = 1 AND "row_joined" = 0 ORDER BY "s__key" NULLS FIRST LIMIT 20), "target_only" AS (SELECT \'target_only\' AS "__sqlmesh_sample_type", "s__key", "s__value", "s____sqlmesh_join_key", "t__key", "t__value", "t____sqlmesh_join_key" FROM "memory"."sqlmesh_temp_test"."__temp_diff_abcdefgh" WHERE "t_exists" = 1 AND "row_joined" = 0 ORDER BY "t__key" NULLS FIRST LIMIT 20), 
"common_rows" AS (SELECT \'common_rows\' AS "__sqlmesh_sample_type", "s__key", "s__value", "s____sqlmesh_join_key", "t__key", "t__value", "t____sqlmesh_join_key" FROM "memory"."sqlmesh_temp_test"."__temp_diff_abcdefgh" WHERE "row_joined" = 1 AND "row_full_match" = 0 ORDER BY "s__key" NULLS FIRST, "t__key" NULLS FIRST LIMIT 20) SELECT "__sqlmesh_sample_type", "s__key", "s__value", "s____sqlmesh_join_key", "t__key", "t__value", "t____sqlmesh_join_key" FROM "source_only" UNION ALL SELECT "__sqlmesh_sample_type", "s__key", "s__value", "s____sqlmesh_join_key", "t__key", "t__value", "t____sqlmesh_join_key" FROM "target_only" UNION ALL SELECT "__sqlmesh_sample_type", "s__key", "s__value", "s____sqlmesh_join_key", "t__key", "t__value", "t____sqlmesh_join_key" FROM "common_rows"' @@ -384,7 +427,7 @@ def test_generated_sql(sushi_context_fixed_date: Context, mocker: MockerFixture) where="key = 2", ) - query_sql_where = 'CREATE TABLE IF NOT EXISTS "memory"."sqlmesh_temp"."__temp_diff_abcdefgh" AS WITH "__source" AS (SELECT "s"."key", "s"."value", "s"."key" AS "__sqlmesh_join_key" FROM "table_diff_source" AS "s" WHERE "s"."key" = 2), "__target" AS (SELECT "t"."key", "t"."value", "t"."key" AS "__sqlmesh_join_key" FROM "table_diff_target" AS "t" WHERE "t"."key" = 2), "__stats" AS (SELECT "s"."key" AS "s__key", "s"."value" AS "s__value", "s"."__sqlmesh_join_key" AS "s____sqlmesh_join_key", "t"."key" AS "t__key", "t"."value" AS "t__value", "t"."__sqlmesh_join_key" AS "t____sqlmesh_join_key", CASE WHEN NOT "s"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "s_exists", CASE WHEN NOT "t"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "t_exists", CASE WHEN "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key" THEN 1 ELSE 0 END AS "row_joined", CASE WHEN "s"."key" IS NULL AND "t"."key" IS NULL THEN 1 ELSE 0 END AS "null_grain", CASE WHEN "s"."key" = "t"."key" THEN 1 WHEN ("s"."key" IS NULL) AND ("t"."key" IS NULL) THEN 1 WHEN ("s"."key" IS NULL) OR ("t"."key" IS NULL) THEN 0 ELSE 0 
END AS "key_matches", CASE WHEN ROUND("s"."value", 3) = ROUND("t"."value", 3) THEN 1 WHEN ("s"."value" IS NULL) AND ("t"."value" IS NULL) THEN 1 WHEN ("s"."value" IS NULL) OR ("t"."value" IS NULL) THEN 0 ELSE 0 END AS "value_matches" FROM "__source" AS "s" FULL JOIN "__target" AS "t" ON "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key") SELECT *, CASE WHEN "key_matches" = 1 AND "value_matches" = 1 THEN 1 ELSE 0 END AS "row_full_match" FROM "__stats"' + query_sql_where = 'CREATE TABLE IF NOT EXISTS "memory"."sqlmesh_temp"."__temp_diff_abcdefgh" AS WITH "__source" AS (SELECT "s"."key", "s"."value", "s"."key" AS "__sqlmesh_join_key" FROM "table_diff_source" AS "s" WHERE "s"."key" = 2), "__target" AS (SELECT "t"."key", "t"."value", "t"."key" AS "__sqlmesh_join_key" FROM "table_diff_target" AS "t" WHERE "t"."key" = 2), "__stats" AS (SELECT "s"."key" AS "s__key", "s"."value" AS "s__value", "s"."__sqlmesh_join_key" AS "s____sqlmesh_join_key", "t"."key" AS "t__key", "t"."value" AS "t__value", "t"."__sqlmesh_join_key" AS "t____sqlmesh_join_key", CASE WHEN NOT "s"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "s_exists", CASE WHEN NOT "t"."__sqlmesh_join_key" IS NULL THEN 1 ELSE 0 END AS "t_exists", CASE WHEN "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key" THEN 1 ELSE 0 END AS "row_joined", CASE WHEN "s"."key" IS NULL AND "t"."key" IS NULL THEN 1 ELSE 0 END AS "null_grain", CASE WHEN "s"."key" = "t"."key" THEN 1 WHEN ("s"."key" IS NULL) AND ("t"."key" IS NULL) THEN 1 WHEN ("s"."key" IS NULL) OR ("t"."key" IS NULL) THEN 0 ELSE 0 END AS "key_matches", CASE WHEN CAST(CAST("s"."value" AS DOUBLE) AS DECIMAL(38, 3)) = CAST(CAST("t"."value" AS DOUBLE) AS DECIMAL(38, 3)) THEN 1 WHEN ("s"."value" IS NULL) AND ("t"."value" IS NULL) THEN 1 WHEN ("s"."value" IS NULL) OR ("t"."value" IS NULL) THEN 0 ELSE 0 END AS "value_matches" FROM "__source" AS "s" FULL JOIN "__target" AS "t" ON "s"."__sqlmesh_join_key" = "t"."__sqlmesh_join_key") SELECT *, CASE WHEN "key_matches" = 1 AND 
"value_matches" = 1 THEN 1 ELSE 0 END AS "row_full_match" FROM "__stats"' spy_execute.assert_any_call(query_sql_where, False) diff --git a/tests/core/test_test.py b/tests/core/test_test.py index d889c7bb33..d679f09393 100644 --- a/tests/core/test_test.py +++ b/tests/core/test_test.py @@ -770,6 +770,42 @@ def test_partial_data_column_order(sushi_context: Context) -> None: ).run() ) + # - output df must differ if sorted by (id, event_date) vs. (event_date, id) + # - output partial must be true + _check_successful_or_raise( + _create_test( + body=load_yaml( + """ +test_foo: + model: sushi.foo + inputs: + sushi.items: + - id: 9876 + event_date: 2020-01-01 + - id: 1234 + name: hello + event_date: 2020-01-02 + outputs: + partial: true + query: + - event_date: 2020-01-01 + id: 9876 + - event_date: 2020-01-02 + id: 1234 + name: hello + """ + ), + test_name="test_foo", + model=sushi_context.upsert_model( + _create_model( + "SELECT id, name, price, event_date FROM sushi.items", + default_catalog=sushi_context.default_catalog, + ) + ), + context=sushi_context, + ).run() + ) + def test_partial_data_missing_schemas(sushi_context: Context) -> None: _check_successful_or_raise( @@ -1149,6 +1185,27 @@ def test_unknown_column_error() -> None: ) +def test_invalid_outputs_error() -> None: + with pytest.raises(TestError, match="Incomplete test, outputs must contain 'query' or 'ctes'"): + _create_test( + body=load_yaml( + """ +test_foo: + model: sushi.foo + inputs: + raw: + - id: 1 + outputs: + rows: + - id: 1 + """ + ), + test_name="test_foo", + model=_create_model("SELECT id FROM raw"), + context=Context(config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))), + ) + + def test_empty_rows(sushi_context: Context) -> None: _check_successful_or_raise( _create_test( @@ -1482,6 +1539,9 @@ def test_gateway(copy_to_temp_path: t.Callable, mocker: MockerFixture) -> None: with open(test_path, "w", encoding="utf-8") as file: dump_yaml(test_dict, file) + # Re-initialize context to 
pick up the modified test file + context = Context(paths=path, config=config) + spy_execute = mocker.spy(EngineAdapter, "_execute") mocker.patch("sqlmesh.core.test.definition.random_id", return_value="jzngz56a") @@ -1658,10 +1718,12 @@ def test_generate_input_data_using_sql(mocker: MockerFixture, tmp_path: Path) -> ) +@pytest.mark.pyspark def test_pyspark_python_model(tmp_path: Path) -> None: spark_connection_config = SparkConnectionConfig( config={ "spark.master": "local", + "spark.driver.memory": "512m", "spark.sql.warehouse.dir": f"{tmp_path}/data_dir", "spark.driver.extraJavaOptions": f"-Dderby.system.home={tmp_path}/derby_dir", }, @@ -2391,6 +2453,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: copy_test_file(original_test_file, tmp_path / "tests" / f"test_success_{i}.yaml", i) copy_test_file(new_test_file, tmp_path / "tests" / f"test_failure_{i}.yaml", i) + # Re-initialize context to pick up the new test files + context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() @@ -2406,13 +2471,12 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: "SELECT 1 AS col_1, 2 AS col_2, 3 AS col_3, 4 AS col_4, 5 AS col_5, 6 AS col_6, 7 AS col_7" ) - context.upsert_model( - _create_model( - meta="MODEL(name test.test_wide_model)", - query=wide_model_query, - default_catalog=context.default_catalog, - ) + wide_model = _create_model( + meta="MODEL(name test.test_wide_model)", + query=wide_model_query, + default_catalog=context.default_catalog, ) + context.upsert_model(wide_model) tests_dir = tmp_path / "tests" tests_dir.mkdir() @@ -2436,6 +2500,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: wide_test_file.write_text(wide_test_file_content) + context.load() + context.upsert_model(wide_model) + with capture_output() as captured_output: context.test() @@ -2492,6 +2559,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, 
index: int) -> None: """ ) + # Re-initialize context to pick up the modified test file + context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() @@ -3310,6 +3380,56 @@ def execute(context: ExecutionContext, **kwargs: t.Any) -> pd.DataFrame: _check_successful_or_raise(test_default_vars.run()) +def test_python_model_sorting(tmp_path: Path) -> None: + py_model = tmp_path / "models" / "test_sort_model.py" + py_model.parent.mkdir(parents=True, exist_ok=True) + py_model.write_text( + """ +import pandas as pd # noqa: TID253 +from sqlmesh import model, ExecutionContext +import typing as t + +@model( + name="test_sort_model", + columns={"id": "int", "value": "varchar"}, +) +def execute(context: ExecutionContext, **kwargs: t.Any) -> pd.DataFrame: + # Return rows in a potentially non-deterministic order + # (simulating a model that doesn't guarantee order) + return pd.DataFrame([ + {"id": 3, "value": "c"}, + {"id": 1, "value": "a"}, + {"id": 2, "value": "b"}, + ])""" + ) + + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) + context = Context(config=config, paths=tmp_path) + + python_model = context.models['"test_sort_model"'] + + _check_successful_or_raise( + _create_test( + body=load_yaml(""" + test_without_sort: + model: test_sort_model + outputs: + query: + rows: + - id: 1 + value: "a" + - id: 2 + value: "b" + - id: 3 + value: "c" + """), + test_name="test_without_sort", + model=python_model, + context=context, + ).run() + ) + + @use_terminal_console def test_cte_failure(tmp_path: Path) -> None: models_dir = tmp_path / "models" @@ -3365,6 +3485,9 @@ def test_cte_failure(tmp_path: Path) -> None: """ ) + # Re-initialize context to pick up the new test file + context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() @@ -3391,6 +3514,9 @@ def test_cte_failure(tmp_path: Path) -> None: """ ) + # Re-initialize context to pick up the modified test file + 
context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() diff --git a/tests/dbt/cli/conftest.py b/tests/dbt/cli/conftest.py index dfad2f0046..26757bf3ab 100644 --- a/tests/dbt/cli/conftest.py +++ b/tests/dbt/cli/conftest.py @@ -1,27 +1,9 @@ import typing as t -from pathlib import Path -import os import functools from click.testing import CliRunner, Result import pytest -@pytest.fixture -def jaffle_shop_duckdb(copy_to_temp_path: t.Callable[..., t.List[Path]]) -> t.Iterable[Path]: - fixture_path = Path(__file__).parent / "fixtures" / "jaffle_shop_duckdb" - assert fixture_path.exists() - - current_path = os.getcwd() - output_path = copy_to_temp_path(paths=fixture_path)[0] - - # so that we can invoke commands from the perspective of a user that is alrady in the correct directory - os.chdir(output_path) - - yield output_path - - os.chdir(current_path) - - @pytest.fixture def invoke_cli() -> t.Callable[..., Result]: from sqlmesh_dbt.cli import dbt diff --git a/tests/dbt/cli/test_global_flags.py b/tests/dbt/cli/test_global_flags.py index 66dee7236c..7e2262bd80 100644 --- a/tests/dbt/cli/test_global_flags.py +++ b/tests/dbt/cli/test_global_flags.py @@ -1,10 +1,12 @@ import typing as t from pathlib import Path import pytest +import logging from pytest_mock import MockerFixture from click.testing import Result from sqlmesh.utils.errors import SQLMeshError from sqlglot.errors import SqlglotError +from tests.dbt.conftest import EmptyProjectCreator pytestmark = pytest.mark.slow @@ -93,3 +95,93 @@ def test_run_error_handler( assert result.exit_code == 1 assert "Error: Error with selector" in result.output assert "Traceback" not in result.output + + +def test_log_level(invoke_cli: t.Callable[..., Result], create_empty_project: EmptyProjectCreator): + create_empty_project() + + result = invoke_cli(["--log-level", "info", "list"]) + assert result.exit_code == 0 + assert logging.getLogger("sqlmesh").getEffectiveLevel() == 
logging.INFO + + result = invoke_cli(["--log-level", "debug", "list"]) + assert result.exit_code == 0 + assert logging.getLogger("sqlmesh").getEffectiveLevel() == logging.DEBUG + + +def test_profiles_dir( + invoke_cli: t.Callable[..., Result], create_empty_project: EmptyProjectCreator, tmp_path: Path +): + project_dir, _ = create_empty_project(project_name="test_profiles_dir") + + orig_profiles_yml = project_dir / "profiles.yml" + assert orig_profiles_yml.exists() + + new_profiles_yml = tmp_path / "some_other_place" / "profiles.yml" + new_profiles_yml.parent.mkdir(parents=True) + + orig_profiles_yml.rename(new_profiles_yml) + assert not orig_profiles_yml.exists() + assert new_profiles_yml.exists() + + # should fail if we don't specify --profiles-dir + result = invoke_cli(["list"]) + assert result.exit_code > 0, result.output + + # alternative ~/.dbt/profiles.yml might exist but doesn't contain the profile + assert "profiles.yml not found" in result.output or "not found in profiles" in result.output + + # should pass if we specify --profiles-dir + result = invoke_cli(["--profiles-dir", str(new_profiles_yml.parent), "list"]) + assert result.exit_code == 0, result.output + assert "Models in project" in result.output + + +def test_project_dir( + invoke_cli: t.Callable[..., Result], create_empty_project: EmptyProjectCreator +): + orig_project_dir, _ = create_empty_project(project_name="test_project_dir") + + orig_project_yml = orig_project_dir / "dbt_project.yml" + assert orig_project_yml.exists() + + new_project_yml = orig_project_dir / "nested" / "dbt_project.yml" + new_project_yml.parent.mkdir(parents=True) + + orig_project_yml.rename(new_project_yml) + assert not orig_project_yml.exists() + assert new_project_yml.exists() + + # should fail if we don't specify --project-dir + result = invoke_cli(["list"]) + assert result.exit_code != 0, result.output + assert "Error:" in result.output + + # should fail if the profiles.yml also doesnt exist at that --project-dir + 
result = invoke_cli(["--project-dir", str(new_project_yml.parent), "list"]) + assert result.exit_code != 0, result.output + + # profiles.yml might exist but doesn't contain the profile + assert "profiles.yml not found" in result.output or "not found in profiles" in result.output + + # should pass if it can find both files, either because we specified --profiles-dir explicitly or the profiles.yml was found in --project-dir + result = invoke_cli( + [ + "--project-dir", + str(new_project_yml.parent), + "--profiles-dir", + str(orig_project_dir), + "list", + ] + ) + assert result.exit_code == 0, result.output + assert "Models in project" in result.output + + orig_profiles_yml = orig_project_dir / "profiles.yml" + new_profiles_yml = new_project_yml.parent / "profiles.yml" + assert orig_profiles_yml.exists() + orig_profiles_yml.rename(new_profiles_yml) + + result = invoke_cli(["--project-dir", str(new_project_yml.parent), "list"]) + assert result.exit_code == 0, result.output + assert "Models in project" in result.output diff --git a/tests/dbt/cli/test_list.py b/tests/dbt/cli/test_list.py index 1bc22ce87e..3e6a55125c 100644 --- a/tests/dbt/cli/test_list.py +++ b/tests/dbt/cli/test_list.py @@ -12,65 +12,93 @@ def test_list(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): assert result.exit_code == 0 assert not result.exception - assert "main.orders" in result.output - assert "main.customers" in result.output - assert "main.stg_payments" in result.output - assert "main.raw_orders" in result.output + assert "─ jaffle_shop.orders" in result.output + assert "─ jaffle_shop.customers" in result.output + assert "─ jaffle_shop.staging.stg_payments" in result.output + assert "─ jaffle_shop.raw_orders" in result.output def test_list_select(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): - result = invoke_cli(["list", "--select", "main.raw_customers+"]) + result = invoke_cli(["list", "--select", "raw_customers+"]) assert result.exit_code == 0 assert not 
result.exception - assert "main.customers" in result.output - assert "main.stg_customers" in result.output - assert "main.raw_customers" in result.output + assert "─ jaffle_shop.customers" in result.output + assert "─ jaffle_shop.staging.stg_customers" in result.output + assert "─ jaffle_shop.raw_customers" in result.output - assert "main.stg_payments" not in result.output - assert "main.raw_orders" not in result.output + assert "─ jaffle_shop.staging.stg_payments" not in result.output + assert "─ jaffle_shop.raw_orders" not in result.output def test_list_select_exclude(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): # single exclude - result = invoke_cli(["list", "--select", "main.raw_customers+", "--exclude", "main.orders"]) + result = invoke_cli(["list", "--select", "raw_customers+", "--exclude", "orders"]) assert result.exit_code == 0 assert not result.exception - assert "main.customers" in result.output - assert "main.stg_customers" in result.output - assert "main.raw_customers" in result.output + assert "─ jaffle_shop.customers" in result.output + assert "─ jaffle_shop.staging.stg_customers" in result.output + assert "─ jaffle_shop.raw_customers" in result.output - assert "main.orders" not in result.output - assert "main.stg_payments" not in result.output - assert "main.raw_orders" not in result.output + assert "─ jaffle_shop.orders" not in result.output + assert "─ jaffle_shop.staging.stg_payments" not in result.output + assert "─ jaffle_shop.raw_orders" not in result.output # multiple exclude for args in ( - ["--select", "main.stg_orders+", "--exclude", "main.customers", "--exclude", "main.orders"], - ["--select", "main.stg_orders+", "--exclude", "main.customers main.orders"], + ["--select", "stg_orders+", "--exclude", "customers", "--exclude", "orders"], + ["--select", "stg_orders+", "--exclude", "customers orders"], ): result = invoke_cli(["list", *args]) assert result.exit_code == 0 assert not result.exception - assert "main.stg_orders" 
in result.output + assert "─ jaffle_shop.staging.stg_orders" in result.output - assert "main.customers" not in result.output - assert "main.orders" not in result.output + assert "─ jaffle_shop.customers" not in result.output + assert "─ jaffle_shop.orders" not in result.output def test_list_with_vars(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): - (jaffle_shop_duckdb / "models" / "aliased_model.sql").write_text(""" - {{ config(alias='model_' + var('foo')) }} - select 1 + ( + jaffle_shop_duckdb / "models" / "vars_model.sql" + ).write_text(""" + select * from {{ ref('custom' + var('foo')) }} """) - result = invoke_cli(["list", "--vars", "foo: bar"]) + result = invoke_cli(["list", "--vars", "foo: ers"]) assert result.exit_code == 0 assert not result.exception - assert "model_bar" in result.output + assert ( + """├── jaffle_shop.vars_model +│ └── depends_on: jaffle_shop.customers""" + in result.output + ) + + +def test_list_models_mutually_exclusive( + jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result] +): + result = invoke_cli(["list", "--select", "foo", "--models", "bar"]) + assert result.exit_code != 0 + assert '"models" and "select" are mutually exclusive arguments' in result.output + + result = invoke_cli(["list", "--resource-type", "test", "--models", "bar"]) + assert result.exit_code != 0 + assert '"models" and "resource_type" are mutually exclusive arguments' in result.output + + +def test_list_models(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): + result = invoke_cli(["list", "--models", "jaffle_shop"]) + assert result.exit_code == 0 + assert not result.exception + + assert "─ jaffle_shop.customers" in result.output + assert ( + "─ jaffle_shop.raw_customers" not in result.output + ) # should be excluded because dbt --models excludes seeds diff --git a/tests/dbt/cli/test_operations.py b/tests/dbt/cli/test_operations.py index f8ce239d3b..4aa508e21f 100644 --- a/tests/dbt/cli/test_operations.py +++ 
b/tests/dbt/cli/test_operations.py @@ -2,17 +2,19 @@ from pathlib import Path import pytest from sqlmesh_dbt.operations import create +from sqlmesh_dbt.console import DbtCliConsole from sqlmesh.utils import yaml from sqlmesh.utils.errors import SQLMeshError import time_machine -from sqlmesh.core.console import NoopConsole from sqlmesh.core.plan import PlanBuilder from sqlmesh.core.config.common import VirtualEnvironmentMode +from tests.dbt.conftest import EmptyProjectCreator +import logging pytestmark = pytest.mark.slow -class PlanCapturingConsole(NoopConsole): +class PlanCapturingConsole(DbtCliConsole): def plan( self, plan_builder: PlanBuilder, @@ -137,7 +139,7 @@ def test_run_option_mapping(jaffle_shop_duckdb: Path): assert plan.selected_models_to_backfill is None assert {s.name for s in plan.snapshots} == {k for k in operations.context.snapshots} - plan = operations.run(select=["main.stg_orders+"]) + plan = operations.run(select=["stg_orders+"]) assert plan.environment.name == "prod" assert console.no_prompts is True assert console.no_diff is True @@ -154,7 +156,7 @@ def test_run_option_mapping(jaffle_shop_duckdb: Path): plan.selected_models_to_backfill | {standalone_audit_name} ) - plan = operations.run(select=["main.stg_orders+"], exclude=["main.customers"]) + plan = operations.run(select=["stg_orders+"], exclude=["customers"]) assert plan.environment.name == "prod" assert console.no_prompts is True assert console.no_diff is True @@ -170,7 +172,7 @@ def test_run_option_mapping(jaffle_shop_duckdb: Path): plan.selected_models_to_backfill | {standalone_audit_name} ) - plan = operations.run(exclude=["main.customers"]) + plan = operations.run(exclude=["customers"]) assert plan.environment.name == "prod" assert console.no_prompts is True assert console.no_diff is True @@ -237,7 +239,7 @@ def test_run_option_mapping_dev(jaffle_shop_duckdb: Path): assert plan.skip_backfill is True assert plan.selected_models_to_backfill == {'"jaffle_shop"."main"."new_model"'} - plan 
= operations.run(environment="dev", select=["main.stg_orders+"]) + plan = operations.run(environment="dev", select=["stg_orders+"]) assert plan.environment.name == "dev" assert console.no_prompts is True assert console.no_diff is True @@ -257,3 +259,118 @@ def test_run_option_mapping_dev(jaffle_shop_duckdb: Path): '"jaffle_shop"."main"."orders"', '"jaffle_shop"."main"."stg_orders"', } + + +@pytest.mark.parametrize( + "env_name,vde_mode", + [ + ("prod", VirtualEnvironmentMode.DEV_ONLY), + ("prod", VirtualEnvironmentMode.FULL), + ("dev", VirtualEnvironmentMode.DEV_ONLY), + ("dev", VirtualEnvironmentMode.FULL), + ], +) +def test_run_option_full_refresh( + create_empty_project: EmptyProjectCreator, env_name: str, vde_mode: VirtualEnvironmentMode +): + # create config file prior to load + project_path, models_path = create_empty_project(project_name="test") + + config_path = project_path / "sqlmesh.yaml" + config = yaml.load(config_path) + config["virtual_environment_mode"] = vde_mode.value + + with config_path.open("w") as f: + yaml.dump(config, f) + + (models_path / "model_a.sql").write_text("select 1") + (models_path / "model_b.sql").write_text("select 2") + + operations = create(project_dir=project_path) + + assert operations.context.config.virtual_environment_mode == vde_mode + + console = PlanCapturingConsole() + operations.context.console = console + + plan = operations.run(environment=env_name, full_refresh=True) + + # both models added as backfills + restatements regardless of env / vde mode setting + assert plan.environment.name == env_name + assert len(plan.restatements) == 2 + assert list(plan.restatements)[0].name == '"test"."main"."model_a"' + assert list(plan.restatements)[1].name == '"test"."main"."model_b"' + + assert plan.requires_backfill + assert not plan.empty_backfill + assert not plan.skip_backfill + assert plan.models_to_backfill == set(['"test"."main"."model_a"', '"test"."main"."model_b"']) + + if vde_mode == VirtualEnvironmentMode.DEV_ONLY: + # 
We do not clear intervals across all model versions in the default DEV_ONLY mode, even when targeting prod, + # because dev data is hardcoded to preview only so by definition and can never be deployed + assert not plan.restate_all_snapshots + else: + if env_name == "prod": + # in FULL mode, we do it for prod + assert plan.restate_all_snapshots + else: + # but not dev + assert not plan.restate_all_snapshots + + +def test_run_option_full_refresh_with_selector(jaffle_shop_duckdb: Path): + operations = create(project_dir=jaffle_shop_duckdb) + assert len(operations.context.models) > 5 + + console = PlanCapturingConsole() + operations.context.console = console + + plan = operations.run(select=["stg_customers"], full_refresh=True) + assert len(plan.restatements) == 1 + assert list(plan.restatements)[0].name == '"jaffle_shop"."main"."stg_customers"' + + assert plan.requires_backfill + assert not plan.empty_backfill + assert not plan.skip_backfill + assert plan.models_to_backfill == set(['"jaffle_shop"."main"."stg_customers"']) + + +def test_create_sets_concurrent_tasks_based_on_threads(create_empty_project: EmptyProjectCreator): + project_dir, _ = create_empty_project(project_name="test") + + # add a postgres target because duckdb overrides to concurrent_tasks=1 regardless of what gets specified + profiles_yml_file = project_dir / "profiles.yml" + profiles_yml = yaml.load(profiles_yml_file) + profiles_yml["test"]["outputs"]["postgres"] = { + "type": "postgres", + "host": "localhost", + "port": 5432, + "user": "postgres", + "password": "postgres", + "dbname": "test", + "schema": "test", + } + profiles_yml_file.write_text(yaml.dump(profiles_yml)) + + operations = create(project_dir=project_dir, target="postgres") + + assert operations.context.concurrent_tasks == 1 # 1 is the default + + operations = create(project_dir=project_dir, threads=16, target="postgres") + + assert operations.context.concurrent_tasks == 16 + assert all( + g.connection and g.connection.concurrent_tasks 
== 16 + for g in operations.context.config.gateways.values() + ) + + +def test_create_configures_log_level(create_empty_project: EmptyProjectCreator): + project_dir, _ = create_empty_project() + + create(project_dir=project_dir, log_level="info") + assert logging.getLogger("sqlmesh").getEffectiveLevel() == logging.INFO + + create(project_dir=project_dir, log_level="error") + assert logging.getLogger("sqlmesh").getEffectiveLevel() == logging.ERROR diff --git a/tests/dbt/cli/test_run.py b/tests/dbt/cli/test_run.py index 4d80514fc8..c640950a27 100644 --- a/tests/dbt/cli/test_run.py +++ b/tests/dbt/cli/test_run.py @@ -1,9 +1,12 @@ import typing as t import pytest from pathlib import Path +import shutil from click.testing import Result import time_machine +from sqlmesh_dbt.operations import create from tests.cli.test_cli import FREEZE_TIME +from tests.dbt.conftest import EmptyProjectCreator pytestmark = pytest.mark.slow @@ -25,7 +28,7 @@ def test_run_with_selectors(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[... assert result.exit_code == 0 assert "main.orders" in result.output - result = invoke_cli(["run", "--select", "main.raw_customers+", "--exclude", "main.orders"]) + result = invoke_cli(["run", "--select", "raw_customers+", "--exclude", "orders"]) assert result.exit_code == 0 assert not result.exception @@ -38,3 +41,58 @@ def test_run_with_selectors(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[... 
assert "main.orders" not in result.output assert "Model batches executed" in result.output + + +def test_run_with_changes_and_full_refresh( + create_empty_project: EmptyProjectCreator, invoke_cli: t.Callable[..., Result] +): + project_path, models_path = create_empty_project(project_name="test") + + engine_adapter = create(project_path).context.engine_adapter + engine_adapter.execute("create table external_table as select 'foo' as a, 'bar' as b") + + (models_path / "model_a.sql").write_text("select a, b from external_table") + (models_path / "model_b.sql").write_text("select a, b from {{ ref('model_a') }}") + + # populate initial env + result = invoke_cli(["run"]) + assert result.exit_code == 0 + assert not result.exception + + assert engine_adapter.fetchall("select a, b from model_b") == [("foo", "bar")] + + engine_adapter.execute("insert into external_table (a, b) values ('baz', 'bing')") + (project_path / "models" / "model_b.sql").write_text( + "select a, b, 'changed' as c from {{ ref('model_a') }}" + ) + + # Clear dbt's partial parse cache to ensure file changes are detected + # Without it dbt may use stale cached model definitions, causing flakiness + partial_parse_file = project_path / "target" / "sqlmesh_partial_parse.msgpack" + if partial_parse_file.exists(): + partial_parse_file.unlink() + + cache_dir = project_path / ".cache" + if cache_dir.exists(): + shutil.rmtree(cache_dir) + + # run with --full-refresh. 
this should: + # - fully refresh model_a (pick up the new records from external_table) + # - deploy the local change to model_b (introducing the 'changed' column) + result = invoke_cli(["run", "--full-refresh"]) + assert result.exit_code == 0 + assert not result.exception + + assert engine_adapter.fetchall("select a, b from model_a") == [("foo", "bar"), ("baz", "bing")] + assert engine_adapter.fetchall("select a, b, c from model_b") == [ + ("foo", "bar", "changed"), + ("baz", "bing", "changed"), + ] + + +def test_run_with_threads(jaffle_shop_duckdb: Path, invoke_cli: t.Callable[..., Result]): + result = invoke_cli(["run", "--threads", "4"]) + assert result.exit_code == 0 + assert not result.exception + + assert "Model batches executed" in result.output diff --git a/tests/dbt/cli/test_selectors.py b/tests/dbt/cli/test_selectors.py index 6041a50d0a..17f0195f58 100644 --- a/tests/dbt/cli/test_selectors.py +++ b/tests/dbt/cli/test_selectors.py @@ -1,6 +1,9 @@ import typing as t import pytest from sqlmesh_dbt import selectors +from sqlmesh.core.selector import DbtSelector +from sqlmesh.core.context import Context +from pathlib import Path @pytest.mark.parametrize( @@ -77,3 +80,252 @@ def test_split_unions_and_intersections( expression: str, expected: t.Tuple[t.List[str], t.List[str]] ): assert selectors._split_unions_and_intersections(expression) == expected + + +@pytest.mark.parametrize( + "dbt_select,expected", + [ + (["aging"], set()), + ( + ["staging"], + { + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + }, + ), + (["staging.stg_customers"], {'"jaffle_shop"."main"."stg_customers"'}), + (["stg_customers.staging"], set()), + ( + ["+customers"], + { + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + '"jaffle_shop"."main"."raw_customers"', + '"jaffle_shop"."main"."raw_orders"', + 
'"jaffle_shop"."main"."raw_payments"', + }, + ), + (["customers+"], {'"jaffle_shop"."main"."customers"'}), + ( + ["customers+", "stg_orders"], + {'"jaffle_shop"."main"."customers"', '"jaffle_shop"."main"."stg_orders"'}, + ), + (["*.staging.stg_c*"], {'"jaffle_shop"."main"."stg_customers"'}), + (["tag:agg"], {'"jaffle_shop"."main"."agg_orders"'}), + ( + ["staging.stg_customers", "tag:agg"], + { + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."agg_orders"', + }, + ), + ( + ["+tag:agg"], + { + '"jaffle_shop"."main"."agg_orders"', + '"jaffle_shop"."main"."orders"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + '"jaffle_shop"."main"."raw_orders"', + '"jaffle_shop"."main"."raw_payments"', + }, + ), + ( + ["tag:agg+"], + { + '"jaffle_shop"."main"."agg_orders"', + }, + ), + ( + ["tag:b*"], + set(), + ), + ( + ["tag:a*"], + { + '"jaffle_shop"."main"."agg_orders"', + }, + ), + ], +) +def test_select_by_dbt_names( + jaffle_shop_duckdb: Path, + jaffle_shop_duckdb_context: Context, + dbt_select: t.List[str], + expected: t.Set[str], +): + (jaffle_shop_duckdb / "models" / "agg_orders.sql").write_text(""" + {{ config(tags=["agg"]) }} + select order_date, count(*) as num_orders from {{ ref('orders') }} + """) + + ctx = jaffle_shop_duckdb_context + ctx.load() + assert '"jaffle_shop"."main"."agg_orders"' in ctx.models + assert ctx.get_model('"jaffle_shop"."main"."agg_orders"').tags == ["agg"] + + selector = ctx._new_selector() + assert isinstance(selector, DbtSelector) + + sqlmesh_selector = selectors.to_sqlmesh(dbt_select=dbt_select, dbt_exclude=[]) + assert sqlmesh_selector + + assert selector.expand_model_selections([sqlmesh_selector]) == expected + + +@pytest.mark.parametrize( + "dbt_exclude,expected", + [ + (["jaffle_shop"], set()), + ( + ["staging"], + { + '"jaffle_shop"."main"."agg_orders"', + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."orders"', + '"jaffle_shop"."main"."raw_customers"', + 
'"jaffle_shop"."main"."raw_orders"', + '"jaffle_shop"."main"."raw_payments"', + }, + ), + (["+customers"], {'"jaffle_shop"."main"."orders"', '"jaffle_shop"."main"."agg_orders"'}), + ( + ["+tag:agg"], + { + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."raw_customers"', + }, + ), + ], +) +def test_exclude_by_dbt_names( + jaffle_shop_duckdb: Path, + jaffle_shop_duckdb_context: Context, + dbt_exclude: t.List[str], + expected: t.Set[str], +): + (jaffle_shop_duckdb / "models" / "agg_orders.sql").write_text(""" + {{ config(tags=["agg"]) }} + select order_date, count(*) as num_orders from {{ ref('orders') }} + """) + + ctx = jaffle_shop_duckdb_context + ctx.load() + assert '"jaffle_shop"."main"."agg_orders"' in ctx.models + assert ctx.get_model('"jaffle_shop"."main"."agg_orders"').tags == ["agg"] + + selector = ctx._new_selector() + assert isinstance(selector, DbtSelector) + + sqlmesh_selector = selectors.to_sqlmesh(dbt_select=[], dbt_exclude=dbt_exclude) + assert sqlmesh_selector + + assert selector.expand_model_selections([sqlmesh_selector]) == expected + + +@pytest.mark.parametrize( + "dbt_select,dbt_exclude,expected", + [ + (["jaffle_shop"], ["jaffle_shop"], set()), + ( + ["staging"], + ["stg_customers"], + { + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + }, + ), + ( + ["staging.stg_customers", "tag:agg"], + ["tag:agg"], + { + '"jaffle_shop"."main"."stg_customers"', + }, + ), + ], +) +def test_selection_and_exclusion_by_dbt_names( + jaffle_shop_duckdb: Path, + jaffle_shop_duckdb_context: Context, + dbt_select: t.List[str], + dbt_exclude: t.List[str], + expected: t.Set[str], +): + (jaffle_shop_duckdb / "models" / "agg_orders.sql").write_text(""" + {{ config(tags=["agg"]) }} + select order_date, count(*) as num_orders from {{ ref('orders') }} + """) + + ctx = jaffle_shop_duckdb_context + ctx.load() + assert '"jaffle_shop"."main"."agg_orders"' in ctx.models + + selector = 
ctx._new_selector() + assert isinstance(selector, DbtSelector) + + sqlmesh_selector = selectors.to_sqlmesh(dbt_select=dbt_select, dbt_exclude=dbt_exclude) + assert sqlmesh_selector + + assert selector.expand_model_selections([sqlmesh_selector]) == expected + + +@pytest.mark.parametrize( + "input_args,expected", + [ + ( + dict(select=["jaffle_shop"], models=["jaffle_shop"]), + '"models" and "select" are mutually exclusive', + ), + ( + dict(models=["jaffle_shop"], resource_type="test"), + '"models" and "resource_type" are mutually exclusive', + ), + ( + dict(select=["jaffle_shop"], resource_type="test"), + (["resource_type:test,jaffle_shop"], []), + ), + (dict(resource_type="model"), (["resource_type:model"], [])), + (dict(models=["stg_customers"]), (["resource_type:model,stg_customers"], [])), + ( + dict(models=["stg_customers"], exclude=["orders"]), + (["resource_type:model,stg_customers"], ["orders"]), + ), + ], +) +def test_consolidate(input_args: t.Dict[str, t.Any], expected: t.Union[t.Tuple[str, str], str]): + all_input_args: t.Dict[str, t.Any] = dict(select=[], exclude=[], models=[], resource_type=None) + + all_input_args.update(input_args) + + def _do_assert(): + assert selectors.consolidate(**all_input_args) == expected + + if isinstance(expected, str): + with pytest.raises(ValueError, match=expected): + _do_assert() + else: + _do_assert() + + +def test_models_by_dbt_names(jaffle_shop_duckdb_context: Context): + ctx = jaffle_shop_duckdb_context + + selector = ctx._new_selector() + assert isinstance(selector, DbtSelector) + + selector_expr = selectors.to_sqlmesh( + *selectors.consolidate(select=[], exclude=[], models=["jaffle_shop"], resource_type=None) + ) + assert selector_expr + + assert selector.expand_model_selections([selector_expr]) == { + '"jaffle_shop"."main"."customers"', + '"jaffle_shop"."main"."orders"', + '"jaffle_shop"."main"."stg_customers"', + '"jaffle_shop"."main"."stg_orders"', + '"jaffle_shop"."main"."stg_payments"', + } diff --git 
a/tests/dbt/conftest.py b/tests/dbt/conftest.py index 5875d9f575..5e6444c8e6 100644 --- a/tests/dbt/conftest.py +++ b/tests/dbt/conftest.py @@ -1,13 +1,27 @@ from __future__ import annotations import typing as t +import os +from pathlib import Path import pytest from sqlmesh.core.context import Context +from sqlmesh.core.selector import DbtSelector from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.project import Project from sqlmesh.dbt.target import PostgresConfig +from sqlmesh_dbt.operations import init_project_if_required +import uuid + + +class EmptyProjectCreator(t.Protocol): + def __call__( + self, + project_name: t.Optional[str] = None, + target_name: t.Optional[str] = None, + start: t.Optional[str] = None, + ) -> t.Tuple[Path, Path]: ... @pytest.fixture() @@ -15,6 +29,80 @@ def sushi_test_project(sushi_test_dbt_context: Context) -> Project: return sushi_test_dbt_context._loaders[0]._load_projects()[0] # type: ignore +@pytest.fixture +def create_empty_project( + copy_to_temp_path: t.Callable[..., t.List[Path]], +) -> t.Iterable[EmptyProjectCreator]: + default_project_name = f"test_{str(uuid.uuid4())[:8]}" + default_target_name = "duckdb" + fixture_path = Path(__file__).parent.parent / "fixtures" / "dbt" / "empty_project" + assert fixture_path.exists() + + current_path = os.getcwd() + + def _create_empty_project( + project_name: t.Optional[str] = None, + target_name: t.Optional[str] = None, + start: t.Optional[str] = None, + ) -> t.Tuple[Path, Path]: + project_name = project_name or default_project_name + target_name = target_name or default_target_name + output_path = copy_to_temp_path(paths=fixture_path)[0] + + dbt_project_yml = output_path / "dbt_project.yml" + profiles_yml = output_path / "profiles.yml" + + assert dbt_project_yml.exists() + assert profiles_yml.exists() + + models_path = output_path / "models" + (models_path).mkdir() + (output_path / "seeds").mkdir() + + dbt_project_yml.write_text( + 
dbt_project_yml.read_text().replace("empty_project", project_name) + ) + profiles_yml.write_text( + profiles_yml.read_text() + .replace("empty_project", project_name) + .replace("__DEFAULT_TARGET__", target_name) + ) + + init_project_if_required(output_path, start) + + # so that we can invoke commands from the perspective of a user that is already in the correct directory + os.chdir(output_path) + + return output_path, models_path + + yield _create_empty_project + + # cleanup - switch cwd back to original + os.chdir(current_path) + + +@pytest.fixture +def jaffle_shop_duckdb(copy_to_temp_path: t.Callable[..., t.List[Path]]) -> t.Iterable[Path]: + fixture_path = Path(__file__).parent.parent / "fixtures" / "dbt" / "jaffle_shop_duckdb" + assert fixture_path.exists() + + current_path = os.getcwd() + output_path = copy_to_temp_path(paths=fixture_path)[0] + + # so that we can invoke commands from the perspective of a user that is alrady in the correct directory + os.chdir(output_path) + + yield output_path + + os.chdir(current_path) + + +@pytest.fixture +def jaffle_shop_duckdb_context(jaffle_shop_duckdb: Path) -> Context: + init_project_if_required(jaffle_shop_duckdb) + return Context(paths=[jaffle_shop_duckdb], selector=DbtSelector) + + @pytest.fixture() def runtime_renderer() -> t.Callable: def create_renderer(context: DbtContext, **kwargs: t.Any) -> t.Callable: @@ -39,3 +127,26 @@ def dbt_dummy_postgres_config() -> PostgresConfig: port=5432, schema="schema", ) + + +@pytest.fixture(scope="function", autouse=True) +def reset_dbt_globals(): + # This fixture is used to clear the memoized cache for _get_package_with_retries + # in dbt.clients.registry. 
This is necessary because the cache is shared across + # tests and can cause unexpected behavior if not cleared as some tests depend on + # the deprecation warning that _get_package_with_retries fires + yield + # https://github.com/dbt-labs/dbt-core/blob/main/tests/functional/conftest.py#L9 + try: + from dbt.clients.registry import _get_cached + + _get_cached.cache = {} + except Exception: + pass + # https://github.com/dbt-labs/dbt-core/blob/main/core/dbt/tests/util.py#L82 + try: + from dbt_common.events.functions import reset_metadata_vars + + reset_metadata_vars() + except Exception: + pass diff --git a/tests/dbt/test_adapter.py b/tests/dbt/test_adapter.py index 85dfa29559..5570212668 100644 --- a/tests/dbt/test_adapter.py +++ b/tests/dbt/test_adapter.py @@ -39,7 +39,7 @@ def test_adapter_relation(sushi_test_project: Project, runtime_renderer: t.Calla table_name="foo.another", target_columns_to_types={"col": exp.DataType.build("int")} ) engine_adapter.create_view( - view_name="foo.bar_view", query_or_df=parse_one("select * from foo.bar") + view_name="foo.bar_view", query_or_df=t.cast(exp.Query, parse_one("select * from foo.bar")) ) engine_adapter.create_table( table_name="ignored.ignore", target_columns_to_types={"col": exp.DataType.build("int")} @@ -242,6 +242,35 @@ def test_adapter_dispatch(sushi_test_project: Project, runtime_renderer: t.Calla assert renderer("{{ adapter.dispatch('current_engine', 'customers')() }}") == "duckdb" assert renderer("{{ adapter.dispatch('current_timestamp')() }}") == "now()" assert renderer("{{ adapter.dispatch('current_timestamp', 'dbt')() }}") == "now()" + assert renderer("{{ adapter.dispatch('select_distinct', 'customers')() }}") == "distinct" + + # test with keyword arguments + assert ( + renderer( + "{{ adapter.dispatch(macro_name='current_engine', macro_namespace='customers')() }}" + ) + == "duckdb" + ) + assert renderer("{{ adapter.dispatch(macro_name='current_timestamp')() }}") == "now()" + assert ( + renderer("{{ 
adapter.dispatch(macro_name='current_timestamp', macro_namespace='dbt')() }}") + == "now()" + ) + + # mixing positional and keyword arguments + assert ( + renderer("{{ adapter.dispatch('current_engine', macro_namespace='customers')() }}") + == "duckdb" + ) + assert ( + renderer("{{ adapter.dispatch('current_timestamp', macro_namespace=None)() }}") == "now()" + ) + assert ( + renderer("{{ adapter.dispatch('current_timestamp', macro_namespace='dbt')() }}") == "now()" + ) + + with pytest.raises(ConfigError, match=r"Macro 'current_engine'.*was not found."): + renderer("{{ adapter.dispatch(macro_name='current_engine')() }}") with pytest.raises(ConfigError, match=r"Macro 'current_engine'.*was not found."): renderer("{{ adapter.dispatch('current_engine')() }}") diff --git a/tests/dbt/test_config.py b/tests/dbt/test_config.py index 4e3e78eea9..5dccd90ed2 100644 --- a/tests/dbt/test_config.py +++ b/tests/dbt/test_config.py @@ -15,6 +15,7 @@ from sqlmesh.core.dialect import jinja_query from sqlmesh.core.model import SqlModel from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange +from sqlmesh.core.state_sync import CachingStateSync, EngineAdapterStateSync from sqlmesh.dbt.builtin import Api from sqlmesh.dbt.column import ColumnConfig from sqlmesh.dbt.common import Dependencies @@ -46,7 +47,8 @@ ) from sqlmesh.dbt.test import TestConfig from sqlmesh.utils.errors import ConfigError -from sqlmesh.utils.yaml import load as yaml_load +from sqlmesh.utils.yaml import load as yaml_load, dump as yaml_dump +from tests.dbt.conftest import EmptyProjectCreator pytestmark = pytest.mark.dbt @@ -91,8 +93,10 @@ def test_update(current: t.Dict[str, t.Any], new: t.Dict[str, t.Any], expected: def test_model_to_sqlmesh_fields(dbt_dummy_postgres_config: PostgresConfig): model_config = ModelConfig( + unique_id="model.package.name", name="name", package_name="package", + fqn=["package", "name"], alias="model", schema="custom", database="database", @@ -123,6 +127,8 @@ def 
test_model_to_sqlmesh_fields(dbt_dummy_postgres_config: PostgresConfig): assert isinstance(model, SqlModel) assert model.name == "database.custom.model" + assert model.dbt_unique_id == "model.package.name" + assert model.dbt_fqn == "package.name" assert model.description == "test model" assert ( model.render_query_or_raise().sql() @@ -185,7 +191,9 @@ def test_model_to_sqlmesh_fields(dbt_dummy_postgres_config: PostgresConfig): def test_test_to_sqlmesh_fields(): sql = "SELECT * FROM FOO WHERE cost > 100" test_config = TestConfig( + unique_id="test.test_package.foo_test", name="foo_test", + fqn=["test_package", "foo_test"], sql=sql, model_name="Foo", column_name="cost", @@ -199,6 +207,8 @@ def test_test_to_sqlmesh_fields(): audit = test_config.to_sqlmesh(context) assert audit.name == "foo_test" + assert audit.dbt_unique_id == "test.test_package.foo_test" + assert audit.dbt_fqn == "test_package.foo_test" assert audit.dialect == "duckdb" assert not audit.skip assert audit.blocking @@ -237,6 +247,31 @@ def test_test_to_sqlmesh_fields(): assert audit.dialect == "bigquery" +def test_test_config_canonical_name(): + test_config_upper_case_package = TestConfig( + name="foo_test", + package_name="TEST_PACKAGE", + sql="SELECT 1", + ) + + assert test_config_upper_case_package.canonical_name == "test_package.foo_test" + + test_config_mixed_case_package = TestConfig( + name="Bar_Test", + package_name="MixedCase_Package", + sql="SELECT 1", + ) + + assert test_config_mixed_case_package.canonical_name == "mixedcase_package.bar_test" + + test_config_no_package = TestConfig( + name="foo_bar_test", + sql="SELECT 1", + ) + + assert test_config_no_package.canonical_name == "foo_bar_test" + + def test_singular_test_to_standalone_audit(dbt_dummy_postgres_config: PostgresConfig): sql = "SELECT * FROM FOO.BAR WHERE cost > 100" test_config = TestConfig( @@ -343,19 +378,42 @@ def test_variables(assert_exp_eq, sushi_test_project): "customers:customer_id": "customer_id", "some_var": ["foo", 
"bar"], }, + "some_var": "should be overridden in customers package", + "invalid_var": "{{ ref('ref_without_closing_paren' }}", } expected_customer_variables = { - "some_var": ["foo", "bar"], + "some_var": ["foo", "bar"], # Takes precedence over the root project variable "some_other_var": 5, - "yet_another_var": 5, "customers:bla": False, "customers:customer_id": "customer_id", + "yet_another_var": 1, # Make sure that the project variable takes precedence + "top_waiters:limit": "{{ get_top_waiters_limit() }}", + "top_waiters:revenue": "revenue", + "customers:boo": ["a", "b"], + "nested_vars": { + "some_nested_var": 2, + }, + "dynamic_test_var": 3, + "list_var": [ + {"name": "item1", "value": 1}, + {"name": "item2", "value": 2}, + ], + "invalid_var": "{{ ref('ref_without_closing_paren' }}", } - assert sushi_test_project.packages["sushi"].variables == expected_sushi_variables assert sushi_test_project.packages["customers"].variables == expected_customer_variables +@pytest.mark.slow +def test_variables_override(init_and_plan_context: t.Callable): + context, _ = init_and_plan_context( + "tests/fixtures/dbt/sushi_test", config="test_config_with_var_override" + ) + dbt_project = context._loaders[0]._load_projects()[0] # type: ignore + assert dbt_project.packages["sushi"].variables["some_var"] == "overridden_from_config_py" + assert dbt_project.packages["customers"].variables["some_var"] == "overridden_from_config_py" + + @pytest.mark.slow def test_jinja_in_dbt_variables(sushi_test_dbt_context: Context): assert sushi_test_dbt_context.render("sushi.top_waiters").sql().endswith("LIMIT 10") @@ -1155,3 +1213,37 @@ def test_empty_vars_config(tmp_path): # Verify the variables are empty (not causing any issues) assert project.packages["test_empty_vars"].variables == {} assert project.context.variables == {} + + +def test_infer_state_schema_name(create_empty_project: EmptyProjectCreator): + project_dir, _ = create_empty_project("test_foo", "dev") + + # infer_state_schema_name 
defaults to False if omitted + config = sqlmesh_config(project_root=project_dir) + assert config.dbt + assert not config.dbt.infer_state_schema_name + assert config.get_state_schema() == "sqlmesh" + + # create_empty_project() uses the default dbt template for sqlmesh yaml config which + # sets infer_state_schema_name=True + ctx = Context(paths=[project_dir]) + assert ctx.config.dbt + assert ctx.config.dbt.infer_state_schema_name + assert ctx.config.get_state_schema() == "sqlmesh_state_test_foo_main" + assert isinstance(ctx.state_sync, CachingStateSync) + assert isinstance(ctx.state_sync.state_sync, EngineAdapterStateSync) + assert ctx.state_sync.state_sync.schema == "sqlmesh_state_test_foo_main" + + # If the user delberately overrides state_schema then we should respect this choice + config_file = project_dir / "sqlmesh.yaml" + config_yaml = yaml_load(config_file) + config_yaml["gateways"] = {"dev": {"state_schema": "state_override"}} + config_file.write_text(yaml_dump(config_yaml)) + + ctx = Context(paths=[project_dir]) + assert ctx.config.dbt + assert ctx.config.dbt.infer_state_schema_name + assert ctx.config.get_state_schema() == "state_override" + assert isinstance(ctx.state_sync, CachingStateSync) + assert isinstance(ctx.state_sync.state_sync, EngineAdapterStateSync) + assert ctx.state_sync.state_sync.schema == "state_override" diff --git a/tests/dbt/test_custom_materializations.py b/tests/dbt/test_custom_materializations.py new file mode 100644 index 0000000000..c1625d0251 --- /dev/null +++ b/tests/dbt/test_custom_materializations.py @@ -0,0 +1,777 @@ +from __future__ import annotations + +import typing as t +from pathlib import Path + +import pytest + +from sqlmesh import Context +from sqlmesh.core.config import ModelDefaultsConfig +from sqlmesh.core.engine_adapter import DuckDBEngineAdapter +from sqlmesh.core.model.kind import DbtCustomKind +from sqlmesh.dbt.context import DbtContext +from sqlmesh.dbt.manifest import ManifestHelper +from sqlmesh.dbt.model 
import ModelConfig +from sqlmesh.dbt.profile import Profile +from sqlmesh.dbt.basemodel import Materialization + +pytestmark = pytest.mark.dbt + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_manifest_loading(): + project_path = Path("tests/fixtures/dbt/sushi_test") + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + materializations = helper.materializations() + + # custom materialization should have loaded from the manifest + assert "custom_incremental_default" in materializations + custom_incremental = materializations["custom_incremental_default"] + assert custom_incremental.name == "custom_incremental" + assert custom_incremental.adapter == "default" + assert "make_temp_relation(new_relation)" in custom_incremental.definition + assert "run_hooks(pre_hooks)" in custom_incremental.definition + assert " {{ return({'relations': [new_relation]}) }}" in custom_incremental.definition + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_model_config(): + project_path = Path("tests/fixtures/dbt/sushi_test") + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + models = helper.models() + + custom_model = models["custom_incremental_model"] + assert isinstance(custom_model, ModelConfig) + assert custom_model.materialized == "custom_incremental" + assert custom_model.model_materialization == Materialization.CUSTOM + + # pre and post hooks should also be handled in custom materializations + assert len(custom_model.pre_hook) == 2 + assert ( + custom_model.pre_hook[1].sql + == "CREATE TABLE IF NOT EXISTS hook_table (id INTEGER, length_col TEXT, updated_at TIMESTAMP)" + ) + assert len(custom_model.post_hook) == 2 + assert 
"COALESCE(MAX(id), 0)" in custom_model.post_hook[1].sql + + custom_filter_model = models["custom_incremental_with_filter"] + assert isinstance(custom_filter_model, ModelConfig) + assert custom_filter_model.materialized == "custom_incremental" + assert custom_filter_model.model_materialization == Materialization.CUSTOM + assert custom_filter_model.interval == "2 day" + assert custom_filter_model.time_column == "created_at" + + # verify also that the global hooks are inherited in the model without + assert len(custom_filter_model.pre_hook) == 1 + assert len(custom_filter_model.post_hook) == 1 + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_model_kind(): + project_path = Path("tests/fixtures/dbt/sushi_test") + context = DbtContext(project_path) + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + context._target = profile.target + context._manifest = helper + models = helper.models() + + # custom materialization models get DbtCustomKind populated + custom_model = models["custom_incremental_model"] + kind = custom_model.model_kind(context) + assert isinstance(kind, DbtCustomKind) + assert kind.materialization == "custom_incremental" + assert kind.adapter == "default" + assert "create_table_as" in kind.definition + + custom_filter_model = models["custom_incremental_with_filter"] + kind = custom_filter_model.model_kind(context) + assert isinstance(kind, DbtCustomKind) + assert kind.materialization == "custom_incremental" + assert kind.adapter == "default" + assert "run_hooks" in kind.definition + + # the DbtCustomKind shouldnt be set for normal strategies + regular_model = models["simple_model_a"] + regular_kind = regular_model.model_kind(context) + assert not isinstance(regular_kind, DbtCustomKind) + + # verify in sqlmesh as well + sqlmesh_context = Context( + 
paths=["tests/fixtures/dbt/sushi_test"], + config=None, + ) + + custom_incremental = sqlmesh_context.get_model("sushi.custom_incremental_model") + assert isinstance(custom_incremental.kind, DbtCustomKind) + assert custom_incremental.kind.materialization == "custom_incremental" + + custom_with_filter = sqlmesh_context.get_model("sushi.custom_incremental_with_filter") + assert isinstance(custom_with_filter.kind, DbtCustomKind) + assert custom_with_filter.kind.materialization == "custom_incremental" + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_dependencies(): + project_path = Path("tests/fixtures/dbt/sushi_test") + context = DbtContext(project_path) + profile = Profile.load(DbtContext(project_path)) + + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + context._target = profile.target + context._manifest = helper + models = helper.models() + + # custom materialization uses macros that should appear in dependencies + for model_name in ["custom_incremental_model", "custom_incremental_with_filter"]: + materialization_deps = models[model_name]._get_custom_materialization(context) + assert materialization_deps is not None + assert len(materialization_deps.dependencies.macros) > 0 + macro_names = [macro.name for macro in materialization_deps.dependencies.macros] + expected_macros = [ + "build_incremental_filter_sql", + "Relation", + "create_table_as", + "make_temp_relation", + "run_hooks", + "statement", + ] + assert any(macro in macro_names for macro in expected_macros) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_adapter_specific_materialization_override(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + macros_dir = temp_project / "macros" / "materializations" + macros_dir.mkdir(parents=True, exist_ok=True) + + adapter_mat_content = """ +{%- materialization 
custom_adapter_test, default -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT 'default_adapter' as adapter_type, * FROM ({{ sql }}) AS subquery + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} + +{%- materialization custom_adapter_test, adapter='postgres' -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT 'postgres_adapter'::text as adapter_type, * FROM ({{ sql }}) AS subquery + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} + +{%- materialization custom_adapter_test, adapter='duckdb' -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT 'duckdb_adapter' as adapter_type, * FROM ({{ sql }}) AS subquery + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} +""".strip() + + (macros_dir / "custom_adapter_test.sql").write_text(adapter_mat_content) + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_adapter_test', +) }} + +SELECT + 1 as id, + 'test' as name +""".strip() + + (models_dir / "test_adapter_specific.sql").write_text(test_model_content) + + context = 
DbtContext(temp_project) + profile = Profile.load(context) + + helper = ManifestHelper( + temp_project, + temp_project, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + materializations = helper.materializations() + assert "custom_adapter_test_default" in materializations + assert "custom_adapter_test_duckdb" in materializations + assert "custom_adapter_test_postgres" in materializations + + default_mat = materializations["custom_adapter_test_default"] + assert "default_adapter" in default_mat.definition + assert default_mat.adapter == "default" + + duckdb_mat = materializations["custom_adapter_test_duckdb"] + assert "duckdb_adapter" in duckdb_mat.definition + assert duckdb_mat.adapter == "duckdb" + + postgres_mat = materializations["custom_adapter_test_postgres"] + assert "postgres_adapter" in postgres_mat.definition + assert postgres_mat.adapter == "postgres" + + # verify that the correct adapter is selected based on target + context._target = profile.target + context._manifest = helper + models = helper.models() + + test_model = models["test_adapter_specific"] + + kind = test_model.model_kind(context) + assert isinstance(kind, DbtCustomKind) + assert kind.materialization == "custom_adapter_test" + # Should use duckdb adapter since that's the default target + assert "duckdb_adapter" in kind.definition or "default_adapter" in kind.definition + + # test also that adapter-specific materializations execute with correct adapter + sushi_context = Context(paths=path) + + plan = sushi_context.plan(select_models=["sushi.test_adapter_specific"]) + sushi_context.apply(plan) + + # check that the table was created with the correct adapter type + result = sushi_context.engine_adapter.fetchdf("SELECT * FROM sushi.test_adapter_specific") + assert len(result) == 1 + assert "adapter_type" in result.columns + assert result["adapter_type"][0] == "duckdb_adapter" + assert result["id"][0] == 1 + assert result["name"][0] == "test" + + 
+@pytest.mark.xdist_group("dbt_manifest") +def test_missing_custom_materialization_error(): + from sqlmesh.utils.errors import ConfigError + + project_path = Path("tests/fixtures/dbt/sushi_test") + context = DbtContext(project_path) + profile = Profile.load(context) + + # the materialization is non-existent + fake_model_config = ModelConfig( + name="test_model", + path=project_path / "models" / "fake_model.sql", + raw_code="SELECT 1 as id", + materialized="non_existent_custom", + schema="test_schema", + ) + + context._target = profile.target + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + context._manifest = helper + + # Should raise ConfigError when trying to get the model kind + with pytest.raises(ConfigError) as e: + fake_model_config.model_kind(context) + + assert "Unknown materialization 'non_existent_custom'" in str(e.value) + assert "Custom materializations must be defined" in str(e.value) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_broken_jinja_materialization_error(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + macros_dir = temp_project / "macros" / "materializations" + macros_dir.mkdir(parents=True, exist_ok=True) + + # Create broken Jinja materialization + broken_mat_content = """ +{%- materialization broken_jinja, default -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {# An intentional undefined variable that will cause runtime error #} + {%- set broken_var = undefined_variable_that_does_not_exist + 10 -%} + + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} AS ( + SELECT * FROM ({{ sql }}) AS subquery + WHERE 1 = {{ broken_var }} + ) + {%- endcall -%} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ 
return({'relations': [new_relation]}) }} +{%- endmaterialization -%} +""".strip() + + (macros_dir / "broken_jinja.sql").write_text(broken_mat_content) + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='broken_jinja', +) }} + +SELECT + 1 as id, + 'This should fail with Jinja error' as error_msg +""".strip() + + (models_dir / "test_broken_jinja.sql").write_text(test_model_content) + + sushi_context = Context(paths=path) + + # The model will load fine jinja won't fail at parse time + model = sushi_context.get_model("sushi.test_broken_jinja") + assert isinstance(model.kind, DbtCustomKind) + assert model.kind.materialization == "broken_jinja" + + # but execution should fail + with pytest.raises(Exception) as e: + plan = sushi_context.plan(select_models=["sushi.test_broken_jinja"]) + sushi_context.apply(plan) + + assert "plan application failed" in str(e.value).lower() + + +@pytest.mark.xdist_group("dbt_manifest") +def test_failing_hooks_in_materialization(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_incremental', + pre_hook="CREATE TABLE will_fail_due_to_intentional_syntax_error (", + post_hook="DROP TABLE non_existent_table_that_will_fail", +) }} + +SELECT + 1 as id, + 'Testing hook failures' as test_msg +""".strip() + + (models_dir / "test_failing_hooks.sql").write_text(test_model_content) + + sushi_context = Context(paths=[str(temp_project)]) + + # in this case the pre_hook has invalid syntax + with pytest.raises(Exception) as e: + plan = sushi_context.plan(select_models=["sushi.test_failing_hooks"]) + sushi_context.apply(plan) + + assert "plan application failed" in str(e.value).lower() + + +@pytest.mark.xdist_group("dbt_manifest") +def 
test_custom_materialization_virtual_environments(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='created_at', +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + 1 as id, + 'venv_test' as test_type +""".strip() + + (models_dir / "test_venv_model.sql").write_text(test_model_content) + + sushi_context = Context(paths=path) + prod_plan = sushi_context.plan(select_models=["sushi.test_venv_model"]) + sushi_context.apply(prod_plan) + prod_result = sushi_context.engine_adapter.fetchdf( + "SELECT * FROM sushi.test_venv_model ORDER BY id" + ) + assert len(prod_result) == 1 + assert prod_result["id"][0] == 1 + assert prod_result["test_type"][0] == "venv_test" + + # Create dev environment and check the dev table was created with proper naming + dev_plan = sushi_context.plan("dev", select_models=["sushi.test_venv_model"]) + sushi_context.apply(dev_plan) + dev_result = sushi_context.engine_adapter.fetchdf( + "SELECT * FROM sushi__dev.test_venv_model ORDER BY id" + ) + assert len(dev_result) == 1 + assert dev_result["id"][0] == 1 + assert dev_result["test_type"][0] == "venv_test" + + dev_tables = sushi_context.engine_adapter.fetchdf(""" + SELECT table_name, table_schema + FROM system.information_schema.tables + WHERE table_schema LIKE 'sushi%dev%' + AND table_name LIKE '%test_venv_model%' + """) + + prod_tables = sushi_context.engine_adapter.fetchdf(""" + SELECT table_name, table_schema + FROM system.information_schema.tables + WHERE table_schema = 'sushi' + AND table_name LIKE '%test_venv_model%' + """) + + # Verify both environments have their own tables + assert len(dev_tables) >= 1 + assert len(prod_tables) >= 1 + + +@pytest.mark.xdist_group("dbt_manifest") +def test_virtual_environment_schema_names(copy_to_temp_path: 
t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='created_at', +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + 1 as id, + 'schema_naming_test' as test_type +""".strip() + + (models_dir / "test_schema_naming.sql").write_text(test_model_content) + + context = Context(paths=path) + prod_plan = context.plan(select_models=["sushi.test_schema_naming"]) + context.apply(prod_plan) + + dev_plan = context.plan("dev", select_models=["sushi.test_schema_naming"]) + context.apply(dev_plan) + + prod_result = context.engine_adapter.fetchdf( + "SELECT * FROM sushi.test_schema_naming ORDER BY id" + ) + assert len(prod_result) == 1 + assert prod_result["test_type"][0] == "schema_naming_test" + + dev_result = context.engine_adapter.fetchdf( + "SELECT * FROM sushi__dev.test_schema_naming ORDER BY id" + ) + assert len(dev_result) == 1 + assert dev_result["test_type"][0] == "schema_naming_test" + + # to examine the schema structure + all_schemas_query = """ + SELECT DISTINCT table_schema, COUNT(*) as table_count + FROM system.information_schema.tables + WHERE table_schema LIKE '%sushi%' + AND table_name LIKE '%test_schema_naming%' + GROUP BY table_schema + ORDER BY table_schema + """ + + schema_info = context.engine_adapter.fetchdf(all_schemas_query) + + schema_names = schema_info["table_schema"].tolist() + + # - virtual schemas: sushi, sushi__dev (for views) + view_schemas = [s for s in schema_names if not s.startswith("sqlmesh__")] + + # - physical schema: sqlmesh__sushi (for actual data tables) + physical_schemas = [s for s in schema_names if s.startswith("sqlmesh__")] + + # verify we got both of them + assert len(view_schemas) >= 2 + assert len(physical_schemas) >= 1 + assert "sushi" in view_schemas + assert "sushi__dev" in view_schemas + assert 
any("sqlmesh__sushi" in s for s in physical_schemas) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_lineage_tracking(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + # create a custom materialization model that depends on simple_model_a and waiter_names seed + lineage_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='created_at', +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + w.id as waiter_id, + w.name as waiter_name, + s.a as simple_value, + w.id * s.a as computed_value, + 'lineage_test' as model_type +FROM {{ ref('waiter_names') }} w +CROSS JOIN {{ ref('simple_model_a') }} s +""".strip() + + (models_dir / "enhanced_waiter_data.sql").write_text(lineage_model_content) + + # Create another custom materialization model that depends on the first one and simple_model_b + downstream_model_content = """ +{{ config( + materialized='custom_incremental', + time_column='analysis_date', +) }} + +SELECT + CURRENT_TIMESTAMP as analysis_date, + e.waiter_name, + e.simple_value, + e.computed_value, + b.a as model_b_value, + e.computed_value + b.a as final_computation, + CASE + WHEN e.computed_value >= 5 THEN 'High' + WHEN e.computed_value >= 2 THEN 'Medium' + ELSE 'Low' + END as category, + 'downstream_lineage_test' as model_type +FROM {{ ref('enhanced_waiter_data') }} e +CROSS JOIN {{ ref('simple_model_b') }} b +WHERE e.computed_value >= 0 +""".strip() + + (models_dir / "waiter_analytics_summary.sql").write_text(downstream_model_content) + + context = Context(paths=path) + enhanced_data_model = context.get_model("sushi.enhanced_waiter_data") + analytics_summary_model = context.get_model("sushi.waiter_analytics_summary") + + # Verify that custom materialization models have proper model kinds + assert isinstance(enhanced_data_model.kind, DbtCustomKind) 
+ assert enhanced_data_model.kind.materialization == "custom_incremental" + + assert isinstance(analytics_summary_model.kind, DbtCustomKind) + assert analytics_summary_model.kind.materialization == "custom_incremental" + + # - enhanced_waiter_data should depend on waiter_names and simple_model_a + enhanced_data_deps = enhanced_data_model.depends_on + assert '"memory"."sushi"."simple_model_a"' in enhanced_data_deps + assert '"memory"."sushi"."waiter_names"' in enhanced_data_deps + + # - waiter_analytics_summary should depend on enhanced_waiter_data and simple_model_b + analytics_deps = analytics_summary_model.depends_on + assert '"memory"."sushi"."enhanced_waiter_data"' in analytics_deps + assert '"memory"."sushi"."simple_model_b"' in analytics_deps + + # build only the models that have dependences + plan = context.plan( + select_models=[ + "sushi.waiter_names", + "sushi.simple_model_a", + "sushi.simple_model_b", + "sushi.enhanced_waiter_data", + "sushi.waiter_analytics_summary", + ] + ) + context.apply(plan) + + # Verify that all δοwnstream models were built and contain expected data + waiter_names_result = context.engine_adapter.fetchdf( + "SELECT COUNT(*) as count FROM sushi.waiter_names" + ) + assert waiter_names_result["count"][0] > 0 + + simple_a_result = context.engine_adapter.fetchdf("SELECT a FROM sushi.simple_model_a") + assert len(simple_a_result) > 0 + assert simple_a_result["a"][0] == 1 + + simple_b_result = context.engine_adapter.fetchdf("SELECT a FROM sushi.simple_model_b") + assert len(simple_b_result) > 0 + assert simple_b_result["a"][0] == 1 + + # Check intermediate custom materialization model + enhanced_data_result = context.engine_adapter.fetchdf(""" + SELECT + waiter_name, + simple_value, + computed_value, + model_type + FROM sushi.enhanced_waiter_data + ORDER BY waiter_id + LIMIT 5 + """) + + assert len(enhanced_data_result) > 0 + assert enhanced_data_result["model_type"][0] == "lineage_test" + assert all(val == 1 for val in 
enhanced_data_result["simple_value"]) + assert all(val >= 0 for val in enhanced_data_result["computed_value"]) + assert any(val == "Ryan" for val in enhanced_data_result["waiter_name"]) + + # Check final downstream custom materialization model + analytics_summary_result = context.engine_adapter.fetchdf(""" + SELECT + waiter_name, + category, + model_type, + final_computation + FROM sushi.waiter_analytics_summary + ORDER BY waiter_name + LIMIT 5 + """) + + assert len(analytics_summary_result) > 0 + assert analytics_summary_result["model_type"][0] == "downstream_lineage_test" + assert all(cat in ["High", "Medium", "Low"] for cat in analytics_summary_result["category"]) + assert all(val >= 0 for val in analytics_summary_result["final_computation"]) + + # Test that lineage information is preserved in dev environments + dev_plan = context.plan("dev", select_models=["sushi.waiter_analytics_summary"]) + context.apply(dev_plan) + + dev_analytics_result = context.engine_adapter.fetchdf(""" + SELECT + COUNT(*) as count, + COUNT(DISTINCT waiter_name) as unique_waiters + FROM sushi__dev.waiter_analytics_summary + """) + + prod_analytics_result = context.engine_adapter.fetchdf(""" + SELECT + COUNT(*) as count, + COUNT(DISTINCT waiter_name) as unique_waiters + FROM sushi.waiter_analytics_summary + """) + + # Dev and prod should have the same data as they share physical data + assert dev_analytics_result["count"][0] == prod_analytics_result["count"][0] + assert dev_analytics_result["unique_waiters"][0] == prod_analytics_result["unique_waiters"][0] + + +@pytest.mark.xdist_group("dbt_manifest") +def test_custom_materialization_grants(copy_to_temp_path: t.Callable, mocker): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + grants_model_content = """ +{{ config( + materialized='custom_incremental', + grants={ + 'select': ['user1', 'user2'], + 'insert': 
['writer'] + } +) }} + +SELECT + CURRENT_TIMESTAMP as created_at, + 1 as id, + 'grants_test' as test_type +""".strip() + + (models_dir / "test_grants_model.sql").write_text(grants_model_content) + + mocker.patch.object(DuckDBEngineAdapter, "SUPPORTS_GRANTS", True) + mocker.patch.object(DuckDBEngineAdapter, "_get_current_grants_config", return_value={}) + + sync_grants_calls = [] + + def mock_sync_grants(*args, **kwargs): + sync_grants_calls.append((args, kwargs)) + + mocker.patch.object(DuckDBEngineAdapter, "sync_grants_config", side_effect=mock_sync_grants) + + context = Context(paths=path) + + model = context.get_model("sushi.test_grants_model") + assert isinstance(model.kind, DbtCustomKind) + plan = context.plan(select_models=["sushi.test_grants_model"]) + context.apply(plan) + + assert len(sync_grants_calls) == 1 + args = sync_grants_calls[0][0] + assert args + + table = args[0] + grants_config = args[1] + assert table.sql(dialect="duckdb") == "memory.sushi.test_grants_model" + assert grants_config == { + "select": ["user1", "user2"], + "insert": ["writer"], + } diff --git a/tests/dbt/test_integration.py b/tests/dbt/test_integration.py index 5a944d55d4..ab22bf7826 100644 --- a/tests/dbt/test_integration.py +++ b/tests/dbt/test_integration.py @@ -19,7 +19,8 @@ from sqlmesh.core.config.connection import DuckDBConnectionConfig from sqlmesh.core.engine_adapter import DuckDBEngineAdapter from sqlmesh.utils.pandas import columns_to_types_from_df -from sqlmesh.utils.yaml import YAML +from sqlmesh.utils.yaml import YAML, load as yaml_load, dump as yaml_dump +from sqlmesh_dbt.operations import init_project_if_required from tests.utils.pandas import compare_dataframes, create_df # Some developers had issues with this test freezing locally so we mark it as cicdonly @@ -540,3 +541,114 @@ def test_scd_type_2_by_column( ) df_expected = create_df(expected_table_data, self.target_schema) compare_dataframes(df_actual, df_expected, msg=f"Failed on time {time}") + + +def 
test_dbt_node_info(jaffle_shop_duckdb_context: Context): + ctx = jaffle_shop_duckdb_context + + customers = ctx.models['"jaffle_shop"."main"."customers"'] + assert customers.dbt_unique_id == "model.jaffle_shop.customers" + assert customers.dbt_fqn == "jaffle_shop.customers" + assert customers.dbt_node_info + assert customers.dbt_node_info.name == "customers" + + orders = ctx.models['"jaffle_shop"."main"."orders"'] + assert orders.dbt_unique_id == "model.jaffle_shop.orders" + assert orders.dbt_fqn == "jaffle_shop.orders" + assert orders.dbt_node_info + assert orders.dbt_node_info.name == "orders" + + stg_customers = ctx.models['"jaffle_shop"."main"."stg_customers"'] + assert stg_customers.dbt_unique_id == "model.jaffle_shop.stg_customers" + assert stg_customers.dbt_fqn == "jaffle_shop.staging.stg_customers" + assert stg_customers.dbt_node_info + assert stg_customers.dbt_node_info.name == "stg_customers" + + stg_orders = ctx.models['"jaffle_shop"."main"."stg_orders"'] + assert stg_orders.dbt_unique_id == "model.jaffle_shop.stg_orders" + assert stg_orders.dbt_fqn == "jaffle_shop.staging.stg_orders" + assert stg_orders.dbt_node_info + assert stg_orders.dbt_node_info.name == "stg_orders" + + raw_customers = ctx.models['"jaffle_shop"."main"."raw_customers"'] + assert raw_customers.dbt_unique_id == "seed.jaffle_shop.raw_customers" + assert raw_customers.dbt_fqn == "jaffle_shop.raw_customers" + assert raw_customers.dbt_node_info + assert raw_customers.dbt_node_info.name == "raw_customers" + + raw_orders = ctx.models['"jaffle_shop"."main"."raw_orders"'] + assert raw_orders.dbt_unique_id == "seed.jaffle_shop.raw_orders" + assert raw_orders.dbt_fqn == "jaffle_shop.raw_orders" + assert raw_orders.dbt_node_info + assert raw_orders.dbt_node_info.name == "raw_orders" + + raw_payments = ctx.models['"jaffle_shop"."main"."raw_payments"'] + assert raw_payments.dbt_unique_id == "seed.jaffle_shop.raw_payments" + assert raw_payments.dbt_fqn == "jaffle_shop.raw_payments" + assert 
raw_payments.dbt_node_info + assert raw_payments.dbt_node_info.name == "raw_payments" + + relationship_audit = ctx.snapshots[ + "relationships_orders_customer_id__customer_id__ref_customers_" + ] + assert relationship_audit.node.is_audit + assert ( + relationship_audit.node.dbt_unique_id + == "test.jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_.c6ec7f58f2" + ) + assert ( + relationship_audit.node.dbt_fqn + == "jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_" + ) + assert relationship_audit.node.dbt_node_info + assert ( + relationship_audit.node.dbt_node_info.name + == "relationships_orders_customer_id__customer_id__ref_customers_" + ) + + +def test_state_schema_isolation_per_target(jaffle_shop_duckdb: Path): + profiles_file = jaffle_shop_duckdb / "profiles.yml" + + profiles_yml = yaml_load(profiles_file) + + # make prod / dev config identical with the exception of a different default schema to simulate using the same warehouse + profiles_yml["jaffle_shop"]["outputs"]["prod"] = { + **profiles_yml["jaffle_shop"]["outputs"]["dev"] + } + profiles_yml["jaffle_shop"]["outputs"]["prod"]["schema"] = "prod_schema" + profiles_yml["jaffle_shop"]["outputs"]["dev"]["schema"] = "dev_schema" + + profiles_file.write_text(yaml_dump(profiles_yml)) + + init_project_if_required(jaffle_shop_duckdb) + + # start off with the prod target + prod_ctx = Context(paths=[jaffle_shop_duckdb], config_loader_kwargs={"target": "prod"}) + assert prod_ctx.config.get_state_schema() == "sqlmesh_state_jaffle_shop_prod_schema" + assert all("prod_schema" in fqn for fqn in prod_ctx.models) + assert prod_ctx.plan(auto_apply=True).has_changes + assert not prod_ctx.plan(auto_apply=True).has_changes + + # dev target should have changes - new state separate from prod + dev_ctx = Context(paths=[jaffle_shop_duckdb], config_loader_kwargs={"target": "dev"}) + assert dev_ctx.config.get_state_schema() == "sqlmesh_state_jaffle_shop_dev_schema" + assert 
all("dev_schema" in fqn for fqn in dev_ctx.models) + assert dev_ctx.plan(auto_apply=True).has_changes + assert not dev_ctx.plan(auto_apply=True).has_changes + + # no explicitly specified target should use dev because that's what's set for the default in the profiles.yml + assert profiles_yml["jaffle_shop"]["target"] == "dev" + default_ctx = Context(paths=[jaffle_shop_duckdb]) + assert default_ctx.config.get_state_schema() == "sqlmesh_state_jaffle_shop_dev_schema" + assert all("dev_schema" in fqn for fqn in default_ctx.models) + assert not default_ctx.plan(auto_apply=True).has_changes + + # an explicit state schema override set in `sqlmesh.yaml` should use that + sqlmesh_yaml_file = jaffle_shop_duckdb / "sqlmesh.yaml" + sqlmesh_yaml = yaml_load(sqlmesh_yaml_file) + sqlmesh_yaml["gateways"] = {"dev": {"state_schema": "sqlmesh_dev_state_override"}} + sqlmesh_yaml_file.write_text(yaml_dump(sqlmesh_yaml)) + default_ctx = Context(paths=[jaffle_shop_duckdb]) + assert default_ctx.config.get_state_schema() == "sqlmesh_dev_state_override" + assert all("dev_schema" in fqn for fqn in default_ctx.models) diff --git a/tests/dbt/test_manifest.py b/tests/dbt/test_manifest.py index e5e98eae49..2ecf8b8980 100644 --- a/tests/dbt/test_manifest.py +++ b/tests/dbt/test_manifest.py @@ -232,7 +232,7 @@ def test_source_meta_external_location(): expected = ( "read_parquet('path/to/external/items.parquet')" if DBT_VERSION >= (1, 4, 0) - else '"main"."parquet_file".items' + else '"memory"."parquet_file".items' ) assert relation.render() == expected @@ -304,3 +304,66 @@ def test_convert_jinja_test_to_macro(): {%- endmacro -%}""" assert _convert_jinja_test_to_macro(macro_input) == macro_input + + +@pytest.mark.xdist_group("dbt_manifest") +def test_macro_depenency_none_str(): + project_path = Path("tests/fixtures/dbt/sushi_test") + profile = Profile.load(DbtContext(project_path)) + helper = ManifestHelper( + project_path, + project_path, + "sushi", + profile.target, + 
model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + node = helper._manifest.nodes["model.customers.customer_revenue_by_day"] + node.depends_on.macros.append("None") + + from sqlmesh.dbt.manifest import _macro_references + + # "None" macro shouldn't raise a KeyError + _macro_references(helper._manifest, node) + + +@pytest.mark.xdist_group("dbt_manifest") +def test_macro_assignment_shadowing(create_empty_project): + project_name = "local" + project_path, models_path = create_empty_project(project_name=project_name) + + macros_path = project_path / "macros" + macros_path.mkdir() + + (macros_path / "model_path_macro.sql").write_text(""" +{% macro model_path_macro() %} + {% if execute %} + {% set model = model.path.split('/')[-1].replace('.sql', '') %} + SELECT '{{ model }}' as model_name + {% else %} + SELECT 'placeholder' as placeholder + {% endif %} +{% endmacro %} +""") + + (models_path / "model_using_path_macro.sql").write_text(""" +{{ model_path_macro() }} +""") + + context = DbtContext(project_path) + profile = Profile.load(context) + + helper = ManifestHelper( + project_path, + project_path, + project_name, + profile.target, + model_defaults=ModelDefaultsConfig(start="2020-01-01"), + ) + + macros = helper.macros(project_name) + assert "model_path_macro" in macros + assert "path" in macros["model_path_macro"].dependencies.model_attrs.attrs + + models = helper.models() + assert "model_using_path_macro" in models + assert "path" in models["model_using_path_macro"].dependencies.model_attrs.attrs diff --git a/tests/dbt/test_model.py b/tests/dbt/test_model.py index dc2ebc492b..a954f98f41 100644 --- a/tests/dbt/test_model.py +++ b/tests/dbt/test_model.py @@ -1,5 +1,6 @@ import datetime -import typing as t +import logging + import pytest from pathlib import Path @@ -7,62 +8,24 @@ from sqlglot import exp from sqlglot.errors import SchemaError from sqlmesh import Context +from sqlmesh.core.console import NoopConsole, get_console from sqlmesh.core.model import 
TimeColumn, IncrementalByTimeRangeKind -from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange +from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange, SCDType2ByColumnKind from sqlmesh.core.state_sync.db.snapshot import _snapshot_to_json +from sqlmesh.core.config.common import VirtualEnvironmentMode +from sqlmesh.core.model.meta import GrantsTargetLayer from sqlmesh.dbt.common import Dependencies from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.model import ModelConfig -from sqlmesh.dbt.target import PostgresConfig +from sqlmesh.dbt.target import BigQueryConfig, DuckDbConfig, PostgresConfig from sqlmesh.dbt.test import TestConfig from sqlmesh.utils.yaml import YAML +from sqlmesh.utils.date import to_ds +import typing as t pytestmark = pytest.mark.dbt -@pytest.fixture -def create_empty_project(tmp_path: Path) -> t.Callable[[], t.Tuple[Path, Path]]: - def _create_empty_project() -> t.Tuple[Path, Path]: - yaml = YAML() - dbt_project_dir = tmp_path / "dbt" - dbt_project_dir.mkdir() - dbt_model_dir = dbt_project_dir / "models" - dbt_model_dir.mkdir() - dbt_project_config = { - "name": "empty_project", - "version": "1.0.0", - "config-version": 2, - "profile": "test", - "model-paths": ["models"], - } - dbt_project_file = dbt_project_dir / "dbt_project.yml" - with open(dbt_project_file, "w", encoding="utf-8") as f: - YAML().dump(dbt_project_config, f) - sqlmesh_config = { - "model_defaults": { - "start": "2025-01-01", - } - } - sqlmesh_config_file = dbt_project_dir / "sqlmesh.yaml" - with open(sqlmesh_config_file, "w", encoding="utf-8") as f: - YAML().dump(sqlmesh_config, f) - dbt_data_dir = tmp_path / "dbt_data" - dbt_data_dir.mkdir() - dbt_data_file = dbt_data_dir / "local.db" - dbt_profile_config = { - "test": { - "outputs": {"duckdb": {"type": "duckdb", "path": str(dbt_data_file)}}, - "target": "duckdb", - } - } - db_profile_file = dbt_project_dir / "profiles.yml" - with open(db_profile_file, "w", encoding="utf-8") as 
f: - yaml.dump(dbt_profile_config, f) - return dbt_project_dir, dbt_model_dir - - return _create_empty_project - - def test_test_config_is_standalone_behavior() -> None: """Test that TestConfig.is_standalone correctly identifies tests with cross-model references""" @@ -174,7 +137,7 @@ def test_manifest_filters_standalone_tests_from_models( ) -> None: """Integration test that verifies models only contain non-standalone tests after manifest loading.""" yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # Create two models model1_contents = "SELECT 1 as id" @@ -233,23 +196,23 @@ def test_manifest_filters_standalone_tests_from_models( # Should only have "not_null" test, not the "relationships" test model1_audit_names = [audit[0] for audit in model1_snapshot.model.audits] assert len(model1_audit_names) == 1 - assert model1_audit_names[0] == "not_null_model1_id" + assert model1_audit_names[0] == "local.not_null_model1_id" # Verify model2 has its non-standalone test model2_audit_names = [audit[0] for audit in model2_snapshot.model.audits] assert len(model2_audit_names) == 1 - assert model2_audit_names[0] == "not_null_model2_id" + assert model2_audit_names[0] == "local.not_null_model2_id" # Verify the standalone test (relationships) exists as a StandaloneAudit all_non_standalone_audits = [name for name in context._audits] assert sorted(all_non_standalone_audits) == [ - "not_null_model1_id", - "not_null_model2_id", + "local.not_null_model1_id", + "local.not_null_model2_id", ] standalone_audits = [name for name in context._standalone_audits] assert len(standalone_audits) == 1 - assert standalone_audits[0] == "relationships_model1_id__id__ref_model2_" + assert standalone_audits[0] == "local.relationships_model1_id__id__ref_model2_" plan_builder = context.plan_builder() dag = plan_builder._build_dag() @@ -265,7 +228,7 @@ def test_load_invalid_ref_audit_constraints( tmp_path: Path, caplog, 
dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it full_model_contents = """{{ config(tags=["blah"], tests=[{"blah": {"to": "ref('completely_ignored')", "field": "blah2"} }]) }} SELECT 1 as cola""" full_model_file = model_dir / "full_model.sql" @@ -313,7 +276,9 @@ def test_load_invalid_ref_audit_constraints( with open(model_schema_file, "w", encoding="utf-8") as f: yaml.dump(model_schema, f) - context = Context(paths=project_dir) + assert isinstance(get_console(), NoopConsole) + with caplog.at_level(logging.DEBUG): + context = Context(paths=project_dir) assert ( "Skipping audit 'relationships_full_model_cola__cola__ref_not_real_model_' because model 'not_real_model' is not a valid ref" in caplog.text @@ -332,7 +297,7 @@ def test_load_invalid_ref_audit_constraints( def test_load_microbatch_all_defined( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it microbatch_contents = """ {{ @@ -373,7 +338,7 @@ def test_load_microbatch_all_defined( def test_load_microbatch_all_defined_diff_values( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it microbatch_contents = """ {{ @@ -415,7 +380,7 @@ def test_load_microbatch_all_defined_diff_values( def 
test_load_microbatch_required_only( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it microbatch_contents = """ {{ @@ -454,7 +419,7 @@ def test_load_microbatch_required_only( def test_load_incremental_time_range_strategy_required_only( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local", start="2025-01-01") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it incremental_time_range_contents = """ {{ @@ -476,7 +441,7 @@ def test_load_incremental_time_range_strategy_required_only( snapshot = context.snapshots[snapshot_fqn] model = snapshot.model # Validate model-level attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or "") == "2025-01-01" assert model.interval_unit.is_day # Validate model kind attributes assert isinstance(model.kind, IncrementalByTimeRangeKind) @@ -496,7 +461,7 @@ def test_load_incremental_time_range_strategy_required_only( def test_load_incremental_time_range_strategy_all_defined( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local", start="2025-01-01") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it incremental_time_range_contents = """ {{ @@ -532,7 +497,7 @@ def test_load_incremental_time_range_strategy_all_defined( snapshot = context.snapshots[snapshot_fqn] model = snapshot.model # Validate model-level 
attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or "") == "2025-01-01" assert model.interval_unit.is_day # Validate model kind attributes assert isinstance(model.kind, IncrementalByTimeRangeKind) @@ -559,7 +524,7 @@ def test_load_incremental_time_range_strategy_all_defined( def test_load_deprecated_incremental_time_column( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local", start="2025-01-01") # add `tests` to model config since this is loaded by dbt and ignored and we shouldn't error when loading it incremental_time_range_contents = """ {{ @@ -577,13 +542,15 @@ def test_load_deprecated_incremental_time_column( f.write(incremental_time_range_contents) snapshot_fqn = '"local"."main"."incremental_time_range"' - context = Context(paths=project_dir) + assert isinstance(get_console(), NoopConsole) + with caplog.at_level(logging.DEBUG): + context = Context(paths=project_dir) model = context.snapshots[snapshot_fqn].model # Validate model-level attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or "") == "2025-01-01" assert model.interval_unit.is_day # Validate model-level attributes - assert model.start == "2025-01-01" + assert to_ds(model.start or "") == "2025-01-01" assert model.interval_unit.is_day # Validate model kind attributes assert isinstance(model.kind, IncrementalByTimeRangeKind) @@ -606,7 +573,7 @@ def test_load_microbatch_with_ref( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") source_schema = { "version": 2, "sources": [ @@ -659,11 +626,11 @@ def test_load_microbatch_with_ref( context = Context(paths=project_dir) assert ( context.render(microbatch_snapshot_fqn, 
start="2025-01-01", end="2025-01-10").sql() - == 'SELECT "cola" AS "cola", "ds_source" AS "ds" FROM (SELECT * FROM "local"."my_source"."my_table" AS "my_table" WHERE "ds_source" >= \'2025-01-01 00:00:00+00:00\' AND "ds_source" < \'2025-01-11 00:00:00+00:00\') AS "_q_0"' + == 'SELECT "cola" AS "cola", "ds_source" AS "ds" FROM (SELECT * FROM "local"."my_source"."my_table" AS "my_table" WHERE "ds_source" >= \'2025-01-01 00:00:00+00:00\' AND "ds_source" < \'2025-01-11 00:00:00+00:00\') AS "_0"' ) assert ( context.render(microbatch_two_snapshot_fqn, start="2025-01-01", end="2025-01-10").sql() - == 'SELECT "_q_0"."cola" AS "cola", "_q_0"."ds" AS "ds" FROM (SELECT "microbatch"."cola" AS "cola", "microbatch"."ds" AS "ds" FROM "local"."main"."microbatch" AS "microbatch" WHERE "microbatch"."ds" < \'2025-01-11 00:00:00+00:00\' AND "microbatch"."ds" >= \'2025-01-01 00:00:00+00:00\') AS "_q_0"' + == 'SELECT "_0"."cola" AS "cola", "_0"."ds" AS "ds" FROM (SELECT "microbatch"."cola" AS "cola", "microbatch"."ds" AS "ds" FROM "local"."main"."microbatch" AS "microbatch" WHERE "microbatch"."ds" < \'2025-01-11 00:00:00+00:00\' AND "microbatch"."ds" >= \'2025-01-01 00:00:00+00:00\') AS "_0"' ) @@ -672,7 +639,7 @@ def test_load_microbatch_with_ref_no_filter( tmp_path: Path, caplog, dbt_dummy_postgres_config: PostgresConfig, create_empty_project ) -> None: yaml = YAML() - project_dir, model_dir = create_empty_project() + project_dir, model_dir = create_empty_project(project_name="local") source_schema = { "version": 2, "sources": [ @@ -733,25 +700,45 @@ def test_load_microbatch_with_ref_no_filter( ) +@pytest.mark.slow +def test_load_multiple_snapshots_defined_in_same_file(sushi_test_dbt_context: Context) -> None: + context = sushi_test_dbt_context + assert context.get_model("snapshots.items_snapshot") + assert context.get_model("snapshots.items_check_snapshot") + + # Make sure cache works too + context.load() + assert context.get_model("snapshots.items_snapshot") + assert 
context.get_model("snapshots.items_check_snapshot") + + +@pytest.mark.slow +def test_dbt_snapshot_with_check_cols_expressions(sushi_test_dbt_context: Context) -> None: + context = sushi_test_dbt_context + model = context.get_model("snapshots.items_check_with_cast_snapshot") + assert model is not None + assert isinstance(model.kind, SCDType2ByColumnKind) + + columns = model.kind.columns + assert isinstance(columns, list) + assert len(columns) == 1 + + # expression in check_cols is: ds::DATE + assert isinstance(columns[0], exp.Cast) + assert columns[0].sql() == 'CAST("ds" AS DATE)' + + context.load() + cached_model = context.get_model("snapshots.items_check_with_cast_snapshot") + assert cached_model is not None + assert isinstance(cached_model.kind, SCDType2ByColumnKind) + assert isinstance(cached_model.kind.columns, list) + assert len(cached_model.kind.columns) == 1 + + @pytest.mark.slow def test_dbt_jinja_macro_undefined_variable_error(create_empty_project): project_dir, model_dir = create_empty_project() - dbt_profile_config = { - "test": { - "outputs": { - "duckdb": { - "type": "duckdb", - "path": str(project_dir.parent / "dbt_data" / "main.db"), - } - }, - "target": "duckdb", - } - } - db_profile_file = project_dir / "profiles.yml" - with open(db_profile_file, "w", encoding="utf-8") as f: - YAML().dump(dbt_profile_config, f) - macros_dir = project_dir / "macros" macros_dir.mkdir() @@ -789,6 +776,8 @@ def test_dbt_jinja_macro_undefined_variable_error(create_empty_project): @pytest.mark.slow def test_node_name_populated_for_dbt_models(dbt_dummy_postgres_config: PostgresConfig) -> None: model_config = ModelConfig( + unique_id="model.test_package.test_model", + fqn=["test_package", "test_model"], name="test_model", package_name="test_package", sql="SELECT 1 as id", @@ -803,7 +792,8 @@ def test_node_name_populated_for_dbt_models(dbt_dummy_postgres_config: PostgresC # check after convert to SQLMesh model that node_name is populated correctly sqlmesh_model = 
model_config.to_sqlmesh(context) - assert sqlmesh_model.dbt_name == "model.test_package.test_model" + assert sqlmesh_model.dbt_unique_id == "model.test_package.test_model" + assert sqlmesh_model.dbt_fqn == "test_package.test_model" @pytest.mark.slow @@ -860,4 +850,267 @@ def test_load_model_dbt_node_name(tmp_path: Path) -> None: # Verify that node_name is the equivalent dbt one model = context.snapshots[model_fqn].model - assert model.dbt_name == "model.test_project.simple_model" + assert model.dbt_unique_id == "model.test_project.simple_model" + assert model.dbt_fqn == "test_project.simple_model" + assert model.dbt_node_info + assert model.dbt_node_info.name == "simple_model" + + +@pytest.mark.slow +def test_jinja_config_no_query(create_empty_project): + project_dir, model_dir = create_empty_project(project_name="local") + + # model definition contains only a comment and non-SQL jinja + model_contents = "/* comment */ {{ config(materialized='table') }}" + model_file = model_dir / "comment_config_model.sql" + with open(model_file, "w", encoding="utf-8") as f: + f.write(model_contents) + + schema_yaml = {"version": 2, "models": [{"name": "comment_config_model"}]} + schema_file = model_dir / "schema.yml" + with open(schema_file, "w", encoding="utf-8") as f: + YAML().dump(schema_yaml, f) + + context = Context(paths=project_dir) + + # loads without error and contains empty query (which will error at runtime) + assert not context.snapshots['"local"."main"."comment_config_model"'].model.render_query() + + +@pytest.mark.slow +def test_load_custom_materialisations(sushi_test_dbt_context: Context) -> None: + context = sushi_test_dbt_context + assert context.get_model("sushi.custom_incremental_model") + assert context.get_model("sushi.custom_incremental_with_filter") + + context.load() + assert context.get_model("sushi.custom_incremental_model") + assert context.get_model("sushi.custom_incremental_with_filter") + + +def test_model_grants_to_sqlmesh_grants_config() -> None: + 
grants_config = { + "select": ["user1", "user2"], + "insert": ["admin_user"], + "update": ["power_user"], + } + model_config = ModelConfig( + name="test_model", + sql="SELECT 1 as id", + grants=grants_config, + path=Path("test_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + model_grants = sqlmesh_model.grants + assert model_grants == grants_config + + assert sqlmesh_model.grants_target_layer == GrantsTargetLayer.default + + +def test_model_grants_empty_permissions() -> None: + model_config = ModelConfig( + name="test_model_empty", + sql="SELECT 1 as id", + grants={"select": [], "insert": ["admin_user"]}, + path=Path("test_model_empty.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + model_grants = sqlmesh_model.grants + expected_grants = {"select": [], "insert": ["admin_user"]} + assert model_grants == expected_grants + + +def test_model_no_grants() -> None: + model_config = ModelConfig( + name="test_model_no_grants", + sql="SELECT 1 as id", + path=Path("test_model_no_grants.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is None + + +def test_model_empty_grants() -> None: + model_config = ModelConfig( + name="test_model_empty_grants", + sql="SELECT 1 as id", + grants={}, + path=Path("test_model_empty_grants.sql"), + ) + + context = DbtContext() + 
context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is None + + +def test_model_grants_valid_special_characters() -> None: + valid_grantees = [ + "user@domain.com", + "service-account@project.iam.gserviceaccount.com", + "group:analysts", + '"quoted user"', + "`backtick user`", + "user_with_underscores", + "user.with.dots", + ] + + model_config = ModelConfig( + name="test_model_special_chars", + sql="SELECT 1 as id", + grants={"select": valid_grantees}, + path=Path("test_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is not None + assert "select" in grants_config + assert grants_config["select"] == valid_grantees + + +def test_model_grants_engine_specific_bigquery() -> None: + model_config = ModelConfig( + name="test_model_bigquery", + sql="SELECT 1 as id", + grants={ + "bigquery.dataviewer": ["user@domain.com"], + "select": ["analyst@company.com"], + }, + path=Path("test_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = BigQueryConfig( + name="bigquery_target", + project="test-project", + dataset="test_dataset", + location="US", + database="test-project", + schema="test_dataset", + ) + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + grants_config = sqlmesh_model.grants + assert grants_config is not None + assert grants_config["bigquery.dataviewer"] == ["user@domain.com"] + assert grants_config["select"] == ["analyst@company.com"] + + +def 
test_ephemeral_model_ignores_grants() -> None: + """Test that ephemeral models ignore grants configuration.""" + model_config = ModelConfig( + name="ephemeral_model", + sql="SELECT 1 as id", + materialized="ephemeral", + grants={"select": ["reporter", "analyst"]}, + path=Path("ephemeral_model.sql"), + ) + + context = DbtContext() + context.project_name = "test_project" + context.target = DuckDbConfig(name="target", schema="test_schema") + + sqlmesh_model = model_config.to_sqlmesh( + context, virtual_environment_mode=VirtualEnvironmentMode.FULL + ) + + assert sqlmesh_model.kind.is_embedded + assert sqlmesh_model.grants is None # grants config is skipped for ephemeral / embedded models + + +def test_conditional_ref_in_unexecuted_branch(copy_to_temp_path: t.Callable): + path = copy_to_temp_path("tests/fixtures/dbt/sushi_test") + temp_project = path[0] + + models_dir = temp_project / "models" + models_dir.mkdir(parents=True, exist_ok=True) + + test_model_content = """ +{{ config( + materialized='table', +) }} + +{% if true %} + WITH source AS ( + SELECT * + FROM {{ ref('simple_model_a') }} + ) +{% else %} + WITH source AS ( + SELECT * + FROM {{ ref('nonexistent_model') }} -- this doesn't exist but is in unexecuted branch + ) +{% endif %} + +SELECT * FROM source +""".strip() + + (models_dir / "conditional_ref_model.sql").write_text(test_model_content) + sushi_context = Context(paths=[str(temp_project)]) + + # the model should load successfully without raising MissingModelError + model = sushi_context.get_model("sushi.conditional_ref_model") + assert model is not None + + # Verify only the executed ref is in the dependencies + assert len(model.depends_on) == 1 + assert '"memory"."sushi"."simple_model_a"' in model.depends_on + + # Also the model can be rendered successfully with the executed ref + rendered = model.render_query() + assert rendered is not None + assert ( + rendered.sql() + == 'WITH "source" AS (SELECT "simple_model_a"."a" AS "a" FROM 
"memory"."sushi"."simple_model_a" AS "simple_model_a") SELECT "source"."a" AS "a" FROM "source" AS "source"' + ) + + # And run plan with this conditional model for good measure + plan = sushi_context.plan(select_models=["sushi.conditional_ref_model", "sushi.simple_model_a"]) + sushi_context.apply(plan) + upstream_ref = sushi_context.engine_adapter.fetchone("SELECT * FROM sushi.simple_model_a") + assert upstream_ref == (1,) + result = sushi_context.engine_adapter.fetchone("SELECT * FROM sushi.conditional_ref_model") + assert result == (1,) diff --git a/tests/dbt/test_test.py b/tests/dbt/test_test.py index 845c1d2fc0..fb33220c0c 100644 --- a/tests/dbt/test_test.py +++ b/tests/dbt/test_test.py @@ -1,3 +1,7 @@ +from pathlib import Path + +import pytest + from sqlmesh.dbt.test import TestConfig @@ -8,3 +12,131 @@ def test_multiline_test_kwarg() -> None: test_kwargs={"test_field": "foo\nbar\n"}, ) assert test._kwargs() == 'test_field="foo\nbar"' + + +@pytest.mark.xdist_group("dbt_manifest") +def test_tests_get_unique_names(tmp_path: Path, create_empty_project) -> None: + from sqlmesh.utils.yaml import YAML + from sqlmesh.core.context import Context + + yaml = YAML() + project_dir, model_dir = create_empty_project(project_name="local") + + model_file = model_dir / "my_model.sql" + with open(model_file, "w", encoding="utf-8") as f: + f.write("SELECT 1 as id, 'value1' as status") + + # Create schema.yml with: + # 1. Same test on model and source, both with/without custom test name + # 2. Same test on same model with different args, both with/without custom test name + # 3. 
Versioned model with tests (both built-in and custom named) + schema_yaml = { + "version": 2, + "sources": [ + { + "name": "raw", + "tables": [ + { + "name": "my_source", + "columns": [ + { + "name": "id", + "data_tests": [ + {"not_null": {"name": "custom_notnull_name"}}, + {"not_null": {}}, + ], + } + ], + } + ], + } + ], + "models": [ + { + "name": "my_model", + "columns": [ + { + "name": "id", + "data_tests": [ + {"not_null": {"name": "custom_notnull_name"}}, + {"not_null": {}}, + ], + }, + { + "name": "status", + "data_tests": [ + {"accepted_values": {"values": ["value1", "value2"]}}, + {"accepted_values": {"values": ["value1", "value2", "value3"]}}, + { + "accepted_values": { + "name": "custom_accepted_values_name", + "values": ["value1", "value2"], + } + }, + { + "accepted_values": { + "name": "custom_accepted_values_name", + "values": ["value1", "value2", "value3"], + } + }, + ], + }, + ], + }, + { + "name": "versioned_model", + "columns": [ + { + "name": "id", + "data_tests": [ + {"not_null": {}}, + {"not_null": {"name": "custom_versioned_notnull"}}, + ], + }, + { + "name": "amount", + "data_tests": [ + {"accepted_values": {"values": ["low", "high"]}}, + ], + }, + ], + "versions": [ + {"v": 1}, + {"v": 2}, + ], + }, + ], + } + + schema_file = model_dir / "schema.yml" + with open(schema_file, "w", encoding="utf-8") as f: + yaml.dump(schema_yaml, f) + + # Create versioned model files + versioned_model_v1_file = model_dir / "versioned_model_v1.sql" + with open(versioned_model_v1_file, "w", encoding="utf-8") as f: + f.write("SELECT 1 as id, 'low' as amount") + + versioned_model_v2_file = model_dir / "versioned_model_v2.sql" + with open(versioned_model_v2_file, "w", encoding="utf-8") as f: + f.write("SELECT 1 as id, 'low' as amount") + + context = Context(paths=project_dir) + + all_audit_names = list(context._audits.keys()) + list(context._standalone_audits.keys()) + assert sorted(all_audit_names) == [ + "local.accepted_values_my_model_status__value1__value2", + 
"local.accepted_values_my_model_status__value1__value2__value3", + "local.accepted_values_versioned_model_v1_amount__low__high", + "local.accepted_values_versioned_model_v2_amount__low__high", + "local.custom_accepted_values_name_my_model_status__value1__value2", + "local.custom_accepted_values_name_my_model_status__value1__value2__value3", + "local.custom_notnull_name_my_model_id", + "local.custom_versioned_notnull_versioned_model_v1_id", + "local.custom_versioned_notnull_versioned_model_v2_id", + "local.not_null_my_model_id", + "local.not_null_versioned_model_v1_id", + "local.not_null_versioned_model_v2_id", + "local.source_custom_notnull_name_raw_my_source_id", + "local.source_not_null_raw_my_source_id", + ] diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index 22b75abab6..fe6073dfad 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -1,5 +1,5 @@ import agate -from datetime import datetime +from datetime import datetime, timedelta import json import logging import typing as t @@ -53,6 +53,7 @@ ) from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.model import Materialization, ModelConfig +from sqlmesh.dbt.source import SourceConfig from sqlmesh.dbt.project import Project from sqlmesh.dbt.relation import Policy from sqlmesh.dbt.seed import SeedConfig @@ -64,7 +65,7 @@ PostgresConfig, ) from sqlmesh.dbt.test import TestConfig -from sqlmesh.utils.errors import ConfigError, MacroEvalError, SQLMeshError +from sqlmesh.utils.errors import ConfigError, SQLMeshError from sqlmesh.utils.jinja import MacroReference pytestmark = [pytest.mark.dbt, pytest.mark.slow] @@ -113,6 +114,129 @@ def test_materialization(): ModelConfig(name="model", alias="model", schema="schema", materialized="dictionary") +def test_dbt_custom_materialization(): + sushi_context = Context(paths=["tests/fixtures/dbt/sushi_test"]) + + plan_builder = sushi_context.plan_builder(select_models=["sushi.custom_incremental_model"]) + 
plan = plan_builder.build() + assert len(plan.selected_models) == 1 + selected_model = list(plan.selected_models)[0] + assert selected_model == "model.sushi.custom_incremental_model" + + query = "SELECT * FROM sushi.custom_incremental_model ORDER BY created_at" + hook_table = "SELECT * FROM hook_table ORDER BY id" + sushi_context.apply(plan) + result = sushi_context.engine_adapter.fetchdf(query) + assert len(result) == 1 + assert {"created_at", "id"}.issubset(result.columns) + + # assert the pre/post hooks executed as well as part of the custom materialization + hook_result = sushi_context.engine_adapter.fetchdf(hook_table) + assert len(hook_result) == 1 + assert {"length_col", "id", "updated_at"}.issubset(hook_result.columns) + assert int(hook_result["length_col"][0]) >= 519 + assert hook_result["id"][0] == 1 + + # running with execution time one day in the future to simulate an incremental insert + tomorrow = datetime.now() + timedelta(days=1) + sushi_context.run(select_models=["sushi.custom_incremental_model"], execution_time=tomorrow) + + result_after_run = sushi_context.engine_adapter.fetchdf(query) + assert {"created_at", "id"}.issubset(result_after_run.columns) + + # this should have added new unique values for the new row + assert len(result_after_run) == 2 + assert result_after_run["id"].is_unique + assert result_after_run["created_at"].is_unique + + # validate the hooks executed as part of the run as well + hook_result = sushi_context.engine_adapter.fetchdf(hook_table) + assert len(hook_result) == 2 + assert hook_result["id"][1] == 2 + assert int(hook_result["length_col"][1]) >= 519 + assert hook_result["id"].is_monotonic_increasing + assert hook_result["updated_at"].is_unique + assert not hook_result["length_col"].is_unique + + +def test_dbt_custom_materialization_with_time_filter_and_macro(): + sushi_context = Context(paths=["tests/fixtures/dbt/sushi_test"]) + today = datetime.now() + + # select both custom materialiasation models with the wildcard + 
selector = ["sushi.custom_incremental*"] + plan_builder = sushi_context.plan_builder(select_models=selector, execution_time=today) + plan = plan_builder.build() + + assert len(plan.selected_models) == 2 + assert { + "model.sushi.custom_incremental_model", + "model.sushi.custom_incremental_with_filter", + }.issubset(plan.selected_models) + + # the model that daily (default cron) populates with data + select_daily = "SELECT * FROM sushi.custom_incremental_model ORDER BY created_at" + + # this model uses `run_started_at` as a filter (which we populate with execution time) with 2 day interval + select_filter = "SELECT * FROM sushi.custom_incremental_with_filter ORDER BY created_at" + + sushi_context.apply(plan) + result = sushi_context.engine_adapter.fetchdf(select_daily) + assert len(result) == 1 + assert {"created_at", "id"}.issubset(result.columns) + + result = sushi_context.engine_adapter.fetchdf(select_filter) + assert len(result) == 1 + assert {"created_at", "id"}.issubset(result.columns) + + # - run ONE DAY LATER + a_day_later = today + timedelta(days=1) + sushi_context.run(select_models=selector, execution_time=a_day_later) + result_after_run = sushi_context.engine_adapter.fetchdf(select_daily) + + # the new row is inserted in the normal incremental model + assert len(result_after_run) == 2 + assert {"created_at", "id"}.issubset(result_after_run.columns) + assert result_after_run["id"].is_unique + assert result_after_run["created_at"].is_unique + + # this model due to the filter shouldn't populate with any new data + result_after_run_filter = sushi_context.engine_adapter.fetchdf(select_filter) + assert len(result_after_run_filter) == 1 + assert {"created_at", "id"}.issubset(result_after_run_filter.columns) + assert result.equals(result_after_run_filter) + assert result_after_run_filter["id"].is_unique + assert result_after_run_filter["created_at"][0].date() == today.date() + + # - run TWO DAYS LATER + two_days_later = a_day_later + timedelta(days=1) + 
sushi_context.run(select_models=selector, execution_time=two_days_later) + result_after_run = sushi_context.engine_adapter.fetchdf(select_daily) + + # again a new row is inserted in the normal model + assert len(result_after_run) == 3 + assert {"created_at", "id"}.issubset(result_after_run.columns) + assert result_after_run["id"].is_unique + assert result_after_run["created_at"].is_unique + + # the model with the filter now should populate as well + result_after_run_filter = sushi_context.engine_adapter.fetchdf(select_filter) + assert len(result_after_run_filter) == 2 + assert {"created_at", "id"}.issubset(result_after_run_filter.columns) + assert result_after_run_filter["id"].is_unique + assert result_after_run_filter["created_at"][0].date() == today.date() + assert result_after_run_filter["created_at"][1].date() == two_days_later.date() + + # assert hooks have executed for both plan and incremental runs + hook_result = sushi_context.engine_adapter.fetchdf("SELECT * FROM hook_table ORDER BY id") + assert len(hook_result) == 3 + hook_result["id"][0] == 1 + assert hook_result["id"].is_monotonic_increasing + assert hook_result["updated_at"].is_unique + assert int(hook_result["length_col"][1]) >= 519 + assert not hook_result["length_col"].is_unique + + def test_model_kind(): context = DbtContext() context.project_name = "Test" @@ -168,6 +292,32 @@ def test_model_kind(): on_additive_change=OnAdditiveChange.ALLOW, ) + check_cols_with_cast = ModelConfig( + materialized=Materialization.SNAPSHOT, + unique_key=["id"], + strategy="check", + check_cols=["created_at::TIMESTAMPTZ"], + ).model_kind(context) + assert isinstance(check_cols_with_cast, SCDType2ByColumnKind) + assert check_cols_with_cast.execution_time_as_valid_from is True + assert len(check_cols_with_cast.columns) == 1 + assert isinstance(check_cols_with_cast.columns[0], exp.Cast) + assert check_cols_with_cast.columns[0].sql() == 'CAST("created_at" AS TIMESTAMPTZ)' + + check_cols_multiple_expr = ModelConfig( + 
materialized=Materialization.SNAPSHOT, + unique_key=["id"], + strategy="check", + check_cols=["created_at::TIMESTAMPTZ", "COALESCE(status, 'active')"], + ).model_kind(context) + assert isinstance(check_cols_multiple_expr, SCDType2ByColumnKind) + assert len(check_cols_multiple_expr.columns) == 2 + assert isinstance(check_cols_multiple_expr.columns[0], exp.Cast) + assert isinstance(check_cols_multiple_expr.columns[1], exp.Coalesce) + + assert check_cols_multiple_expr.columns[0].sql() == 'CAST("created_at" AS TIMESTAMPTZ)' + assert check_cols_multiple_expr.columns[1].sql() == "COALESCE(\"status\", 'active')" + assert ModelConfig(materialized=Materialization.INCREMENTAL, time_column="foo").model_kind( context ) == IncrementalByTimeRangeKind( @@ -529,6 +679,23 @@ def test_model_kind(): == ManagedKind() ) + assert ModelConfig( + materialized=Materialization.SNAPSHOT, + unique_key=["id"], + updated_at="updated_at::timestamp", + strategy="timestamp", + dialect="redshift", + ).model_kind(context) == SCDType2ByTimeKind( + unique_key=["id"], + valid_from_name="dbt_valid_from", + valid_to_name="dbt_valid_to", + updated_at_as_valid_from=True, + updated_at_name="updated_at", + dialect="redshift", + on_destructive_change=OnDestructiveChange.IGNORE, + on_additive_change=OnAdditiveChange.ALLOW, + ) + def test_model_kind_snapshot_bigquery(): context = DbtContext() @@ -700,6 +867,28 @@ def test_seed_column_types(): sqlmesh_seed = seed.to_sqlmesh(context) assert sqlmesh_seed.columns_to_types == expected_column_types + seed = SeedConfig( + name="foo", + package="package", + path=Path("examples/sushi_dbt/seeds/waiter_names.csv"), + column_types={ + "id": "TEXT", + "name": "TEXT NOT NULL", + }, + quote_columns=True, + ) + + expected_column_types = { + "id": exp.DataType.build("text"), + "name": exp.DataType.build("text"), + } + + logger = logging.getLogger("sqlmesh.dbt.column") + with patch.object(logger, "warning") as mock_logger: + sqlmesh_seed = seed.to_sqlmesh(context) + assert 
"Ignoring unsupported constraints" in mock_logger.call_args[0][0] + assert sqlmesh_seed.columns_to_types == expected_column_types + def test_seed_column_inference(tmp_path): seed_csv = tmp_path / "seed.csv" @@ -1023,8 +1212,41 @@ def test_target_jinja(sushi_test_project: Project): user="user", password="password", warehouse="warehouse", + role="role", + threads=1, ) + assert context.render("{{ target.threads }}") == "1" + assert context.render("{{ target.database }}") == "test" assert context.render("{{ target.warehouse }}") == "warehouse" + assert context.render("{{ target.user }}") == "user" + assert context.render("{{ target.role }}") == "role" + assert context.render("{{ target.account }}") == "account" + + context = DbtContext() + context._target = PostgresConfig( + name="target", + schema="test", + database="test", + dbname="test", + host="host", + port=5432, + user="user", + password="password", + ) + assert context.render("{{ target.dbname }}") == "test" + assert context.render("{{ target.host }}") == "host" + assert context.render("{{ target.port }}") == "5432" + + context = DbtContext() + context._target = BigQueryConfig( + name="target", + schema="test_value", + database="test_project", + ) + assert context.render("{{ target.project }}") == "test_project" + assert context.render("{{ target.database }}") == "test_project" + assert context.render("{{ target.schema }}") == "test_value" + assert context.render("{{ target.dataset }}") == "test_value" @pytest.mark.xdist_group("dbt_manifest") @@ -1418,6 +1640,29 @@ def test_exceptions(sushi_test_project: Project): context.render('{{ exceptions.raise_compiler_error("Error") }}') +@pytest.mark.xdist_group("dbt_manifest") +def test_try_or_compiler_error(sushi_test_project: Project): + context = sushi_test_project.context + + result = context.render( + '{{ try_or_compiler_error("Error message", modules.datetime.datetime.strptime, "2023-01-15", "%Y-%m-%d") }}' + ) + assert "2023-01-15" in result + + with 
pytest.raises(CompilationError, match="Invalid date format"): + context.render( + '{{ try_or_compiler_error("Invalid date format", modules.datetime.datetime.strptime, "invalid", "%Y-%m-%d") }}' + ) + + # built-in macro calling try_or_compiler_error works + result = context.render( + '{{ dbt.dates_in_range("2023-01-01", "2023-01-03", "%Y-%m-%d", "%Y-%m-%d") }}' + ) + assert "2023-01-01" in result + assert "2023-01-02" in result + assert "2023-01-03" in result + + @pytest.mark.xdist_group("dbt_manifest") def test_modules(sushi_test_project: Project): context = sushi_test_project.context @@ -1506,12 +1751,10 @@ def test_as_filters(sushi_test_project: Project): context = sushi_test_project.context assert context.render("{{ True | as_bool }}") == "True" - with pytest.raises(MacroEvalError, match="Failed to convert 'invalid' into boolean."): - context.render("{{ 'invalid' | as_bool }}") + assert context.render("{{ 'valid' | as_bool }}") == "valid" assert context.render("{{ 123 | as_number }}") == "123" - with pytest.raises(MacroEvalError, match="Failed to convert 'invalid' into number."): - context.render("{{ 'invalid' | as_number }}") + assert context.render("{{ 'valid' | as_number }}") == "valid" assert context.render("{{ None | as_text }}") == "" @@ -1640,7 +1883,7 @@ def test_parsetime_adapter_call( @pytest.mark.xdist_group("dbt_manifest") -def test_partition_by(sushi_test_project: Project): +def test_partition_by(sushi_test_project: Project, caplog): context = sushi_test_project.context context.target = BigQueryConfig(name="production", database="main", schema="sushi") model_config = ModelConfig( @@ -1683,6 +1926,15 @@ def test_partition_by(sushi_test_project: Project): context.target = DuckDbConfig(name="target", schema="foo") assert model_config.to_sqlmesh(context).partitioned_by == [] + context.target = SnowflakeConfig( + name="target", schema="test", database="test", account="foo", user="bar", password="baz" + ) + assert 
model_config.to_sqlmesh(context).partitioned_by == [] + assert ( + "Ignoring partition_by config for model 'model' targeting snowflake. The partition_by config is not supported for Snowflake." + in caplog.text + ) + model_config = ModelConfig( name="model", alias="model", @@ -1695,6 +1947,29 @@ def test_partition_by(sushi_test_project: Project): ) assert model_config.to_sqlmesh(context).partitioned_by == [] + model_config = ModelConfig( + name="model", + alias="model", + schema="test", + package_name="package", + materialized=Materialization.EPHEMERAL.value, + unique_key="ds", + partition_by={"field": "ds", "granularity": "month"}, + sql="""SELECT 1 AS one, ds FROM foo""", + ) + assert model_config.to_sqlmesh(context).partitioned_by == [] + + with pytest.raises(ConfigError, match="Unexpected data_type 'string' in partition_by"): + ModelConfig( + name="model", + alias="model", + schema="test", + package_name="package", + materialized="table", + partition_by={"field": "ds", "data_type": "string"}, + sql="""SELECT 1 AS one, ds FROM foo""", + ) + @pytest.mark.xdist_group("dbt_manifest") def test_partition_by_none(sushi_test_project: Project): @@ -1938,7 +2213,7 @@ def test_clickhouse_properties(mocker: MockerFixture): ] assert [e.sql("clickhouse") for e in model_to_sqlmesh.partitioned_by] == [ - 'toMonday("ds")', + "dateTrunc('WEEK', \"ds\")", '"partition_col"', ] assert model_to_sqlmesh.storage_format == "MergeTree()" @@ -1965,8 +2240,9 @@ def test_snapshot_json_payload(): assert snapshot_json["node"]["jinja_macros"]["global_objs"]["target"] == { "type": "duckdb", "name": "in_memory", - "schema": "sushi", "database": "memory", + "schema": "sushi", + "threads": 1, "target_name": "in_memory", } @@ -2074,6 +2350,71 @@ def test_model_cluster_by(): ) assert model.to_sqlmesh(context).clustered_by == [] + model = ModelConfig( + name="model", + alias="model", + package_name="package", + target_schema="test", + cluster_by=["Bar", "qux"], + sql="SELECT * FROM baz", + 
materialized=Materialization.EPHEMERAL.value, + ) + assert model.to_sqlmesh(context).clustered_by == [] + + model = ModelConfig( + name="model", + alias="model", + package_name="package", + target_schema="test", + cluster_by="Bar, qux", + sql="SELECT * FROM baz", + materialized=Materialization.TABLE.value, + ) + assert model.to_sqlmesh(context).clustered_by == [ + exp.to_column('"BAR"'), + exp.to_column('"QUX"'), + ] + + model = ModelConfig( + name="model", + alias="model", + package_name="package", + target_schema="test", + cluster_by=['"Bar,qux"'], + sql="SELECT * FROM baz", + materialized=Materialization.TABLE.value, + ) + assert model.to_sqlmesh(context).clustered_by == [ + exp.to_column('"Bar,qux"'), + ] + + model = ModelConfig( + name="model", + alias="model", + package_name="package", + target_schema="test", + cluster_by='"Bar,qux"', + sql="SELECT * FROM baz", + materialized=Materialization.TABLE.value, + ) + assert model.to_sqlmesh(context).clustered_by == [ + exp.to_column('"Bar,qux"'), + ] + + model = ModelConfig( + name="model", + alias="model", + package_name="package", + target_schema="test", + cluster_by=["to_date(Bar),qux"], + sql="SELECT * FROM baz", + materialized=Materialization.TABLE.value, + ) + assert model.to_sqlmesh(context).clustered_by == [ + exp.TsOrDsToDate(this=exp.to_column('"BAR"')), + exp.to_column('"QUX"'), + ] + def test_snowflake_dynamic_table(): context = DbtContext() @@ -2481,3 +2822,208 @@ def test_selected_resources_context_variable( result = context.render(test_condition, selected_resources=selected_resources) assert result.strip() == "has_resources" + + +def test_ignore_source_depends_on_when_also_model(dbt_dummy_postgres_config: PostgresConfig): + context = DbtContext() + context._target = dbt_dummy_postgres_config + + source_a = SourceConfig( + name="source_a", + fqn=["package", "schema", "model_a"], + ) + source_a._canonical_name = "schema.source_a" + source_b = SourceConfig( + name="source_b", + fqn=["package", "schema", 
"source_b"], + ) + source_b._canonical_name = "schema.source_b" + context.sources = {"source_a": source_a, "source_b": source_b} + + model = ModelConfig( + dependencies=Dependencies(sources={"source_a", "source_b"}), + fqn=["package", "schema", "test_model"], + ) + context.models = { + "test_model": model, + "model_a": ModelConfig(name="model_a", fqn=["package", "schema", "model_a"]), + } + + assert model.sqlmesh_model_kwargs(context)["depends_on"] == {"schema.source_b"} + + +@pytest.mark.xdist_group("dbt_manifest") +def test_dbt_hooks_with_transaction_flag(sushi_test_dbt_context: Context): + model_fqn = '"memory"."sushi"."model_with_transaction_hooks"' + assert model_fqn in sushi_test_dbt_context.models + + model = sushi_test_dbt_context.models[model_fqn] + + pre_statements = model.pre_statements_ + assert pre_statements is not None + assert len(pre_statements) >= 3 + + # we need to check the expected SQL but more importantly that the transaction flags are there + assert any( + s.sql == 'JINJA_STATEMENT_BEGIN;\n{{ log("pre-hook") }}\nJINJA_END;' + and s.transaction is True + for s in pre_statements + ) + assert any( + "CREATE TABLE IF NOT EXISTS hook_outside_pre_table" in s.sql and s.transaction is False + for s in pre_statements + ) + assert any( + "CREATE TABLE IF NOT EXISTS shared_hook_table" in s.sql and s.transaction is False + for s in pre_statements + ) + assert any( + "{{ insert_into_shared_hook_table('inside_pre') }}" in s.sql and s.transaction is True + for s in pre_statements + ) + + post_statements = model.post_statements_ + assert post_statements is not None + assert len(post_statements) >= 4 + assert any( + s.sql == 'JINJA_STATEMENT_BEGIN;\n{{ log("post-hook") }}\nJINJA_END;' + and s.transaction is True + for s in post_statements + ) + assert any( + "{{ insert_into_shared_hook_table('inside_post') }}" in s.sql and s.transaction is True + for s in post_statements + ) + assert any( + "CREATE TABLE IF NOT EXISTS hook_outside_post_table" in s.sql and 
s.transaction is False + for s in post_statements + ) + assert any( + "{{ insert_into_shared_hook_table('after_commit') }}" in s.sql and s.transaction is False + for s in post_statements + ) + + # render_pre_statements with inside_transaction=True should only return inserrt + inside_pre_statements = model.render_pre_statements(inside_transaction=True) + assert len(inside_pre_statements) == 1 + assert ( + inside_pre_statements[0].sql() + == """INSERT INTO "shared_hook_table" ("id", "hook_name", "execution_order", "created_at") VALUES ((SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), 'inside_pre', (SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), NOW())""" + ) + + # while for render_pre_statements with inside_transaction=False the create statements + outside_pre_statements = model.render_pre_statements(inside_transaction=False) + assert len(outside_pre_statements) == 2 + assert "CREATE" in outside_pre_statements[0].sql() + assert "hook_outside_pre_table" in outside_pre_statements[0].sql() + assert "CREATE" in outside_pre_statements[1].sql() + assert "shared_hook_table" in outside_pre_statements[1].sql() + + # similarly for post statements + inside_post_statements = model.render_post_statements(inside_transaction=True) + assert len(inside_post_statements) == 1 + assert ( + inside_post_statements[0].sql() + == """INSERT INTO "shared_hook_table" ("id", "hook_name", "execution_order", "created_at") VALUES ((SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), 'inside_post', (SELECT COALESCE(MAX("id"), 0) + 1 FROM "shared_hook_table"), NOW())""" + ) + + outside_post_statements = model.render_post_statements(inside_transaction=False) + assert len(outside_post_statements) == 2 + assert "CREATE" in outside_post_statements[0].sql() + assert "hook_outside_post_table" in outside_post_statements[0].sql() + assert "INSERT" in outside_post_statements[1].sql() + assert "shared_hook_table" in outside_post_statements[1].sql() + + 
+@pytest.mark.xdist_group("dbt_manifest") +def test_dbt_hooks_with_transaction_flag_execution(sushi_test_dbt_context: Context): + model_fqn = '"memory"."sushi"."model_with_transaction_hooks"' + assert model_fqn in sushi_test_dbt_context.models + + plan = sushi_test_dbt_context.plan(select_models=["sushi.model_with_transaction_hooks"]) + sushi_test_dbt_context.apply(plan) + + result = sushi_test_dbt_context.engine_adapter.fetchdf( + "SELECT * FROM sushi.model_with_transaction_hooks" + ) + assert len(result) == 1 + assert result["id"][0] == 1 + assert result["name"][0] == "test" + + # ensure the outside pre-hook and post-hook table were created + pre_outside = sushi_test_dbt_context.engine_adapter.fetchdf( + "SELECT * FROM hook_outside_pre_table" + ) + assert len(pre_outside) == 1 + assert pre_outside["id"][0] == 1 + assert pre_outside["location"][0] == "outside" + assert pre_outside["execution_order"][0] == 1 + + post_outside = sushi_test_dbt_context.engine_adapter.fetchdf( + "SELECT * FROM hook_outside_post_table" + ) + assert len(post_outside) == 1 + assert post_outside["id"][0] == 5 + assert post_outside["location"][0] == "outside" + assert post_outside["execution_order"][0] == 5 + + # verify the shared table that was created by before_begin and populated by all hooks + shared_table = sushi_test_dbt_context.engine_adapter.fetchdf( + "SELECT * FROM shared_hook_table ORDER BY execution_order" + ) + assert len(shared_table) == 3 + assert shared_table["execution_order"].is_monotonic_increasing + + # The order of creation and insertion will verify the following order of execution + # 1. before_begin (transaction=false) ran BEFORE the transaction started and created the table + # 2. inside_pre (transaction=true) ran INSIDE the transaction and could insert into the table + # 3. inside_post (transaction=true) ran INSIDE the transaction and could insert into the table (but after pre statement) + # 4. 
after_commit (transaction=false) ran AFTER the transaction committed + + assert shared_table["id"][0] == 1 + assert shared_table["hook_name"][0] == "inside_pre" + assert shared_table["execution_order"][0] == 1 + + assert shared_table["id"][1] == 2 + assert shared_table["hook_name"][1] == "inside_post" + assert shared_table["execution_order"][1] == 2 + + assert shared_table["id"][2] == 3 + assert shared_table["hook_name"][2] == "after_commit" + assert shared_table["execution_order"][2] == 3 + + # the timestamps also should be monotonically increasing for the same reason + for i in range(len(shared_table) - 1): + assert shared_table["created_at"][i] <= shared_table["created_at"][i + 1] + + # the tables using the alternate syntax should have correct order as well + assert pre_outside["created_at"][0] < shared_table["created_at"][0] + assert post_outside["created_at"][0] > shared_table["created_at"][1] + + # running with execution time one day in the future to simulate a run + tomorrow = datetime.now() + timedelta(days=1) + sushi_test_dbt_context.run( + select_models=["sushi.model_with_transaction_hooks"], execution_time=tomorrow + ) + + # to verify that the transaction information persists in state and is respected + shared_table = sushi_test_dbt_context.engine_adapter.fetchdf( + "SELECT * FROM shared_hook_table ORDER BY execution_order" + ) + + # and the execution order for run is similar + assert shared_table["execution_order"].is_monotonic_increasing + assert shared_table["id"][3] == 4 + assert shared_table["hook_name"][3] == "inside_pre" + assert shared_table["execution_order"][3] == 4 + + assert shared_table["id"][4] == 5 + assert shared_table["hook_name"][4] == "inside_post" + assert shared_table["execution_order"][4] == 5 + + assert shared_table["id"][5] == 6 + assert shared_table["hook_name"][5] == "after_commit" + assert shared_table["execution_order"][5] == 6 + + for i in range(len(shared_table) - 1): + assert shared_table["created_at"][i] <= 
shared_table["created_at"][i + 1] diff --git a/tests/engines/spark/conftest.py b/tests/engines/spark/conftest.py index 933bc7870f..ce6a99ea35 100644 --- a/tests/engines/spark/conftest.py +++ b/tests/engines/spark/conftest.py @@ -9,6 +9,7 @@ def spark_session() -> t.Generator[SparkSession, None, None]: session = ( SparkSession.builder.master("local") .appName("SQLMesh Test") + .config("spark.driver.memory", "512m") .enableHiveSupport() .getOrCreate() ) diff --git a/tests/fixtures/dbt/empty_project/dbt_project.yml b/tests/fixtures/dbt/empty_project/dbt_project.yml new file mode 100644 index 0000000000..dab3d1e0e8 --- /dev/null +++ b/tests/fixtures/dbt/empty_project/dbt_project.yml @@ -0,0 +1,18 @@ +name: 'empty_project' + +version: '1.0.0' +config-version: 2 + +profile: 'empty_project' + +model-paths: ["models"] +seed-paths: ["seeds"] +test-paths: ["tests"] +analysis-paths: ["analysis"] +macro-paths: ["macros"] + +target-path: "target" +clean-targets: + - "target" + - "dbt_modules" + - "logs" \ No newline at end of file diff --git a/tests/fixtures/dbt/empty_project/profiles.yml b/tests/fixtures/dbt/empty_project/profiles.yml new file mode 100644 index 0000000000..712456bffe --- /dev/null +++ b/tests/fixtures/dbt/empty_project/profiles.yml @@ -0,0 +1,13 @@ +empty_project: + + target: __DEFAULT_TARGET__ + + outputs: + __DEFAULT_TARGET__: + type: duckdb + # database is required for dbt < 1.5 where our adapter deliberately doesnt infer the database from the path and + # defaults it to "main", which raises a "project catalog doesnt match context catalog" error + # ref: https://github.com/SQLMesh/sqlmesh/pull/1109 + database: empty_project + path: 'empty_project.duckdb' + threads: 4 diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/dbt_project.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/dbt_project.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/dbt_project.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/dbt_project.yml diff --git 
a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/customers.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/customers.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/customers.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/customers.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/docs.md b/tests/fixtures/dbt/jaffle_shop_duckdb/models/docs.md similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/docs.md rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/docs.md diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/orders.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/orders.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/orders.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/orders.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/overview.md b/tests/fixtures/dbt/jaffle_shop_duckdb/models/overview.md similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/overview.md rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/overview.md diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/schema.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/models/schema.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/schema.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/schema.yml diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/schema.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/schema.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/schema.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/schema.yml diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_customers.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_customers.sql similarity index 100% rename from 
tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_customers.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_customers.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_orders.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_orders.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_orders.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_orders.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_payments.sql b/tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_payments.sql similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/models/staging/stg_payments.sql rename to tests/fixtures/dbt/jaffle_shop_duckdb/models/staging/stg_payments.sql diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/profiles.yml b/tests/fixtures/dbt/jaffle_shop_duckdb/profiles.yml similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/profiles.yml rename to tests/fixtures/dbt/jaffle_shop_duckdb/profiles.yml diff --git a/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/.gitkeep b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_customers.csv b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_customers.csv similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_customers.csv rename to tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_customers.csv diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_orders.csv b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_orders.csv similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_orders.csv rename to tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_orders.csv diff --git a/tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_payments.csv 
b/tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_payments.csv similarity index 100% rename from tests/dbt/cli/fixtures/jaffle_shop_duckdb/seeds/raw_payments.csv rename to tests/fixtures/dbt/jaffle_shop_duckdb/seeds/raw_payments.csv diff --git a/tests/fixtures/dbt/sushi_test/config.py b/tests/fixtures/dbt/sushi_test/config.py index 83118b02cf..a68b3e2333 100644 --- a/tests/fixtures/dbt/sushi_test/config.py +++ b/tests/fixtures/dbt/sushi_test/config.py @@ -11,6 +11,16 @@ test_config = config + +test_config_with_var_override = sqlmesh_config( + Path(__file__).parent, + model_defaults=ModelDefaultsConfig(dialect="duckdb", start="Jan 1 2022"), + variables={ + "some_var": "overridden_from_config_py", + }, +) + + test_config_with_normalization_strategy = sqlmesh_config( Path(__file__).parent, model_defaults=ModelDefaultsConfig( diff --git a/tests/fixtures/dbt/sushi_test/dbt_project.yml b/tests/fixtures/dbt/sushi_test/dbt_project.yml index 2a25389e43..0b5f6b0f83 100644 --- a/tests/fixtures/dbt/sushi_test/dbt_project.yml +++ b/tests/fixtures/dbt/sushi_test/dbt_project.yml @@ -50,6 +50,7 @@ vars: yet_another_var: 1 dynamic_test_var: 3 + some_var: 'should be overridden in customers package' customers: some_var: ["foo", "bar"] @@ -65,6 +66,9 @@ vars: - name: 'item2' value: 2 + # Despite this being an invalid variable definition, dbt doesn't mind if it's unused + invalid_var: "{{ ref('ref_without_closing_paren' }}" + on-run-start: - 'CREATE TABLE IF NOT EXISTS analytic_stats (physical_table VARCHAR, evaluation_time VARCHAR);' @@ -74,4 +78,4 @@ on-run-start: on-run-end: - '{{ create_tables(schemas) }}' - 'DROP TABLE to_be_executed_last;' - - '{{ graph_usage() }}' \ No newline at end of file + - '{{ graph_usage() }}' diff --git a/tests/fixtures/dbt/sushi_test/macros/distinct.sql b/tests/fixtures/dbt/sushi_test/macros/distinct.sql new file mode 100644 index 0000000000..1b339a9349 --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/macros/distinct.sql @@ -0,0 +1 @@ +{% macro 
default__select_distinct() %}distinct{% endmacro %} diff --git a/tests/fixtures/dbt/sushi_test/macros/insert_hook.sql b/tests/fixtures/dbt/sushi_test/macros/insert_hook.sql new file mode 100644 index 0000000000..aa27a7fe6d --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/macros/insert_hook.sql @@ -0,0 +1,14 @@ +{% macro insert_into_shared_hook_table(hook_name) %} +INSERT INTO shared_hook_table ( + id, + hook_name, + execution_order, + created_at +) +VALUES ( + (SELECT COALESCE(MAX(id), 0) + 1 FROM shared_hook_table), + '{{ hook_name }}', + (SELECT COALESCE(MAX(id), 0) + 1 FROM shared_hook_table), + NOW() +) +{% endmacro %} diff --git a/tests/fixtures/dbt/sushi_test/macros/materializations/custom_incremental.sql b/tests/fixtures/dbt/sushi_test/macros/materializations/custom_incremental.sql new file mode 100644 index 0000000000..c61899c8ff --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/macros/materializations/custom_incremental.sql @@ -0,0 +1,61 @@ +{%- macro build_incremental_filter_sql(sql, time_column, existing_relation, interval_config) -%} + {# macro to build the filter and also test use of macro inside materialisation #} + WITH source_data AS ( + {{ sql }} + ) + SELECT * FROM source_data + WHERE {{ time_column }} >= ( + SELECT COALESCE(MAX({{ time_column }}), '1900-01-01') + {%- if interval_config %} + INTERVAL {{ interval_config }} {%- endif %} + FROM {{ existing_relation }} + ) +{%- endmacro -%} + +{%- materialization custom_incremental, default -%} + {%- set existing_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set new_relation = api.Relation.create(database=database, schema=schema, identifier=identifier) -%} + {%- set temp_relation = make_temp_relation(new_relation) -%} + + {%- set time_column = config.get('time_column') -%} + {%- set interval_config = config.get('interval') -%} + + {{ run_hooks(pre_hooks) }} + + {%- if existing_relation is none -%} + {# The first insert creates new table if it 
doesn't exist #} + {%- call statement('main') -%} + CREATE TABLE {{ new_relation }} + AS {{ sql }} + {%- endcall -%} + {%- else -%} + {# Incremental load, appending new data with optional time filtering #} + {%- if time_column is not none -%} + {%- set filtered_sql -%} + {{ build_incremental_filter_sql(sql, time_column, existing_relation, interval_config) }} + {%- endset -%} + {%- else -%} + {%- set filtered_sql = sql -%} + {%- endif -%} + + {{log(filtered_sql, info=true)}} + + {%- call statement('create_temp') -%} + {{ create_table_as(True, temp_relation, filtered_sql) }} + CREATE TABLE {{ temp_relation }} + AS {{ filtered_sql }} + {%- endcall -%} + + {%- call statement('insert') -%} + INSERT INTO {{ new_relation }} + SELECT * FROM {{ temp_relation }} + {%- endcall -%} + + {%- call statement('drop_temp') -%} + DROP TABLE {{ temp_relation }} + {%- endcall -%} + {%- endif -%} + + {{ run_hooks(post_hooks) }} + + {{ return({'relations': [new_relation]}) }} +{%- endmaterialization -%} diff --git a/tests/fixtures/dbt/sushi_test/models/custom_incremental_model.sql b/tests/fixtures/dbt/sushi_test/models/custom_incremental_model.sql new file mode 100644 index 0000000000..c7e9a8f7ea --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/models/custom_incremental_model.sql @@ -0,0 +1,20 @@ +{{ config( + materialized='custom_incremental', + pre_hook=[ + "CREATE TABLE IF NOT EXISTS hook_table (id INTEGER, length_col TEXT, updated_at TIMESTAMP)" + ], + post_hook=[ + """ + INSERT INTO hook_table + SELECT + COALESCE(MAX(id), 0) + 1 AS id, + '{{ model.raw_code | length }}' AS length_col, + CURRENT_TIMESTAMP AS updated_at + FROM hook_table + """ + ] +) }} + +SELECT + current_timestamp as created_at, + hash(current_timestamp) as id, \ No newline at end of file diff --git a/tests/fixtures/dbt/sushi_test/models/custom_incremental_with_filter.sql b/tests/fixtures/dbt/sushi_test/models/custom_incremental_with_filter.sql new file mode 100644 index 0000000000..94cbdc9333 --- /dev/null +++ 
b/tests/fixtures/dbt/sushi_test/models/custom_incremental_with_filter.sql @@ -0,0 +1,9 @@ +{{ config( + materialized='custom_incremental', + time_column='created_at', + interval='2 day' +) }} + +SELECT + CAST('{{ run_started_at }}' AS TIMESTAMP) as created_at, + hash('{{ run_started_at }}') as id, \ No newline at end of file diff --git a/tests/fixtures/dbt/sushi_test/models/model_with_raw_code.sql b/tests/fixtures/dbt/sushi_test/models/model_with_raw_code.sql index 386e7f40ef..1424f6e970 100644 --- a/tests/fixtures/dbt/sushi_test/models/model_with_raw_code.sql +++ b/tests/fixtures/dbt/sushi_test/models/model_with_raw_code.sql @@ -1,6 +1,6 @@ {{ config( - pre_hook=['CREATE TABLE t AS SELECT \'Length is {{ model.raw_code|length }}\' AS length_col'] + pre_hook=['CREATE TABLE IF NOT EXISTS t AS SELECT \'Length is {{ model.raw_code|length }}\' AS length_col'] ) }} diff --git a/tests/fixtures/dbt/sushi_test/models/model_with_transaction_hooks.sql b/tests/fixtures/dbt/sushi_test/models/model_with_transaction_hooks.sql new file mode 100644 index 0000000000..49883f73df --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/models/model_with_transaction_hooks.sql @@ -0,0 +1,56 @@ +{{ + config( + materialized = 'table', + + pre_hook = [ + { + "sql": " + CREATE TABLE IF NOT EXISTS hook_outside_pre_table AS + SELECT + 1 AS id, + 'outside' AS location, + 1 AS execution_order, + NOW() AS created_at + ", + "transaction": false + }, + + before_begin(" + CREATE TABLE IF NOT EXISTS shared_hook_table ( + id INT, + hook_name VARCHAR, + execution_order INT, + created_at TIMESTAMPTZ + ) + "), + + { + "sql": "{{ insert_into_shared_hook_table('inside_pre') }}", + "transaction": true + } + ], + + post_hook = [ + { + "sql": "{{ insert_into_shared_hook_table('inside_post') }}", + "transaction": true + }, + + { + "sql": " + CREATE TABLE IF NOT EXISTS hook_outside_post_table AS + SELECT + 5 AS id, + 'outside' AS location, + 5 AS execution_order, + NOW() AS created_at + ", + "transaction": false + }, 
+ + after_commit("{{ insert_into_shared_hook_table('after_commit') }}") + ] + ) +}} + +SELECT 1 AS id, 'test' AS name; diff --git a/tests/fixtures/dbt/sushi_test/models/schema.yml b/tests/fixtures/dbt/sushi_test/models/schema.yml index 48b8b814d3..24b1d4b3ee 100644 --- a/tests/fixtures/dbt/sushi_test/models/schema.yml +++ b/tests/fixtures/dbt/sushi_test/models/schema.yml @@ -1,6 +1,55 @@ version: 2 models: + - name: simple_model_a + description: A simple model for testing + columns: + - name: a + data_type: int + unit_tests: + - name: test_simple_model_a_outputs_one + description: Test that simple_model_a outputs 1 as column a + model: simple_model_a + given: [] # No input models needed + expect: + format: csv + rows: | + a + 1 + - name: simple_model_b + description: Model that references simple_model_a + columns: + - name: a + data_type: int + unit_tests: + - name: test_simple_model_b_with_mock_input + description: Test simple_model_b with mocked simple_model_a input + model: simple_model_b + given: + - input: ref('simple_model_a') + format: csv + rows: | + a + 10 + 20 + 30 + expect: + format: csv + rows: | + a + 10 + 20 + 30 + - name: test_simple_model_b_with_sql_input + description: Test simple_model_b with SQL-defined input data + model: simple_model_b + given: + - input: ref('simple_model_a') + format: sql + rows: SELECT 42 AS a + expect: + format: sql + rows: SELECT 42 AS a - name: top_waiters description: description of top waiters columns: @@ -20,6 +69,12 @@ models: error_after: {count: 9, period: hour} - name: waiters description: '{{ doc("waiters") }}' + config: + # Exercise pre and post hooks + pre_hook: + - SELECT 1 + post_hook: + - SELECT 1 - name: waiter_as_customer_by_day - name: waiter_revenue_by_day versions: @@ -36,8 +91,14 @@ sources: schema: raw tables: - name: items + config: + meta: - name: orders + config: + meta: - name: order_items + config: + meta: freshness: warn_after: {count: 10, period: hour} error_after: {count: 11, period: hour} @@ 
-71,4 +132,4 @@ metrics: type: simple label: testing type_params: - measure: total_waiters \ No newline at end of file + measure: total_waiters diff --git a/tests/fixtures/dbt/sushi_test/profiles.yml b/tests/fixtures/dbt/sushi_test/profiles.yml index 056c3c2b91..f49ad8ea0f 100644 --- a/tests/fixtures/dbt/sushi_test/profiles.yml +++ b/tests/fixtures/dbt/sushi_test/profiles.yml @@ -3,6 +3,7 @@ sushi: in_memory: type: duckdb schema: sushi + database: memory duckdb: type: duckdb path: 'local.duckdb' diff --git a/tests/fixtures/dbt/sushi_test/snapshots/items_check_snapshot.sql b/tests/fixtures/dbt/sushi_test/snapshots/items_check_snapshot.sql deleted file mode 100644 index fdda412e7f..0000000000 --- a/tests/fixtures/dbt/sushi_test/snapshots/items_check_snapshot.sql +++ /dev/null @@ -1,15 +0,0 @@ -{% snapshot items_check_snapshot %} - -{{ - config( - target_schema='snapshots', - unique_key='id', - strategy='check', - check_cols=['ds'], - invalidate_hard_deletes=True, - ) -}} - -select * from {{ source('streaming', 'items') }} - -{% endsnapshot %} diff --git a/tests/fixtures/dbt/sushi_test/snapshots/items_snapshot.sql b/tests/fixtures/dbt/sushi_test/snapshots/items_snapshot.sql deleted file mode 100644 index c5c922d217..0000000000 --- a/tests/fixtures/dbt/sushi_test/snapshots/items_snapshot.sql +++ /dev/null @@ -1,16 +0,0 @@ -{% snapshot items_snapshot %} - -{{ - config( - target_schema='snapshots', - unique_key='id', - strategy='timestamp', - updated_at='ds', - invalidate_hard_deletes=True, - on_schema_change='sync_all_columns', - ) -}} - -select * from {{ source('streaming', 'items') }} - -{% endsnapshot %} diff --git a/tests/fixtures/dbt/sushi_test/snapshots/items_snapshots.sql b/tests/fixtures/dbt/sushi_test/snapshots/items_snapshots.sql new file mode 100644 index 0000000000..fbce585edf --- /dev/null +++ b/tests/fixtures/dbt/sushi_test/snapshots/items_snapshots.sql @@ -0,0 +1,48 @@ +{% snapshot items_snapshot %} + +{{ + config( + target_schema='snapshots', + 
unique_key='id', + strategy='timestamp', + updated_at='ds', + invalidate_hard_deletes=True, + on_schema_change='sync_all_columns', + ) +}} + +select * from {{ source('streaming', 'items') }} + +{% endsnapshot %} + +{% snapshot items_check_snapshot %} + +{{ + config( + target_schema='snapshots', + unique_key='id', + strategy='check', + check_cols=['ds'], + invalidate_hard_deletes=True, + ) +}} + +select * from {{ source('streaming', 'items') }} + +{% endsnapshot %} + +{% snapshot items_check_with_cast_snapshot %} + +{{ + config( + target_schema='snapshots', + unique_key='id', + strategy='check', + check_cols=['ds::DATE'], + invalidate_hard_deletes=True, + ) +}} + +select * from {{ source('streaming', 'items') }} + +{% endsnapshot %} diff --git a/tests/integrations/github/cicd/test_github_controller.py b/tests/integrations/github/cicd/test_github_controller.py index a27f75f459..e4fe10e321 100644 --- a/tests/integrations/github/cicd/test_github_controller.py +++ b/tests/integrations/github/cicd/test_github_controller.py @@ -15,6 +15,7 @@ from sqlmesh.core.model import SqlModel from sqlmesh.core.user import User, UserRole from sqlmesh.core.plan.definition import Plan +from sqlmesh.core.linter.rule import RuleViolation from sqlmesh.integrations.github.cicd.config import GithubCICDBotConfig, MergeMethod from sqlmesh.integrations.github.cicd.controller import ( BotCommand, @@ -29,6 +30,29 @@ pytestmark = pytest.mark.github + +def add_linter_violations(controller: GithubController): + class _MockModel: + _path = "tests/linter_test.sql" + + class _MockLinterRule: + name = "mock_linter_rule" + + controller._console.show_linter_violations( + [ + RuleViolation( + rule=_MockLinterRule(), violation_msg="Linter warning", violation_range=None + ) + ], + _MockModel(), + ) + controller._console.show_linter_violations( + [RuleViolation(rule=_MockLinterRule(), violation_msg="Linter error", violation_range=None)], + _MockModel(), + is_error=True, + ) + + 
github_controller_approvers_params = [ ( "2 approvers, 1 required", @@ -315,7 +339,8 @@ def test_prod_plan_with_gaps(github_client, make_controller): assert controller.prod_plan_with_gaps.environment.name == c.PROD assert not controller.prod_plan_with_gaps.skip_backfill - assert not controller._prod_plan_with_gaps_builder._auto_categorization_enabled + # auto_categorization should now be enabled to prevent uncategorized snapshot errors + assert controller._prod_plan_with_gaps_builder._auto_categorization_enabled assert not controller.prod_plan_with_gaps.no_gaps assert not controller._context.apply.called assert controller._context._run_plan_tests.call_args == call(skip_tests=True) @@ -436,6 +461,33 @@ def test_deploy_to_prod_merge_error(github_client, make_controller): controller.deploy_to_prod() +def test_deploy_to_prod_blocked_pr(github_client, make_controller): + mock_pull_request = github_client.get_repo().get_pull() + mock_pull_request.merged = False + controller = make_controller( + "tests/fixtures/github/pull_request_synchronized.json", + github_client, + merge_state_status=MergeStateStatus.BLOCKED, + ) + with pytest.raises( + Exception, + match=r"^Branch protection or ruleset requirement is likely not satisfied, e.g. 
missing CODEOWNERS approval.*", + ): + controller.deploy_to_prod() + + +def test_deploy_to_prod_not_blocked_pr_if_config_set(github_client, make_controller): + mock_pull_request = github_client.get_repo().get_pull() + mock_pull_request.merged = False + controller = make_controller( + "tests/fixtures/github/pull_request_synchronized.json", + github_client, + merge_state_status=MergeStateStatus.BLOCKED, + bot_config=GithubCICDBotConfig(check_if_blocked_on_deploy_to_prod=False), + ) + controller.deploy_to_prod() + + def test_deploy_to_prod_dirty_pr(github_client, make_controller): mock_pull_request = github_client.get_repo().get_pull() mock_pull_request.merged = False @@ -444,7 +496,10 @@ def test_deploy_to_prod_dirty_pr(github_client, make_controller): github_client, merge_state_status=MergeStateStatus.DIRTY, ) - with pytest.raises(Exception, match=r"^Merge commit cannot be cleanly created.*"): + with pytest.raises( + Exception, + match=r"^Merge commit cannot be cleanly created. Likely from a merge conflict.*", + ): controller.deploy_to_prod() @@ -660,12 +715,18 @@ def test_get_plan_summary_includes_warnings_and_errors( controller._console.log_warning("Warning 1\nWith multiline") controller._console.log_warning("Warning 2") controller._console.log_error("Error 1") + add_linter_violations(controller) summary = controller.get_plan_summary(controller.prod_plan) - assert ("> [!WARNING]\n>\n> - Warning 1\n> With multiline\n>\n> - Warning 2\n\n") in summary - - assert ("> [!CAUTION]\n>\n> Error 1\n\n") in summary + assert ("> [!WARNING]\n>\n> - Warning 1\n> With multiline\n>\n> - Warning 2\n>\n>") in summary + assert ( + "> Linter warnings for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter warning\n>" + ) in summary + assert ("> [!CAUTION]\n>\n> - Error 1\n>\n>") in summary + assert ( + "> Linter **errors** for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter error\n>" + ) in summary def test_get_pr_environment_summary_includes_warnings_and_errors( @@ -679,24 
+740,39 @@ def test_get_pr_environment_summary_includes_warnings_and_errors( controller._console.log_warning("Warning 1") controller._console.log_error("Error 1") + add_linter_violations(controller) # completed with no exception triggers a SUCCESS conclusion and only shows warnings success_summary = controller.get_pr_environment_summary( conclusion=GithubCheckConclusion.SUCCESS ) - assert "> [!WARNING]\n>\n> Warning 1\n" in success_summary - assert "> [!CAUTION]\n>\n> Error 1" not in success_summary + assert "> [!WARNING]\n>\n> - Warning 1\n" in success_summary + assert ( + "> Linter warnings for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter warning\n" + in success_summary + ) + assert "Error 1" not in success_summary + assert "mock_linter_rule: Linter error" not in success_summary # since they got consumed in the previous call controller._console.log_warning("Warning 1") controller._console.log_error("Error 1") + add_linter_violations(controller) # completed with an exception triggers a FAILED conclusion and shows errors error_summary = controller.get_pr_environment_summary( conclusion=GithubCheckConclusion.FAILURE, exception=SQLMeshError("Something broke") ) - assert "> [!WARNING]\n>\n> Warning 1\n" in error_summary - assert "> [!CAUTION]\n>\n> Error 1" in error_summary + assert "> [!WARNING]\n>\n> - Warning 1\n>\n" in error_summary + assert ( + "> Linter warnings for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter warning\n" + in error_summary + ) + assert "[!CAUTION]\n>
\n>\n> - Error 1\n>\n" in error_summary + assert ( + "> Linter **errors** for `tests/linter_test.sql`:\n> - mock_linter_rule: Linter error\n" + in error_summary + ) def test_pr_comment_deploy_indicator_includes_command_namespace( diff --git a/tests/integrations/jupyter/test_magics.py b/tests/integrations/jupyter/test_magics.py index 0a39c155cf..991df8fc15 100644 --- a/tests/integrations/jupyter/test_magics.py +++ b/tests/integrations/jupyter/test_magics.py @@ -906,9 +906,6 @@ def test_destroy( "Are you ABSOLUTELY SURE you want to proceed with deletion? [y/n]:", "Environment 'prod' invalidated.", "Deleted object memory.sushi", - 'Deleted object "memory"."raw"."model1"', - 'Deleted object "memory"."raw"."model2"', - 'Deleted object "memory"."raw"."demographics"', "State tables removed.", "Destroy completed successfully.", ] diff --git a/tests/lsp/test_reference_model_column_prefix.py b/tests/lsp/test_reference_model_column_prefix.py index 3cd25a080e..082ee9c8e6 100644 --- a/tests/lsp/test_reference_model_column_prefix.py +++ b/tests/lsp/test_reference_model_column_prefix.py @@ -41,7 +41,7 @@ def test_model_reference_with_column_prefix(): model_refs = get_all_references(lsp_context, URI.from_path(sushi_customers_path), position) - assert len(model_refs) >= 7 + assert len(model_refs) >= 6 # Verify that we have the FROM clause reference assert any(ref.range.start.line == from_clause_range.start.line for ref in model_refs), ( @@ -65,8 +65,8 @@ def test_column_prefix_references_are_found(): # Find all occurrences of sushi.orders in the file ranges = find_ranges_from_regex(read_file, r"sushi\.orders") - # Should find exactly 2: FROM clause and WHERE clause with column prefix - assert len(ranges) == 2, f"Expected 2 occurrences of 'sushi.orders', found {len(ranges)}" + # Should find exactly 1 in FROM clause with column prefix + assert len(ranges) == 1, f"Expected 1 occurrence of 'sushi.orders', found {len(ranges)}" # Verify we have the expected lines line_contents = 
[read_file[r.start.line].strip() for r in ranges] @@ -76,11 +76,6 @@ def test_column_prefix_references_are_found(): "Should find FROM clause with sushi.orders" ) - # Should find customer_id in WHERE clause with column prefix - assert any("WHERE sushi.orders.customer_id" in content for content in line_contents), ( - "Should find WHERE clause with sushi.orders.customer_id" - ) - def test_quoted_uppercase_table_and_column_references(tmp_path: Path): # Initialize example project in temporary directory with case sensitive normalization diff --git a/tests/lsp/test_reference_model_find_all.py b/tests/lsp/test_reference_model_find_all.py index 7c0077d6cd..cd9c0a3a1c 100644 --- a/tests/lsp/test_reference_model_find_all.py +++ b/tests/lsp/test_reference_model_find_all.py @@ -30,8 +30,8 @@ def test_find_references_for_model_usages(): # Click on the model reference position = Position(line=ranges[0].start.line, character=ranges[0].start.character + 6) references = get_model_find_all_references(lsp_context, URI.from_path(customers_path), position) - assert len(references) >= 7, ( - f"Expected at least 7 references to sushi.orders (including column prefix), found {len(references)}" + assert len(references) >= 6, ( + f"Expected at least 6 references to sushi.orders (including column prefix), found {len(references)}" ) # Verify expected files are present @@ -53,7 +53,7 @@ def test_find_references_for_model_usages(): # Note: customers file has multiple references due to column prefix support expected_ranges = { "orders": [(0, 0, 0, 0)], # the start for the model itself - "customers": [(30, 7, 30, 19), (44, 6, 44, 18)], # FROM clause and WHERE clause + "customers": [(30, 7, 30, 19)], # FROM clause "waiter_revenue_by_day": [(19, 5, 19, 17)], "customer_revenue_lifetime": [(38, 7, 38, 19)], "customer_revenue_by_day": [(33, 5, 33, 17)], diff --git a/tests/pyproject.toml b/tests/pyproject.toml index 6f9cd2f9d9..73f143bfde 100644 --- a/tests/pyproject.toml +++ b/tests/pyproject.toml @@ 
-8,8 +8,8 @@ license = { text = "Apache License 2.0" } [project.urls] Homepage = "https://sqlmesh.com/" Documentation = "https://sqlmesh.readthedocs.io/en/stable/" -Repository = "https://github.com/TobikoData/sqlmesh" -Issues = "https://github.com/TobikoData/sqlmesh/issues" +Repository = "https://github.com/SQLMesh/sqlmesh" +Issues = "https://github.com/SQLMesh/sqlmesh/issues" [build-system] requires = ["setuptools", "setuptools_scm", "toml"] diff --git a/tests/setup.py b/tests/setup.py index d072cb555b..ab48a3128f 100644 --- a/tests/setup.py +++ b/tests/setup.py @@ -7,6 +7,8 @@ sqlmesh_pyproject = Path(__file__).parent / "sqlmesh_pyproject.toml" parsed = toml.load(sqlmesh_pyproject)["project"] install_requires = parsed["dependencies"] + parsed["optional-dependencies"]["dev"] +# remove dbt dependencies +install_requires = [req for req in install_requires if not req.startswith("dbt")] # this is just so we can have a dynamic install_requires, everything else is defined in pyproject.toml setuptools.setup(install_requires=install_requires) diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index e69de29bb2..744ad37757 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -0,0 +1,23 @@ +import pytest + +from sqlmesh.utils import sanitize_name + + +@pytest.mark.parametrize( + "raw,exclude_unicode,include_unicode", + [ + ("simple", "simple", "simple"), + ("snake_case", "snake_case", "snake_case"), + ("客户数据", "____", "客户数据"), + ("客户-数据 v2", "______v2", "客户_数据_v2"), + ("中文,逗号", "_____", "中文_逗号"), + ("a/b", "a_b", "a_b"), + ("spaces\tand\nnewlines", "spaces_and_newlines", "spaces_and_newlines"), + ("data📦2025", "data_2025", "data_2025"), + ("MiXeD123_名字", "MiXeD123___", "MiXeD123_名字"), + ("", "", ""), + ], +) +def test_sanitize_name_no_(raw, exclude_unicode, include_unicode): + assert sanitize_name(raw) == exclude_unicode + assert sanitize_name(raw, include_unicode=True) == include_unicode diff --git a/tests/utils/test_cache.py 
b/tests/utils/test_cache.py index cd1fdb0115..ed19765b8a 100644 --- a/tests/utils/test_cache.py +++ b/tests/utils/test_cache.py @@ -39,6 +39,7 @@ def test_file_cache(tmp_path: Path, mocker: MockerFixture): loader.assert_called_once() assert "___test_model_" in cache._cache_entry_path('"test_model"').name + assert "客户数据" in cache._cache_entry_path("客户数据").name def test_optimized_query_cache(tmp_path: Path, mocker: MockerFixture): @@ -105,7 +106,7 @@ def test_optimized_query_cache_macro_def_change(tmp_path: Path, mocker: MockerFi assert cache.with_optimized_query(model) assert ( model.render_query_or_raise().sql() - == 'SELECT "_q_0"."a" AS "a" FROM (SELECT 1 AS "a") AS "_q_0" WHERE "_q_0"."a" = 1' + == 'SELECT "_0"."a" AS "a" FROM (SELECT 1 AS "a") AS "_0" WHERE "_0"."a" = 1' ) # Change the filter_ definition @@ -128,5 +129,5 @@ def test_optimized_query_cache_macro_def_change(tmp_path: Path, mocker: MockerFi assert cache.with_optimized_query(new_model) assert ( new_model.render_query_or_raise().sql() - == 'SELECT "_q_0"."a" AS "a" FROM (SELECT 1 AS "a") AS "_q_0" WHERE "_q_0"."a" = 2' + == 'SELECT "_0"."a" AS "a" FROM (SELECT 1 AS "a") AS "_0" WHERE "_0"."a" = 2' ) diff --git a/tests/utils/test_git_client.py b/tests/utils/test_git_client.py new file mode 100644 index 0000000000..13eecf294b --- /dev/null +++ b/tests/utils/test_git_client.py @@ -0,0 +1,173 @@ +import subprocess +from pathlib import Path +import pytest +from sqlmesh.utils.git import GitClient + + +@pytest.fixture +def git_repo(tmp_path: Path) -> Path: + repo_path = tmp_path / "test_repo" + repo_path.mkdir() + subprocess.run(["git", "init", "-b", "main"], cwd=repo_path, check=True, capture_output=True) + return repo_path + + +def test_git_uncommitted_changes(git_repo: Path): + git_client = GitClient(git_repo) + + test_file = git_repo / "model.sql" + test_file.write_text("SELECT 1 AS a") + subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True) + subprocess.run( + [ + 
"git", + "-c", + "user.name=Max", + "-c", + "user.email=max@rb.com", + "commit", + "-m", + "Initial commit", + ], + cwd=git_repo, + check=True, + capture_output=True, + ) + assert git_client.list_uncommitted_changed_files() == [] + + # make an unstaged change and see that it is listed + test_file.write_text("SELECT 2 AS a") + uncommitted = git_client.list_uncommitted_changed_files() + assert len(uncommitted) == 1 + assert uncommitted[0].name == "model.sql" + + # stage the change and test that it is still detected + subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True) + uncommitted = git_client.list_uncommitted_changed_files() + assert len(uncommitted) == 1 + assert uncommitted[0].name == "model.sql" + + +def test_git_both_staged_and_unstaged_changes(git_repo: Path): + git_client = GitClient(git_repo) + + file1 = git_repo / "model1.sql" + file2 = git_repo / "model2.sql" + file1.write_text("SELECT 1") + file2.write_text("SELECT 2") + subprocess.run(["git", "add", "."], cwd=git_repo, check=True, capture_output=True) + subprocess.run( + [ + "git", + "-c", + "user.name=Max", + "-c", + "user.email=max@rb.com", + "commit", + "-m", + "Initial commit", + ], + cwd=git_repo, + check=True, + capture_output=True, + ) + + # stage file1 + file1.write_text("SELECT 10") + subprocess.run(["git", "add", "model1.sql"], cwd=git_repo, check=True, capture_output=True) + + # modify file2 but don't stage it! 
+ file2.write_text("SELECT 20") + + # both should be detected + uncommitted = git_client.list_uncommitted_changed_files() + assert len(uncommitted) == 2 + file_names = {f.name for f in uncommitted} + assert file_names == {"model1.sql", "model2.sql"} + + +def test_git_untracked_files(git_repo: Path): + git_client = GitClient(git_repo) + initial_file = git_repo / "initial.sql" + initial_file.write_text("SELECT 0") + subprocess.run(["git", "add", "initial.sql"], cwd=git_repo, check=True, capture_output=True) + subprocess.run( + [ + "git", + "-c", + "user.name=Max", + "-c", + "user.email=max@rb.com", + "commit", + "-m", + "Initial commit", + ], + cwd=git_repo, + check=True, + capture_output=True, + ) + + new_file = git_repo / "new_model.sql" + new_file.write_text("SELECT 1") + + # untracked file should not appear in uncommitted changes + assert git_client.list_uncommitted_changed_files() == [] + + # but in untracked + untracked = git_client.list_untracked_files() + assert len(untracked) == 1 + assert untracked[0].name == "new_model.sql" + + +def test_git_committed_changes(git_repo: Path): + git_client = GitClient(git_repo) + + test_file = git_repo / "model.sql" + test_file.write_text("SELECT 1") + subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True) + subprocess.run( + [ + "git", + "-c", + "user.name=Max", + "-c", + "user.email=max@rb.com", + "commit", + "-m", + "Initial commit", + ], + cwd=git_repo, + check=True, + capture_output=True, + ) + + subprocess.run( + ["git", "checkout", "-b", "feature"], + cwd=git_repo, + check=True, + capture_output=True, + ) + + test_file.write_text("SELECT 2") + subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True) + subprocess.run( + [ + "git", + "-c", + "user.name=Max", + "-c", + "user.email=max@rb.com", + "commit", + "-m", + "Update on feature branch", + ], + cwd=git_repo, + check=True, + capture_output=True, + ) + + committed = 
git_client.list_committed_changed_files(target_branch="main") + assert len(committed) == 1 + assert committed[0].name == "model.sql" + + assert git_client.list_uncommitted_changed_files() == [] diff --git a/tests/utils/test_helpers.py b/tests/utils/test_helpers.py index ae0742f1db..20a544512e 100644 --- a/tests/utils/test_helpers.py +++ b/tests/utils/test_helpers.py @@ -83,6 +83,7 @@ def test_wrapper(*args, **kwargs): orig_console = get_console() try: new_console = TerminalConsole() + new_console.console.width = 80 new_console.console.no_color = True set_console(new_console) func(*args, **kwargs) diff --git a/tests/utils/test_jinja.py b/tests/utils/test_jinja.py index 5eb00aeb3c..1cf7c1bf95 100644 --- a/tests/utils/test_jinja.py +++ b/tests/utils/test_jinja.py @@ -302,3 +302,30 @@ def test_dbt_adapter_macro_scope(): rendered = registry.build_environment().from_string("{{ spark__macro_a() }}").render() assert rendered.strip() == "macro_a" + + +def test_macro_registry_to_expressions_sorted(): + refs = AttributeDict( + { + "payments": { + "database": "jaffle_shop", + "schema": "main", + "nested": {"foo": "bar", "baz": "bing"}, + }, + "orders": {"schema": "main", "database": "jaffle_shop", "nested_list": ["b", "a", "c"]}, + } + ) + + registry = JinjaMacroRegistry() + registry.add_globals({"sources": {}, "refs": refs}) + + # Ensure that the AttributeDict string representation is sorted + # in order to prevent an unexpected *visual* diff in ModelDiff + # (note that the actual diff is based on the data hashes, so this is purely visual) + expressions = registry.to_expressions() + assert len(expressions) == 1 + assert ( + expressions[0].sql(dialect="duckdb") + == "refs = {'orders': {'database': 'jaffle_shop', 'nested_list': ['a', 'b', 'c'], 'schema': 'main'}, 'payments': {'database': 'jaffle_shop', 'nested': {'baz': 'bing', 'foo': 'bar'}, 'schema': 'main'}}\n" + "sources = {}" + ) diff --git a/tests/utils/test_metaprogramming.py b/tests/utils/test_metaprogramming.py index 
19413f68ef..9a6f0c95cd 100644 --- a/tests/utils/test_metaprogramming.py +++ b/tests/utils/test_metaprogramming.py @@ -23,6 +23,7 @@ Executable, ExecutableKind, _dict_sort, + _resolve_import_module, build_env, func_globals, normalize_source, @@ -49,7 +50,7 @@ def test_print_exception(mocker: MockerFixture): except Exception as ex: print_exception(ex, test_env, out_mock) - expected_message = r""" File ".*?.tests.utils.test_metaprogramming\.py", line 48, in test_print_exception + expected_message = r""" File ".*?.tests.utils.test_metaprogramming\.py", line 49, in test_print_exception eval\("test_fun\(\)", env\).* File '/test/path.py' \(or imported file\), line 2, in test_fun @@ -83,7 +84,18 @@ class DataClass: x: int +class ReferencedClass: + def __init__(self, value: int): + self.value = value + + def get_value(self) -> int: + return self.value + + class MyClass: + def __init__(self, x: int): + self.helper = ReferencedClass(x * 2) + @staticmethod def foo(): return KLASS_X @@ -95,6 +107,13 @@ def bar(cls): def baz(self): return KLASS_Z + def use_referenced(self, value: int) -> int: + ref = ReferencedClass(value) + return ref.get_value() + + def compute_with_reference(self) -> int: + return self.helper.get_value() + 10 + def other_func(a: int) -> int: import sqlglot @@ -103,7 +122,8 @@ def other_func(a: int) -> int: pd.DataFrame([{"x": 1}]) to_table("y") my_lambda() # type: ignore - return X + a + W + obj = MyClass(a) + return X + a + W + obj.compute_with_reference() @contextmanager @@ -131,7 +151,7 @@ def function_with_custom_decorator(): def main_func(y: int, foo=exp.true(), *, bar=expressions.Literal.number(1) + 2) -> int: """DOC STRING""" sqlglot.parse_one("1") - MyClass() + MyClass(47) DataClass(x=y) normalize_model_name("test" + SQLGLOT_META) fetch_data() @@ -177,6 +197,7 @@ def test_func_globals() -> None: assert func_globals(other_func) == { "X": 1, "W": 0, + "MyClass": MyClass, "my_lambda": my_lambda, "pd": pd, "to_table": to_table, @@ -202,7 +223,7 @@ def 
test_normalize_source() -> None: == """def main_func(y: int, foo=exp.true(), *, bar=expressions.Literal.number(1) + 2 ): sqlglot.parse_one('1') - MyClass() + MyClass(47) DataClass(x=y) normalize_model_name('test' + SQLGLOT_META) fetch_data() @@ -223,7 +244,8 @@ def closure(z: int): pd.DataFrame([{'x': 1}]) to_table('y') my_lambda() - return X + a + W""" + obj = MyClass(a) + return X + a + W + obj.compute_with_reference()""" ) @@ -252,7 +274,7 @@ def test_serialize_env() -> None: payload="""def main_func(y: int, foo=exp.true(), *, bar=expressions.Literal.number(1) + 2 ): sqlglot.parse_one('1') - MyClass() + MyClass(47) DataClass(x=y) normalize_model_name('test' + SQLGLOT_META) fetch_data() @@ -295,6 +317,9 @@ class DataClass: path="test_metaprogramming.py", payload="""class MyClass: + def __init__(self, x: int): + self.helper = ReferencedClass(x * 2) + @staticmethod def foo(): return KLASS_X @@ -304,7 +329,26 @@ def bar(cls): return KLASS_Y def baz(self): - return KLASS_Z""", + return KLASS_Z + + def use_referenced(self, value: int): + ref = ReferencedClass(value) + return ref.get_value() + + def compute_with_reference(self): + return self.helper.get_value() + 10""", + ), + "ReferencedClass": Executable( + kind=ExecutableKind.DEFINITION, + name="ReferencedClass", + path="test_metaprogramming.py", + payload="""class ReferencedClass: + + def __init__(self, value: int): + self.value = value + + def get_value(self): + return self.value""", ), "dataclass": Executable( payload="from dataclasses import dataclass", kind=ExecutableKind.IMPORT @@ -341,7 +385,8 @@ def sample_context_manager(): pd.DataFrame([{'x': 1}]) to_table('y') my_lambda() - return X + a + W""", + obj = MyClass(a) + return X + a + W + obj.compute_with_reference()""", ), "sample_context_manager": Executable( payload="""@contextmanager @@ -424,6 +469,21 @@ def function_with_custom_decorator(): assert all(is_metadata for (_, is_metadata) in env.values()) assert serialized_env == expected_env + # Check that 
class references inside init are captured + init_globals = func_globals(MyClass.__init__) + assert "ReferencedClass" in init_globals + + env = {} + build_env(other_func, env=env, name="other_func_test", path=path) + serialized_env = serialize_env(env, path=path) + + assert "MyClass" in serialized_env + assert "ReferencedClass" in serialized_env + + prepared_env = prepare_env(serialized_env) + result = eval("other_func_test(2)", prepared_env) + assert result == 17 + def test_serialize_env_with_enum_import_appearing_in_two_functions() -> None: path = Path("tests/utils") @@ -579,3 +639,18 @@ def test_dict_sort_executable_integration(): # non-deterministic repr should not change the payload exec3 = Executable.value(variables1) assert exec3.payload == "{'env': 'dev', 'debug': True, 'timeout': 30}" + + +def test_resolve_import_module(): + """Test that _resolve_import_module finds the shallowest public re-exporting module.""" + # to_table lives in sqlglot.expressions.builders but is re-exported from sqlglot.expressions + assert _resolve_import_module(to_table, "to_table") == "sqlglot.expressions" + + # Objects whose __module__ is already the public module should be returned as-is + assert _resolve_import_module(exp.Column, "Column") == "sqlglot.expressions" + + # Objects not re-exported by any parent should return the original module + class _Local: + __module__ = "some.deep.internal.module" + + assert _resolve_import_module(_Local, "_Local") == "some.deep.internal.module" diff --git a/tests/utils/test_windows.py b/tests/utils/test_windows.py new file mode 100644 index 0000000000..196589d9c2 --- /dev/null +++ b/tests/utils/test_windows.py @@ -0,0 +1,39 @@ +import pytest +from pathlib import Path +from sqlmesh.utils.windows import IS_WINDOWS, WINDOWS_LONGPATH_PREFIX, fix_windows_path + + +@pytest.mark.skipif( + not IS_WINDOWS, reason="pathlib.Path only produces WindowsPath objects on Windows" +) +def test_fix_windows_path(): + short_path = Path("c:\\foo") + 
short_path_prefixed = Path(WINDOWS_LONGPATH_PREFIX + "c:\\foo") + + segments = "\\".join(["bar", "baz", "bing"] * 50) + long_path = Path("c:\\" + segments) + long_path_prefixed = Path(WINDOWS_LONGPATH_PREFIX + "c:\\" + segments) + + assert len(str(short_path.absolute)) < 260 + assert len(str(long_path.absolute)) > 260 + + # paths less than 260 chars are still prefixed because they may be being used as a base path + assert fix_windows_path(short_path) == short_path_prefixed + + # paths greater than 260 characters don't work at all without the prefix + assert fix_windows_path(long_path) == long_path_prefixed + + # multiple calls dont keep appending the same prefix + assert ( + fix_windows_path(fix_windows_path(fix_windows_path(long_path_prefixed))) + == long_path_prefixed + ) + + # paths with relative sections need to have relative sections resolved before they can be used + # since the \\?\ prefix doesnt work for paths with relative sections + assert fix_windows_path(Path("c:\\foo\\..\\bar")) == Path(WINDOWS_LONGPATH_PREFIX + "c:\\bar") + + # also check that relative sections are still resolved if they are added to a previously prefixed path + base = fix_windows_path(Path("c:\\foo")) + assert base == Path(WINDOWS_LONGPATH_PREFIX + "c:\\foo") + assert fix_windows_path(base / ".." / "bar") == Path(WINDOWS_LONGPATH_PREFIX + "c:\\bar") diff --git a/vscode/extension/README.md b/vscode/extension/README.md index 64f6c3e130..dac6d9cae6 100644 --- a/vscode/extension/README.md +++ b/vscode/extension/README.md @@ -77,8 +77,8 @@ If you encounter issues, please refer to the [VSCode Extension Guide](https://sq We welcome contributions! Please: -1. [Report bugs](https://github.com/tobikodata/sqlmesh/issues) you encounter -2. [Request features](https://github.com/tobikodata/sqlmesh/issues) you'd like to see +1. [Report bugs](https://github.com/SQLMesh/sqlmesh/issues) you encounter +2. [Request features](https://github.com/SQLMesh/sqlmesh/issues) you'd like to see 3. 
Share feedback on your experience ## 📄 License @@ -87,7 +87,7 @@ This extension is licensed under the Apache License 2.0. See [LICENSE](LICENSE) ## 🔗 Links -- [SQLMesh GitHub Repository](https://github.com/tobikodata/sqlmesh) +- [SQLMesh GitHub Repository](https://github.com/SQLMesh/sqlmesh) - [Tobiko Data Website](https://tobikodata.com) - [Extension Marketplace Page](https://marketplace.visualstudio.com/items?itemName=tobikodata.sqlmesh) diff --git a/vscode/extension/package.json b/vscode/extension/package.json index 35499ad68f..342096731f 100644 --- a/vscode/extension/package.json +++ b/vscode/extension/package.json @@ -6,7 +6,7 @@ "version": "0.0.7", "repository": { "type": "git", - "url": "https://github.com/tobikodata/sqlmesh" + "url": "https://github.com/SQLMesh/sqlmesh" }, "main": "./dist/extension.js", "icon": "assets/logo.png", diff --git a/web/client/playwright.config.ts b/web/client/playwright.config.ts index afaa00c716..c574869b87 100644 --- a/web/client/playwright.config.ts +++ b/web/client/playwright.config.ts @@ -50,7 +50,10 @@ export default defineConfig({ /* Run your local dev server before starting the tests */ webServer: { - command: 'npm run build && npm run preview', + command: + process.env.PLAYWRIGHT_SKIP_BUILD != null + ? 
'npm run preview' + : 'npm run build && npm run preview', url: URL, reuseExistingServer: process.env.CI == null, timeout: 120000, // Two minutes diff --git a/web/client/vite.config.ts b/web/client/vite.config.ts index 206504cf4b..4b98b21c68 100644 --- a/web/client/vite.config.ts +++ b/web/client/vite.config.ts @@ -68,5 +68,6 @@ export default defineConfig({ }, preview: { port: 8005, + host: '127.0.0.1', }, }) diff --git a/web/common/.storybook/main.ts b/web/common/.storybook/main.ts index 8994b8a737..e916ea6f64 100644 --- a/web/common/.storybook/main.ts +++ b/web/common/.storybook/main.ts @@ -2,7 +2,7 @@ import type { StorybookConfig } from '@storybook/react-vite' const config: StorybookConfig = { stories: ['../src/**/*.mdx', '../src/**/*.stories.@(js|jsx|mjs|ts|tsx)'], - addons: ['@storybook/addon-docs', '@storybook/addon-onboarding'], + addons: ['@storybook/addon-docs'], framework: { name: '@storybook/react-vite', options: {}, diff --git a/web/common/.syncpackrc b/web/common/.syncpackrc index 52d97009ce..edc87cc315 100644 --- a/web/common/.syncpackrc +++ b/web/common/.syncpackrc @@ -14,7 +14,7 @@ ], "semverGroups": [ { - "label": "Use caret ranges for all dependencies", + "label": "Use exact versions for all dependencies", "dependencies": [ "**" ], @@ -23,7 +23,7 @@ "peer", "prod" ], - "range": "^" + "range": "" } ] } \ No newline at end of file diff --git a/web/common/package-lock.json b/web/common/package-lock.json deleted file mode 100644 index eaaaee941b..0000000000 --- a/web/common/package-lock.json +++ /dev/null @@ -1,7183 +0,0 @@ -{ - "name": "@tobikodata/sqlmesh-common", - "version": "0.0.1", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "@tobikodata/sqlmesh-common", - "version": "0.0.1", - "license": "Apache-2.0", - "devDependencies": { - "@eslint/js": "^9.31.0", - "@radix-ui/react-dialog": "^1.1.15", - "@radix-ui/react-dropdown-menu": "^2.1.16", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-slot": "^1.2.3", - 
"@radix-ui/react-tooltip": "^1.2.8", - "@storybook/addon-docs": "^9.1.5", - "@storybook/addon-onboarding": "^9.1.5", - "@storybook/react-vite": "^9.1.5", - "@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@testing-library/dom": "^10.4.1", - "@testing-library/jest-dom": "^6.6.3", - "@testing-library/react": "^16.3.0", - "@types/node": "^20.11.25", - "@types/react": "^18.3.23", - "@types/react-dom": "^18.3.7", - "@vitejs/plugin-react": "^4.7.0", - "@vitest/browser": "^3.2.4", - "@xyflow/react": "^12.8.4", - "autoprefixer": "^10.4.21", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "cmdk": "^1.1.1", - "eslint": "^9.31.0", - "eslint-plugin-react-hooks": "^5.2.0", - "eslint-plugin-storybook": "^9.1.5", - "fuse.js": "^7.1.0", - "globals": "^16.3.0", - "lucide-react": "^0.542.0", - "playwright": "^1.54.1", - "postcss": "^8.5.6", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "storybook": "^9.1.5", - "syncpack": "^13.0.4", - "tailwind-merge": "^3.3.1", - "tailwind-scrollbar": "^4.0.2", - "tailwindcss": "^3.4.17", - "typescript": "^5.8.3", - "typescript-eslint": "^8.38.0", - "vite": "^6.3.5", - "vite-plugin-dts": "^4.5.4", - "vite-plugin-static-copy": "^3.1.1", - "vitest": "^3.2.4" - }, - "peerDependencies": { - "@radix-ui/react-dialog": "^1.1.15", - "@radix-ui/react-dropdown-menu": "^2.1.16", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-slot": "^1.2.3", - "@radix-ui/react-tooltip": "^1.2.8", - "@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@xyflow/react": "^12.8.4", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "cmdk": "^1.1.1", - "fuse.js": "^7.1.0", - "lucide-react": "^0.542.0", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "tailwind-merge": "^3.3.1", - "tailwindcss": "^3.4.17" - } - }, - "../../node_modules/.pnpm/@eslint+js@9.31.0/node_modules/@eslint/js": { - "version": "9.31.0", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || 
^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://eslint.org/donate" - } - }, - "../../node_modules/.pnpm/@vitejs+plugin-react@4.7.0_vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terse_p5zuafkpgv2vlm3nhxz3zj4hsu/node_modules/@vitejs/plugin-react": { - "version": "4.7.0", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.28.0", - "@babel/plugin-transform-react-jsx-self": "^7.27.1", - "@babel/plugin-transform-react-jsx-source": "^7.27.1", - "@rolldown/pluginutils": "1.0.0-beta.27", - "@types/babel__core": "^7.20.5", - "react-refresh": "^0.17.0" - }, - "devDependencies": { - "@vitejs/react-common": "workspace:*", - "babel-plugin-react-compiler": "19.1.0-rc.2", - "react": "^19.1.0", - "react-dom": "^19.1.0", - "rolldown": "1.0.0-beta.27", - "tsdown": "^0.12.9", - "vitest": "^3.2.4" - }, - "engines": { - "node": "^14.18.0 || >=16.0.0" - }, - "peerDependencies": { - "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "../../node_modules/.pnpm/eslint@9.31.0_jiti@2.4.2/node_modules/eslint": { - "version": "9.31.0", - "dev": true, - "license": "MIT", - "dependencies": { - "@eslint-community/eslint-utils": "^4.2.0", - "@eslint-community/regexpp": "^4.12.1", - "@eslint/config-array": "^0.21.0", - "@eslint/config-helpers": "^0.3.0", - "@eslint/core": "^0.15.0", - "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.31.0", - "@eslint/plugin-kit": "^0.3.1", - "@humanfs/node": "^0.16.6", - "@humanwhocodes/module-importer": "^1.0.1", - "@humanwhocodes/retry": "^0.4.2", - "@types/estree": "^1.0.6", - "@types/json-schema": "^7.0.15", - "ajv": "^6.12.4", - "chalk": "^4.0.0", - "cross-spawn": "^7.0.6", - "debug": "^4.3.2", - "escape-string-regexp": "^4.0.0", - "eslint-scope": "^8.4.0", - "eslint-visitor-keys": "^4.2.1", - "espree": "^10.4.0", - "esquery": "^1.5.0", - "esutils": "^2.0.2", - "fast-deep-equal": "^3.1.3", - "file-entry-cache": "^8.0.0", - "find-up": "^5.0.0", - "glob-parent": "^6.0.2", - "ignore": "^5.2.0", - 
"imurmurhash": "^0.1.4", - "is-glob": "^4.0.0", - "json-stable-stringify-without-jsonify": "^1.0.1", - "lodash.merge": "^4.6.2", - "minimatch": "^3.1.2", - "natural-compare": "^1.4.0", - "optionator": "^0.9.3" - }, - "bin": { - "eslint": "bin/eslint.js" - }, - "devDependencies": { - "@arethetypeswrong/cli": "^0.18.0", - "@babel/core": "^7.4.3", - "@babel/preset-env": "^7.4.3", - "@cypress/webpack-preprocessor": "^6.0.2", - "@eslint/json": "^0.13.0", - "@trunkio/launcher": "^1.3.4", - "@types/esquery": "^1.5.4", - "@types/node": "^22.13.14", - "@typescript-eslint/parser": "^8.4.0", - "babel-loader": "^8.0.5", - "c8": "^7.12.0", - "chai": "^4.0.1", - "cheerio": "^0.22.0", - "common-tags": "^1.8.0", - "core-js": "^3.1.3", - "cypress": "^14.1.0", - "ejs": "^3.0.2", - "eslint": "file:.", - "eslint-config-eslint": "file:packages/eslint-config-eslint", - "eslint-plugin-eslint-plugin": "^6.0.0", - "eslint-plugin-expect-type": "^0.6.0", - "eslint-plugin-yml": "^1.14.0", - "eslint-release": "^3.3.0", - "eslint-rule-composer": "^0.3.0", - "eslump": "^3.0.0", - "esprima": "^4.0.1", - "fast-glob": "^3.2.11", - "fs-teardown": "^0.1.3", - "glob": "^10.0.0", - "globals": "^16.2.0", - "got": "^11.8.3", - "gray-matter": "^4.0.3", - "jiti": "^2.2.0", - "jiti-v2.0": "npm:jiti@2.0.x", - "jiti-v2.1": "npm:jiti@2.1.x", - "knip": "^5.60.2", - "lint-staged": "^11.0.0", - "load-perf": "^0.2.0", - "markdown-it": "^12.2.0", - "markdown-it-container": "^3.0.0", - "marked": "^4.0.8", - "metascraper": "^5.25.7", - "metascraper-description": "^5.25.7", - "metascraper-image": "^5.29.3", - "metascraper-logo": "^5.25.7", - "metascraper-logo-favicon": "^5.25.7", - "metascraper-title": "^5.25.7", - "mocha": "^11.7.1", - "node-polyfill-webpack-plugin": "^1.0.3", - "npm-license": "^0.3.3", - "pirates": "^4.0.5", - "progress": "^2.0.3", - "proxyquire": "^2.0.1", - "recast": "^0.23.0", - "regenerator-runtime": "^0.14.0", - "semver": "^7.5.3", - "shelljs": "^0.10.0", - "sinon": "^11.0.0", - "typescript": 
"^5.3.3", - "webpack": "^5.23.0", - "webpack-cli": "^4.5.0", - "yorkie": "^2.0.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://eslint.org/donate" - }, - "peerDependencies": { - "jiti": "*" - }, - "peerDependenciesMeta": { - "jiti": { - "optional": true - } - } - }, - "../../node_modules/.pnpm/typescript-eslint@8.38.0_eslint@9.31.0_jiti@2.4.2__typescript@5.8.3/node_modules/typescript-eslint": { - "version": "8.38.0", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/eslint-plugin": "8.38.0", - "@typescript-eslint/parser": "8.38.0", - "@typescript-eslint/typescript-estree": "8.38.0", - "@typescript-eslint/utils": "8.38.0" - }, - "devDependencies": { - "@vitest/coverage-v8": "^3.1.3", - "eslint": "*", - "rimraf": "*", - "typescript": "*", - "vitest": "^3.1.3" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" - } - }, - "../../node_modules/.pnpm/typescript@5.8.3/node_modules/typescript": { - "version": "5.8.3", - "dev": true, - "license": "Apache-2.0", - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "devDependencies": { - "@dprint/formatter": "^0.4.1", - "@dprint/typescript": "0.93.3", - "@esfx/canceltoken": "^1.0.0", - "@eslint/js": "^9.17.0", - "@octokit/rest": "^21.0.2", - "@types/chai": "^4.3.20", - "@types/diff": "^5.2.3", - "@types/minimist": "^1.2.5", - "@types/mocha": "^10.0.10", - "@types/ms": "^0.7.34", - "@types/node": "latest", - "@types/source-map-support": "^0.5.10", - "@types/which": "^3.0.4", - "@typescript-eslint/rule-tester": "^8.18.1", - "@typescript-eslint/type-utils": "^8.18.1", - "@typescript-eslint/utils": "^8.18.1", - "azure-devops-node-api": "^14.1.0", - "c8": "^10.1.3", - "chai": "^4.5.0", - "chalk": "^4.1.2", - "chokidar": "^3.6.0", 
- "diff": "^5.2.0", - "dprint": "^0.47.6", - "esbuild": "^0.24.0", - "eslint": "^9.17.0", - "eslint-formatter-autolinkable-stylish": "^1.4.0", - "eslint-plugin-regexp": "^2.7.0", - "fast-xml-parser": "^4.5.1", - "glob": "^10.4.5", - "globals": "^15.13.0", - "hereby": "^1.10.0", - "jsonc-parser": "^3.3.1", - "knip": "^5.41.0", - "minimist": "^1.2.8", - "mocha": "^10.8.2", - "mocha-fivemat-progress-reporter": "^0.1.0", - "monocart-coverage-reports": "^2.11.4", - "ms": "^2.1.3", - "playwright": "^1.49.1", - "source-map-support": "^0.5.21", - "tslib": "^2.8.1", - "typescript": "^5.7.2", - "typescript-eslint": "^8.18.1", - "which": "^3.0.1" - }, - "engines": { - "node": ">=14.17" - } - }, - "../../node_modules/.pnpm/vite-plugin-dts@4.5.4_@types+node@24.1.0_rollup@4.45.1_typescript@5.8.3_vite@6.3.5_@types+nod_ddgp24sr5pf6ze3b5hs7mrzr5e/node_modules/vite-plugin-dts": { - "version": "4.5.4", - "dev": true, - "license": "MIT", - "dependencies": { - "@microsoft/api-extractor": "^7.50.1", - "@rollup/pluginutils": "^5.1.4", - "@volar/typescript": "^2.4.11", - "@vue/language-core": "2.2.0", - "compare-versions": "^6.1.1", - "debug": "^4.4.0", - "kolorist": "^1.8.0", - "local-pkg": "^1.0.0", - "magic-string": "^0.30.17" - }, - "devDependencies": { - "@commitlint/cli": "^19.7.1", - "@types/debug": "^4.1.12", - "@types/minimist": "^1.2.5", - "@types/node": "^22.13.5", - "@types/prompts": "^2.4.9", - "@types/semver": "^7.5.8", - "@vexip-ui/commitlint-config": "^0.5.0", - "@vexip-ui/eslint-config": "^0.12.1", - "@vexip-ui/prettier-config": "^1.0.0", - "@vexip-ui/scripts": "^1.2.0", - "@vue/eslint-config-standard": "^8.0.1", - "@vue/eslint-config-typescript": "^13.0.0", - "conventional-changelog-cli": "^5.0.0", - "eslint": "^8.57.0", - "execa": "^9.5.2", - "husky": "^9.1.7", - "is-ci": "^4.1.0", - "lint-staged": "^15.4.3", - "minimist": "^1.2.8", - "pinst": "^3.0.0", - "prettier": "^3.5.2", - "pretty-quick": "^4.0.0", - "prompts": "^2.4.2", - "rimraf": "^6.0.1", - "semver": "^7.7.1", 
- "tsx": "^4.19.3", - "typescript": "5.7.3", - "unbuild": "^3.3.1", - "vite": "^6.2.0", - "vitest": "^3.0.7" - }, - "peerDependencies": { - "typescript": "*", - "vite": "*" - }, - "peerDependenciesMeta": { - "vite": { - "optional": true - } - } - }, - "../../node_modules/.pnpm/vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terser@5.43.1_tsx@4.20.3_yaml@2.8.0/node_modules/vite": { - "version": "6.3.5", - "dev": true, - "license": "MIT", - "dependencies": { - "esbuild": "^0.25.0", - "fdir": "^6.4.4", - "picomatch": "^4.0.2", - "postcss": "^8.5.3", - "rollup": "^4.34.9", - "tinyglobby": "^0.2.13" - }, - "bin": { - "vite": "bin/vite.js" - }, - "devDependencies": { - "@ampproject/remapping": "^2.3.0", - "@babel/parser": "^7.27.0", - "@jridgewell/trace-mapping": "^0.3.25", - "@polka/compression": "^1.0.0-next.25", - "@rollup/plugin-alias": "^5.1.1", - "@rollup/plugin-commonjs": "^28.0.3", - "@rollup/plugin-dynamic-import-vars": "2.1.4", - "@rollup/plugin-json": "^6.1.0", - "@rollup/plugin-node-resolve": "16.0.1", - "@rollup/pluginutils": "^5.1.4", - "@types/escape-html": "^1.0.4", - "@types/pnpapi": "^0.0.5", - "artichokie": "^0.3.1", - "cac": "^6.7.14", - "chokidar": "^3.6.0", - "connect": "^3.7.0", - "convert-source-map": "^2.0.0", - "cors": "^2.8.5", - "cross-spawn": "^7.0.6", - "debug": "^4.4.0", - "dep-types": "link:./src/types", - "dotenv": "^16.5.0", - "dotenv-expand": "^12.0.2", - "es-module-lexer": "^1.6.0", - "escape-html": "^1.0.3", - "estree-walker": "^3.0.3", - "etag": "^1.8.1", - "http-proxy": "^1.18.1", - "launch-editor-middleware": "^2.10.0", - "lightningcss": "^1.29.3", - "magic-string": "^0.30.17", - "mlly": "^1.7.4", - "mrmime": "^2.0.1", - "nanoid": "^5.1.5", - "open": "^10.1.1", - "parse5": "^7.2.1", - "pathe": "^2.0.3", - "periscopic": "^4.0.2", - "picocolors": "^1.1.1", - "postcss-import": "^16.1.0", - "postcss-load-config": "^6.0.1", - "postcss-modules": "^6.0.1", - "resolve.exports": "^2.0.3", - "rollup-plugin-dts": "^6.2.1", - 
"rollup-plugin-esbuild": "^6.2.1", - "rollup-plugin-license": "^3.6.0", - "sass": "^1.86.3", - "sass-embedded": "^1.86.3", - "sirv": "^3.0.1", - "source-map-support": "^0.5.21", - "strip-literal": "^3.0.0", - "terser": "^5.39.0", - "tsconfck": "^3.1.5", - "tslib": "^2.8.1", - "types": "link:./types", - "ufo": "^1.6.1", - "ws": "^8.18.1" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://github.com/vitejs/vite?sponsor=1" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - }, - "peerDependencies": { - "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", - "jiti": ">=1.21.0", - "less": "*", - "lightningcss": "^1.21.0", - "sass": "*", - "sass-embedded": "*", - "stylus": "*", - "sugarss": "*", - "terser": "^5.16.0", - "tsx": "^4.8.1", - "yaml": "^2.4.2" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "jiti": { - "optional": true - }, - "less": { - "optional": true - }, - "lightningcss": { - "optional": true - }, - "sass": { - "optional": true - }, - "sass-embedded": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "terser": { - "optional": true - }, - "tsx": { - "optional": true - }, - "yaml": { - "optional": true - } - } - }, - "node_modules/@adobe/css-tools": { - "version": "4.4.4", - "resolved": "https://registry.npmjs.org/@adobe/css-tools/-/css-tools-4.4.4.tgz", - "integrity": "sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg==", - "dev": true, - "license": "MIT" - }, - "node_modules/@alloc/quick-lru": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", - "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - 
"node_modules/@ampproject/remapping": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", - "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.24" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/code-frame": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", - "integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-validator-identifier": "^7.27.1", - "js-tokens": "^4.0.0", - "picocolors": "^1.1.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/compat-data": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.28.0.tgz", - "integrity": "sha512-60X7qkglvrap8mn1lh2ebxXdZYtUcpd7gsmy9kLaBJ4i/WdY8PqTSdxyA8qraikqKQK5C1KRBKXqznrVapyNaw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/core": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.3.tgz", - "integrity": "sha512-yDBHV9kQNcr2/sUr9jghVyz9C3Y5G2zUM2H2lo+9mKv4sFgbA8s8Z9t8D1jiTkGoO/NoIfKMyKWr4s6CN23ZwQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@ampproject/remapping": "^2.2.0", - "@babel/code-frame": "^7.27.1", - "@babel/generator": "^7.28.3", - "@babel/helper-compilation-targets": "^7.27.2", - "@babel/helper-module-transforms": "^7.28.3", - "@babel/helpers": "^7.28.3", - "@babel/parser": "^7.28.3", - "@babel/template": "^7.27.2", - "@babel/traverse": "^7.28.3", - "@babel/types": "^7.28.2", - "convert-source-map": "^2.0.0", - "debug": "^4.1.0", - "gensync": 
"^1.0.0-beta.2", - "json5": "^2.2.3", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/babel" - } - }, - "node_modules/@babel/core/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/generator": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.28.3.tgz", - "integrity": "sha512-3lSpxGgvnmZznmBkCRnVREPUFJv2wrv9iAoFDvADJc0ypmdOxdUtcLeBgBJ6zE0PMeTKnxeQzyk0xTBq4Ep7zw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.28.3", - "@babel/types": "^7.28.2", - "@jridgewell/gen-mapping": "^0.3.12", - "@jridgewell/trace-mapping": "^0.3.28", - "jsesc": "^3.0.2" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets": { - "version": "7.27.2", - "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.27.2.tgz", - "integrity": "sha512-2+1thGUUWWjLTYTHZWK1n8Yga0ijBz1XAhUXcKy81rd5g6yh7hGqMp45v7cadSbEHc9G3OTv45SyneRN3ps4DQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/compat-data": "^7.27.2", - "@babel/helper-validator-option": "^7.27.1", - "browserslist": "^4.24.0", - "lru-cache": "^5.1.1", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets/node_modules/lru-cache": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", - "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", - "dev": true, - "license": "ISC", - "dependencies": { - "yallist": "^3.0.2" 
- } - }, - "node_modules/@babel/helper-compilation-targets/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/helper-globals": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", - "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-imports": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.27.1.tgz", - "integrity": "sha512-0gSFWUPNXNopqtIPQvlD5WgXYI5GY2kP2cCvoT8kczjbfcfuIljTbcWrulD1CIPIX2gt1wghbDy08yE1p+/r3w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/traverse": "^7.27.1", - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-transforms": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.3.tgz", - "integrity": "sha512-gytXUbs8k2sXS9PnQptz5o0QnpLL51SwASIORY6XaBKF88nsOT0Zw9szLqlSGQDP/4TljBAD5y98p2U1fqkdsw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-module-imports": "^7.27.1", - "@babel/helper-validator-identifier": "^7.27.1", - "@babel/traverse": "^7.28.3" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-string-parser": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", - "integrity": 
"sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.27.1.tgz", - "integrity": "sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-option": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", - "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helpers": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.3.tgz", - "integrity": "sha512-PTNtvUQihsAsDHMOP5pfobP8C6CM4JWXmP8DrEIt46c3r2bf87Ua1zoqevsMo9g+tWDwgWrFP5EIxuBx5RudAw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/template": "^7.27.2", - "@babel/types": "^7.28.2" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/parser": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.28.3.tgz", - "integrity": "sha512-7+Ey1mAgYqFAx2h0RuoxcQT5+MlG3GTV0TQrgr7/ZliKsm/MNDxVVutlWaziMq7wJNAz8MTqz55XLpWvva6StA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.28.2" - }, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/runtime": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.3.tgz", - "integrity": 
"sha512-9uIQ10o0WGdpP6GDhXcdOJPJuDgFtIDtN/9+ArJQ2NAfAmiuhTQdzkaTGR33v43GYS2UrSA0eX2pPPHoFVvpxA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/template": { - "version": "7.27.2", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.2.tgz", - "integrity": "sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/parser": "^7.27.2", - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/traverse": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.28.3.tgz", - "integrity": "sha512-7w4kZYHneL3A6NP2nxzHvT3HCZ7puDZZjFMqDpBPECub79sTtSO5CGXDkKrTQq8ksAwfD/XI2MRFX23njdDaIQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/generator": "^7.28.3", - "@babel/helper-globals": "^7.28.0", - "@babel/parser": "^7.28.3", - "@babel/template": "^7.27.2", - "@babel/types": "^7.28.2", - "debug": "^4.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/types": { - "version": "7.28.2", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.28.2.tgz", - "integrity": "sha512-ruv7Ae4J5dUYULmeXw1gmb7rYRz57OWCPM57pHojnLq/3Z1CK2lNSLTCVjxVk1F/TZHwOZZrOWi0ur95BbLxNQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^7.27.1", - "@babel/helper-validator-identifier": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@esbuild/aix-ppc64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.9.tgz", - "integrity": "sha512-OaGtL73Jck6pBKjNIe24BnFE6agGl+6KxDtTfHhy1HmhthfKouEcOhqpSL64K4/0WCtbKFLOdzD/44cJ4k9opA==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": 
[ - "aix" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.9.tgz", - "integrity": "sha512-5WNI1DaMtxQ7t7B6xa572XMXpHAaI/9Hnhk8lcxF4zVN4xstUgTlvuGDorBguKEnZO70qwEcLpfifMLoxiPqHQ==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.9.tgz", - "integrity": "sha512-IDrddSmpSv51ftWslJMvl3Q2ZT98fUSL2/rlUXuVqRXHCs5EUF1/f+jbjF5+NG9UffUDMCiTyh8iec7u8RlTLg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.9.tgz", - "integrity": "sha512-I853iMZ1hWZdNllhVZKm34f4wErd4lMyeV7BLzEExGEIZYsOzqDWDf+y082izYUE8gtJnYHdeDpN/6tUdwvfiw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.9.tgz", - "integrity": "sha512-XIpIDMAjOELi/9PB30vEbVMs3GV1v2zkkPnuyRRURbhqjyzIINwj+nbQATh4H9GxUgH1kFsEyQMxwiLFKUS6Rg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.9.tgz", - "integrity": "sha512-jhHfBzjYTA1IQu8VyrjCX4ApJDnH+ez+IYVEoJHeqJm9VhG9Dh2BYaJritkYK3vMaXrf7Ogr/0MQ8/MeIefsPQ==", - 
"cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.9.tgz", - "integrity": "sha512-z93DmbnY6fX9+KdD4Ue/H6sYs+bhFQJNCPZsi4XWJoYblUqT06MQUdBCpcSfuiN72AbqeBFu5LVQTjfXDE2A6Q==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.9.tgz", - "integrity": "sha512-mrKX6H/vOyo5v71YfXWJxLVxgy1kyt1MQaD8wZJgJfG4gq4DpQGpgTB74e5yBeQdyMTbgxp0YtNj7NuHN0PoZg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.9.tgz", - "integrity": "sha512-HBU2Xv78SMgaydBmdor38lg8YDnFKSARg1Q6AT0/y2ezUAKiZvc211RDFHlEZRFNRVhcMamiToo7bDx3VEOYQw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.9.tgz", - "integrity": "sha512-BlB7bIcLT3G26urh5Dmse7fiLmLXnRlopw4s8DalgZ8ef79Jj4aUcYbk90g8iCa2467HX8SAIidbL7gsqXHdRw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ia32": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.9.tgz", - "integrity": 
"sha512-e7S3MOJPZGp2QW6AK6+Ly81rC7oOSerQ+P8L0ta4FhVi+/j/v2yZzx5CqqDaWjtPFfYz21Vi1S0auHrap3Ma3A==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-loong64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.9.tgz", - "integrity": "sha512-Sbe10Bnn0oUAB2AalYztvGcK+o6YFFA/9829PhOCUS9vkJElXGdphz0A3DbMdP8gmKkqPmPcMJmJOrI3VYB1JQ==", - "cpu": [ - "loong64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-mips64el": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.9.tgz", - "integrity": "sha512-YcM5br0mVyZw2jcQeLIkhWtKPeVfAerES5PvOzaDxVtIyZ2NUBZKNLjC5z3/fUlDgT6w89VsxP2qzNipOaaDyA==", - "cpu": [ - "mips64el" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ppc64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.9.tgz", - "integrity": "sha512-++0HQvasdo20JytyDpFvQtNrEsAgNG2CY1CLMwGXfFTKGBGQT3bOeLSYE2l1fYdvML5KUuwn9Z8L1EWe2tzs1w==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-riscv64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.9.tgz", - "integrity": "sha512-uNIBa279Y3fkjV+2cUjx36xkx7eSjb8IvnL01eXUKXez/CBHNRw5ekCGMPM0BcmqBxBcdgUWuUXmVWwm4CH9kg==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-s390x": { - "version": "0.25.9", - 
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.9.tgz", - "integrity": "sha512-Mfiphvp3MjC/lctb+7D287Xw1DGzqJPb/J2aHHcHxflUo+8tmN/6d4k6I2yFR7BVo5/g7x2Monq4+Yew0EHRIA==", - "cpu": [ - "s390x" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.9.tgz", - "integrity": "sha512-iSwByxzRe48YVkmpbgoxVzn76BXjlYFXC7NvLYq+b+kDjyyk30J0JY47DIn8z1MO3K0oSl9fZoRmZPQI4Hklzg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.9.tgz", - "integrity": "sha512-9jNJl6FqaUG+COdQMjSCGW4QiMHH88xWbvZ+kRVblZsWrkXlABuGdFJ1E9L7HK+T0Yqd4akKNa/lO0+jDxQD4Q==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.9.tgz", - "integrity": "sha512-RLLdkflmqRG8KanPGOU7Rpg829ZHu8nFy5Pqdi9U01VYtG9Y0zOG6Vr2z4/S+/3zIyOxiK6cCeYNWOFR9QP87g==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.9.tgz", - "integrity": "sha512-YaFBlPGeDasft5IIM+CQAhJAqS3St3nJzDEgsgFixcfZeyGPCd6eJBWzke5piZuZ7CtL656eOSYKk4Ls2C0FRQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" 
- } - }, - "node_modules/@esbuild/openbsd-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.9.tgz", - "integrity": "sha512-1MkgTCuvMGWuqVtAvkpkXFmtL8XhWy+j4jaSO2wxfJtilVCi0ZE37b8uOdMItIHz4I6z1bWWtEX4CJwcKYLcuA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openharmony-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.9.tgz", - "integrity": "sha512-4Xd0xNiMVXKh6Fa7HEJQbrpP3m3DDn43jKxMjxLLRjWnRsfxjORYJlXPO4JNcXtOyfajXorRKY9NkOpTHptErg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openharmony" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.9.tgz", - "integrity": "sha512-WjH4s6hzo00nNezhp3wFIAfmGZ8U7KtrJNlFMRKxiI9mxEK1scOMAaa9i4crUtu+tBr+0IN6JCuAcSBJZfnphw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "sunos" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.9.tgz", - "integrity": "sha512-mGFrVJHmZiRqmP8xFOc6b84/7xa5y5YvR1x8djzXpJBSv/UsNK6aqec+6JDjConTgvvQefdGhFDAs2DLAds6gQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.9.tgz", - "integrity": "sha512-b33gLVU2k11nVx1OhX3C8QQP6UHQK4ZtN56oFWvVXvz2VkDoe6fbG8TOgHFxEvqeqohmRnIHe5A1+HADk4OQww==", - "cpu": [ - "ia32" - ], - "dev": true, - 
"license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-x64": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.9.tgz", - "integrity": "sha512-PPOl1mi6lpLNQxnGoyAfschAodRFYXJ+9fs6WHXz7CSWKbOqiMZsubC+BQsVKuul+3vKLuwTHsS2c2y9EoKwxQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@eslint-community/eslint-utils": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz", - "integrity": "sha512-dyybb3AcajC7uha6CvhdVRJqaKyn7w2YKqKyAN37NKYgZT36w+iRb0Dymmc5qEJ549c/S31cMMSFd75bteCpCw==", - "dev": true, - "license": "MIT", - "dependencies": { - "eslint-visitor-keys": "^3.4.3" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - }, - "peerDependencies": { - "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" - } - }, - "node_modules/@eslint/js": { - "resolved": "../../node_modules/.pnpm/@eslint+js@9.31.0/node_modules/@eslint/js", - "link": true - }, - "node_modules/@floating-ui/core": { - "version": "1.7.3", - "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz", - "integrity": "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/utils": "^0.2.10" - } - }, - "node_modules/@floating-ui/dom": { - "version": "1.7.4", - "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.4.tgz", - "integrity": "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/core": "^1.7.3", - "@floating-ui/utils": "^0.2.10" - } - }, - 
"node_modules/@floating-ui/react-dom": { - "version": "2.1.6", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.6.tgz", - "integrity": "sha512-4JX6rEatQEvlmgU80wZyq9RT96HZJa88q8hp0pBd+LrczeDI4o6uA2M+uvxngVHo4Ihr8uibXxH6+70zhAFrVw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/dom": "^1.7.4" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" - } - }, - "node_modules/@floating-ui/utils": { - "version": "0.2.10", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz", - "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "dev": true, - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@joshwooding/vite-plugin-react-docgen-typescript": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/@joshwooding/vite-plugin-react-docgen-typescript/-/vite-plugin-react-docgen-typescript-0.6.1.tgz", - "integrity": "sha512-J4BaTocTOYFkMHIra1JDWrMWpNmBl4EkplIwHEsV8aeUOtdWjwSnln9U7twjMFTAEB7mptNtSKyVi1Y2W9sDJw==", - "dev": true, - "license": "MIT", - "dependencies": { - "glob": "^10.0.0", - "magic-string": "^0.30.0", - "react-docgen-typescript": "^2.2.2" - }, - "peerDependencies": { - "typescript": ">= 4.3.x", - "vite": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - 
"node_modules/@jridgewell/gen-mapping": { - "version": "0.3.13", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", - "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", - "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.5", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", - "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", - "dev": true, - "license": "MIT" - }, - "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.30", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.30.tgz", - "integrity": "sha512-GQ7Nw5G2lTu/BtHTKfXhKHok2WGetd4XYcVKGx00SjAk8GMwgJM3zr6zORiPGuOE+/vkc90KtTosSSvaCjKb2Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/@mdx-js/react": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.0.tgz", - "integrity": "sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/mdx": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - }, - "peerDependencies": { - "@types/react": ">=16", - 
"react": ">=16" - } - }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, - "node_modules/@polka/url": { - "version": "1.0.0-next.29", - "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.29.tgz", - "integrity": "sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==", - "dev": true, - "license": "MIT" - }, - "node_modules/@radix-ui/primitive": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", - "integrity": 
"sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", - "dev": true, - "license": "MIT" - }, - "node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", - "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-collection": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz", - "integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-compose-refs": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz", - "integrity": "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg==", - "dev": true, - "license": "MIT", - 
"peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-context": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", - "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dialog": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.15.tgz", - "integrity": "sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-direction": { - "version": "1.1.1", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz", - "integrity": "sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", - "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dropdown-menu": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz", - "integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-menu": "2.1.16", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - 
"@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-guards": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", - "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", - "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-id": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", - "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - 
"peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-menu": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz", - "integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - 
"@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popper": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", - "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-portal": { - "version": "1.1.9", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", - "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-presence": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", - "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", - "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || 
^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-roving-focus": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz", - "integrity": "sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-tooltip": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", - "integrity": 
"sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-visually-hidden": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-callback-ref": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", - "integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-controllable-state": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz", - "integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-effect-event": "0.0.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { 
- "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-effect-event": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz", - "integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-escape-keydown": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz", - "integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-layout-effect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", - "integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-rect": { - "version": "1.1.1", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz", - "integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-size": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz", - "integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-visually-hidden": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz", - "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/rect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz", - "integrity": 
"sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@rollup/pluginutils": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-5.2.0.tgz", - "integrity": "sha512-qWJ2ZTbmumwiLFomfzTyt5Kng4hwPi9rwCYN4SHb6eaRU1KNO4ccxINHr/VhH4GgPlt1XfSTLX2LBTme8ne4Zw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "estree-walker": "^2.0.2", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/pluginutils/node_modules/estree-walker": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", - "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@rollup/pluginutils/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/@sindresorhus/merge-streams": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/@sindresorhus/merge-streams/-/merge-streams-2.3.0.tgz", - "integrity": "sha512-LtoMMhxAlorcGhmFYI+LhPgbPZCkgP6ra1YL604EeF6U98pLlQ3iWIGMdWSC+vWmPBWBNgmDBAhnAobLROJmwg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@standard-schema/spec": { - "version": "1.0.0", - "resolved": 
"https://registry.npmjs.org/@standard-schema/spec/-/spec-1.0.0.tgz", - "integrity": "sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@storybook/addon-docs": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/addon-docs/-/addon-docs-9.1.5.tgz", - "integrity": "sha512-q1j5RRElxFSnHOh60eS3dS2TAyAHzcQeH/2B9UXo6MUHu7HmhNpw3qt2YibIw0zEogHCvZhLNx6TNzSy+7wRUw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@mdx-js/react": "^3.0.0", - "@storybook/csf-plugin": "9.1.5", - "@storybook/icons": "^1.4.0", - "@storybook/react-dom-shim": "9.1.5", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", - "ts-dedent": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/addon-onboarding": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/addon-onboarding/-/addon-onboarding-9.1.5.tgz", - "integrity": "sha512-UJpkWLbugcSGzSUzivTTNdO0Y8gpAn//qJzn2TobwkPJgSwQEoHcjUfWjgZ3mSpQrSQO2e1O1yC3SJTBQt/fqQ==", - "dev": true, - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/builder-vite": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/builder-vite/-/builder-vite-9.1.5.tgz", - "integrity": "sha512-sgt/9+Yl/5O7Bj5hdbHfadN8e/e4CNiDZKDcbLOMpOjKKoqF8vm19I1QocWIAiKjTOhF+4E9v9LddjtAGnfqHQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@storybook/csf-plugin": "9.1.5", - "ts-dedent": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5", - "vite": 
"^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/@storybook/csf-plugin": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/csf-plugin/-/csf-plugin-9.1.5.tgz", - "integrity": "sha512-PmHuF+j11Z7BxAI2/4wQYn0gH1d67gNvycyR+EWgp4P/AWam9wFbuI/T1R45CRQTV2/VrfGdts/tFrvo5kXWig==", - "dev": true, - "license": "MIT", - "dependencies": { - "unplugin": "^1.3.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/global": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/@storybook/global/-/global-5.0.0.tgz", - "integrity": "sha512-FcOqPAXACP0I3oJ/ws6/rrPT9WGhu915Cg8D02a9YxLo0DE9zI+a9A5gRGvmQ09fiWPukqI8ZAEoQEdWUKMQdQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@storybook/icons": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/@storybook/icons/-/icons-1.4.0.tgz", - "integrity": "sha512-Td73IeJxOyalzvjQL+JXx72jlIYHgs+REaHiREOqfpo3A2AYYG71AUbcv+lg7mEDIweKVCxsMQ0UKo634c8XeA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta" - } - }, - "node_modules/@storybook/react": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/react/-/react-9.1.5.tgz", - "integrity": "sha512-fBVP7Go09gzpImtaMcZ2DipLEWdWeTmz7BrACr3Z8uCyKcoH8/d1Wv0JgIiBo1UKDh5ZgYx5pLafaPNqmVAepg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@storybook/global": "^5.0.0", - "@storybook/react-dom-shim": "9.1.5" - }, - "engines": { - "node": ">=20.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - 
"storybook": "^9.1.5", - "typescript": ">= 4.9.x" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@storybook/react-dom-shim": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/react-dom-shim/-/react-dom-shim-9.1.5.tgz", - "integrity": "sha512-blSq9uzSYnfgEYPHYKgM5O14n8hbXNiXx2GiVJyDSg8QPNicbsBg+lCb1TC7/USfV26pNZr/lGNNKGkcCEN6Gw==", - "dev": true, - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "storybook": "^9.1.5" - } - }, - "node_modules/@storybook/react-vite": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/@storybook/react-vite/-/react-vite-9.1.5.tgz", - "integrity": "sha512-OYbkHHNCrn8MNPd+4KxMjcSR4M/YHa84h8sWDUHhKRTRtZFmj8i/QDW3E8tGx2BRLxXw3dTYe9J5UYBhJDDxFA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@joshwooding/vite-plugin-react-docgen-typescript": "0.6.1", - "@rollup/pluginutils": "^5.0.2", - "@storybook/builder-vite": "9.1.5", - "@storybook/react": "9.1.5", - "find-up": "^7.0.0", - "magic-string": "^0.30.0", - "react-docgen": "^8.0.0", - "resolve": "^1.22.8", - "tsconfig-paths": "^4.2.0" - }, - "engines": { - "node": ">=20.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta", - "storybook": "^9.1.5", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/@tailwindcss/typography": { - "version": "0.5.16", - "resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.16.tgz", - "integrity": "sha512-0wDLwCVF5V3x3b1SGXPCDcdsbDHMBe+lkFzBRaHeLvNi+nrrnZ1lA18u+OTWO8iSWU2GxUOCvlXtDuqftc1oiA==", - "dev": true, - 
"license": "MIT", - "dependencies": { - "lodash.castarray": "^4.4.0", - "lodash.isplainobject": "^4.0.6", - "lodash.merge": "^4.6.2", - "postcss-selector-parser": "6.0.10" - }, - "peerDependencies": { - "tailwindcss": ">=3.0.0 || insiders || >=4.0.0-alpha.20 || >=4.0.0-beta.1" - } - }, - "node_modules/@tanstack/react-virtual": { - "version": "3.13.12", - "resolved": "https://registry.npmjs.org/@tanstack/react-virtual/-/react-virtual-3.13.12.tgz", - "integrity": "sha512-Gd13QdxPSukP8ZrkbgS2RwoZseTTbQPLnQEn7HY/rqtM+8Zt95f7xKC7N0EsKs7aoz0WzZ+fditZux+F8EzYxA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@tanstack/virtual-core": "3.13.12" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/@tanstack/virtual-core": { - "version": "3.13.12", - "resolved": "https://registry.npmjs.org/@tanstack/virtual-core/-/virtual-core-3.13.12.tgz", - "integrity": "sha512-1YBOJfRHV4sXUmWsFSf5rQor4Ss82G8dQWLRbnk3GA4jeP8hQt1hxXh0tmflpC0dz3VgEv/1+qwPyLeWkQuPFA==", - "dev": true, - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - } - }, - "node_modules/@testing-library/dom": { - "version": "10.4.1", - "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", - "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.10.4", - "@babel/runtime": "^7.12.5", - "@types/aria-query": "^5.0.1", - "aria-query": "5.3.0", - "dom-accessibility-api": "^0.5.9", - "lz-string": "^1.5.0", - "picocolors": "1.1.1", - "pretty-format": "^27.0.2" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@testing-library/jest-dom": { - "version": "6.7.0", - "resolved": 
"https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.7.0.tgz", - "integrity": "sha512-RI2e97YZ7MRa+vxP4UUnMuMFL2buSsf0ollxUbTgrbPLKhMn8KVTx7raS6DYjC7v1NDVrioOvaShxsguLNISCA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@adobe/css-tools": "^4.4.0", - "aria-query": "^5.0.0", - "css.escape": "^1.5.1", - "dom-accessibility-api": "^0.6.3", - "picocolors": "^1.1.1", - "redent": "^3.0.0" - }, - "engines": { - "node": ">=14", - "npm": ">=6", - "yarn": ">=1" - } - }, - "node_modules/@testing-library/jest-dom/node_modules/dom-accessibility-api": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz", - "integrity": "sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@testing-library/react": { - "version": "16.3.0", - "resolved": "https://registry.npmjs.org/@testing-library/react/-/react-16.3.0.tgz", - "integrity": "sha512-kFSyxiEDwv1WLl2fgsq6pPBbw5aWKrsY2/noi1Id0TK0UParSF62oFQFGHXIyaG4pp2tEub/Zlel+fjjZILDsw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/runtime": "^7.12.5" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@testing-library/dom": "^10.0.0", - "@types/react": "^18.0.0 || ^19.0.0", - "@types/react-dom": "^18.0.0 || ^19.0.0", - "react": "^18.0.0 || ^19.0.0", - "react-dom": "^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@testing-library/user-event": { - "version": "14.6.1", - "resolved": "https://registry.npmjs.org/@testing-library/user-event/-/user-event-14.6.1.tgz", - "integrity": "sha512-vq7fv0rnt+QTXgPxr5Hjc210p6YKq2kmdziLgnsZGgLJ9e6VAShx1pACLuRjd/AS/sr7phAR58OIIpf0LlmQNw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12", - "npm": ">=6" - }, - "peerDependencies": { - 
"@testing-library/dom": ">=7.21.4" - } - }, - "node_modules/@types/aria-query": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz", - "integrity": "sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/babel__core": { - "version": "7.20.5", - "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", - "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.20.7", - "@babel/types": "^7.20.7", - "@types/babel__generator": "*", - "@types/babel__template": "*", - "@types/babel__traverse": "*" - } - }, - "node_modules/@types/babel__generator": { - "version": "7.27.0", - "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.27.0.tgz", - "integrity": "sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__template": { - "version": "7.4.4", - "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", - "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.1.0", - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__traverse": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.28.0.tgz", - "integrity": "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.28.2" - } - }, - "node_modules/@types/chai": { - 
"version": "5.2.2", - "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.2.tgz", - "integrity": "sha512-8kB30R7Hwqf40JPiKhVzodJs2Qc1ZJ5zuT3uzw5Hq/dhNCl3G3l83jfpdI1e20BP348+fV7VIL/+FxaXkqBmWg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/deep-eql": "*" - } - }, - "node_modules/@types/d3-color": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-3.1.3.tgz", - "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/d3-drag": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/@types/d3-drag/-/d3-drag-3.0.7.tgz", - "integrity": "sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-selection": "*" - } - }, - "node_modules/@types/d3-interpolate": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz", - "integrity": "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-color": "*" - } - }, - "node_modules/@types/d3-selection": { - "version": "3.0.11", - "resolved": "https://registry.npmjs.org/@types/d3-selection/-/d3-selection-3.0.11.tgz", - "integrity": "sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/d3-transition": { - "version": "3.0.9", - "resolved": "https://registry.npmjs.org/@types/d3-transition/-/d3-transition-3.0.9.tgz", - "integrity": "sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-selection": "*" - } - }, - "node_modules/@types/d3-zoom": { - 
"version": "3.0.8", - "resolved": "https://registry.npmjs.org/@types/d3-zoom/-/d3-zoom-3.0.8.tgz", - "integrity": "sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/d3-interpolate": "*", - "@types/d3-selection": "*" - } - }, - "node_modules/@types/deep-eql": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", - "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/doctrine": { - "version": "0.0.9", - "resolved": "https://registry.npmjs.org/@types/doctrine/-/doctrine-0.0.9.tgz", - "integrity": "sha512-eOIHzCUSH7SMfonMG1LsC2f8vxBFtho6NGBznK41R84YzPuvSBzrhEps33IsQiOW9+VL6NQ9DbjQJznk/S4uRA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/estree": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", - "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/mdx": { - "version": "2.0.13", - "resolved": "https://registry.npmjs.org/@types/mdx/-/mdx-2.0.13.tgz", - "integrity": "sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/node": { - "version": "20.19.13", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.13.tgz", - "integrity": "sha512-yCAeZl7a0DxgNVteXFHt9+uyFbqXGy/ShC4BlcHkoE0AfGXYv/BUiplV72DjMYXHDBXFjhvr6DD1NiRVfB4j8g==", - "dev": true, - "license": "MIT", - "dependencies": { - "undici-types": "~6.21.0" - } - }, - "node_modules/@types/prismjs": { - "version": "1.26.5", - "resolved": "https://registry.npmjs.org/@types/prismjs/-/prismjs-1.26.5.tgz", - "integrity": 
"sha512-AUZTa7hQ2KY5L7AmtSiqxlhWxb4ina0yd8hNbl4TWuqnv/pFP0nDMb3YrfSBf4hJVGLh2YEIBfKaBW/9UEl6IQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/prop-types": { - "version": "15.7.15", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz", - "integrity": "sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/react": { - "version": "18.3.23", - "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.23.tgz", - "integrity": "sha512-/LDXMQh55EzZQ0uVAZmKKhfENivEvWz6E+EYzh+/MCjMhNsotd+ZHhBGIjFDTi6+fz0OhQQQLbTgdQIxxCsC0w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/prop-types": "*", - "csstype": "^3.0.2" - } - }, - "node_modules/@types/react-dom": { - "version": "18.3.7", - "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.7.tgz", - "integrity": "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "^18.0.0" - } - }, - "node_modules/@types/resolve": { - "version": "1.20.6", - "resolved": "https://registry.npmjs.org/@types/resolve/-/resolve-1.20.6.tgz", - "integrity": "sha512-A4STmOXPhMUtHH+S6ymgE2GiBSMqf4oTvcQZMcHzokuTLVYzXTB8ttjcgxOVaAp2lGwEdzZ0J+cRbbeevQj1UQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@typescript-eslint/project-service": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.39.1.tgz", - "integrity": "sha512-8fZxek3ONTwBu9ptw5nCKqZOSkXshZB7uAxuFF0J/wTMkKydjXCzqqga7MlFMpHi9DoG4BadhmTkITBcg8Aybw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.39.1", - "@typescript-eslint/types": "^8.39.1", - "debug": "^4.3.4" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - 
"type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/scope-manager": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.39.1.tgz", - "integrity": "sha512-RkBKGBrjgskFGWuyUGz/EtD8AF/GW49S21J8dvMzpJitOF1slLEbbHnNEtAHtnDAnx8qDEdRrULRnWVx27wGBw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/types": "8.39.1", - "@typescript-eslint/visitor-keys": "8.39.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.39.1.tgz", - "integrity": "sha512-ePUPGVtTMR8XMU2Hee8kD0Pu4NDE1CN9Q1sxGSGd/mbOtGZDM7pnhXNJnzW63zk/q+Z54zVzj44HtwXln5CvHA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/types": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.39.1.tgz", - "integrity": "sha512-7sPDKQQp+S11laqTrhHqeAbsCfMkwJMrV7oTDvtDds4mEofJYir414bYKUEb8YPUm9QL3U+8f6L6YExSoAGdQw==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.39.1.tgz", 
- "integrity": "sha512-EKkpcPuIux48dddVDXyQBlKdeTPMmALqBUbEk38McWv0qVEZwOpVJBi7ugK5qVNgeuYjGNQxrrnoM/5+TI/BPw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/project-service": "8.39.1", - "@typescript-eslint/tsconfig-utils": "8.39.1", - "@typescript-eslint/types": "8.39.1", - "@typescript-eslint/visitor-keys": "8.39.1", - "debug": "^4.3.4", - "fast-glob": "^3.3.2", - "is-glob": "^4.0.3", - "minimatch": "^9.0.4", - "semver": "^7.6.0", - "ts-api-utils": "^2.1.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/utils": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.39.1.tgz", - "integrity": "sha512-VF5tZ2XnUSTuiqZFXCZfZs1cgkdd3O/sSYmdo2EpSyDlC86UM/8YytTmKnehOW3TGAlivqTDT6bS87B/GQ/jyg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.39.1", - "@typescript-eslint/types": "8.39.1", - "@typescript-eslint/typescript-estree": "8.39.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.39.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.39.1.tgz", - "integrity": "sha512-W8FQi6kEh2e8zVhQ0eeRnxdvIoOkAp/CPAahcNio6nO9dsIwb9b34z90KOlheoyuVf6LSOEdjlkxSkapNEc+4A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/types": "8.39.1", - "eslint-visitor-keys": "^4.2.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || 
>=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/visitor-keys/node_modules/eslint-visitor-keys": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", - "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/@vitejs/plugin-react": { - "resolved": "../../node_modules/.pnpm/@vitejs+plugin-react@4.7.0_vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terse_p5zuafkpgv2vlm3nhxz3zj4hsu/node_modules/@vitejs/plugin-react", - "link": true - }, - "node_modules/@vitest/browser": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/browser/-/browser-3.2.4.tgz", - "integrity": "sha512-tJxiPrWmzH8a+w9nLKlQMzAKX/7VjFs50MWgcAj7p9XQ7AQ9/35fByFYptgPELyLw+0aixTnC4pUWV+APcZ/kw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@testing-library/dom": "^10.4.0", - "@testing-library/user-event": "^14.6.1", - "@vitest/mocker": "3.2.4", - "@vitest/utils": "3.2.4", - "magic-string": "^0.30.17", - "sirv": "^3.0.1", - "tinyrainbow": "^2.0.0", - "ws": "^8.18.2" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "playwright": "*", - "vitest": "3.2.4", - "webdriverio": "^7.0.0 || ^8.0.0 || ^9.0.0" - }, - "peerDependenciesMeta": { - "playwright": { - "optional": true - }, - "safaridriver": { - "optional": true - }, - "webdriverio": { - "optional": true - } - } - }, - "node_modules/@vitest/expect": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.4.tgz", - "integrity": 
"sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/chai": "^5.2.2", - "@vitest/spy": "3.2.4", - "@vitest/utils": "3.2.4", - "chai": "^5.2.0", - "tinyrainbow": "^2.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/mocker": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.4.tgz", - "integrity": "sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/spy": "3.2.4", - "estree-walker": "^3.0.3", - "magic-string": "^0.30.17" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "msw": "^2.4.9", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" - }, - "peerDependenciesMeta": { - "msw": { - "optional": true - }, - "vite": { - "optional": true - } - } - }, - "node_modules/@vitest/pretty-format": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.4.tgz", - "integrity": "sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==", - "dev": true, - "license": "MIT", - "dependencies": { - "tinyrainbow": "^2.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/runner": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.4.tgz", - "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/utils": "3.2.4", - "pathe": "^2.0.3", - "strip-literal": "^3.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/snapshot": { - "version": "3.2.4", - "resolved": 
"https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.4.tgz", - "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/pretty-format": "3.2.4", - "magic-string": "^0.30.17", - "pathe": "^2.0.3" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/spy": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.2.4.tgz", - "integrity": "sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==", - "dev": true, - "license": "MIT", - "dependencies": { - "tinyspy": "^4.0.3" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/utils": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.4.tgz", - "integrity": "sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/pretty-format": "3.2.4", - "loupe": "^3.1.4", - "tinyrainbow": "^2.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@xyflow/react": { - "version": "12.8.4", - "resolved": "https://registry.npmjs.org/@xyflow/react/-/react-12.8.4.tgz", - "integrity": "sha512-bqUu4T5QSHiCFPkoH+b+LROKwQJdLvcjhGbNW9c1dLafCBRjmH1IYz0zPE+lRDXCtQ9kRyFxz3tG19+8VORJ1w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@xyflow/system": "0.0.68", - "classcat": "^5.0.3", - "zustand": "^4.4.0" - }, - "peerDependencies": { - "react": ">=17", - "react-dom": ">=17" - } - }, - "node_modules/@xyflow/system": { - "version": "0.0.68", - "resolved": "https://registry.npmjs.org/@xyflow/system/-/system-0.0.68.tgz", - "integrity": "sha512-QDG2wxIG4qX+uF8yzm1ULVZrcXX3MxPBoxv7O52FWsX87qIImOqifUhfa/TwsvLdzn7ic2DDBH1uI8TKbdNTYA==", - "dev": true, - "license": "MIT", - 
"dependencies": { - "@types/d3-drag": "^3.0.7", - "@types/d3-interpolate": "^3.0.4", - "@types/d3-selection": "^3.0.10", - "@types/d3-transition": "^3.0.8", - "@types/d3-zoom": "^3.0.8", - "d3-drag": "^3.0.0", - "d3-interpolate": "^3.0.1", - "d3-selection": "^3.0.0", - "d3-zoom": "^3.0.0" - } - }, - "node_modules/acorn": { - "version": "8.15.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", - "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", - "dev": true, - "license": "MIT", - "bin": { - "acorn": "bin/acorn" - }, - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/ansi-colors": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", - "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/any-promise": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", - "dev": true, - "license": "MIT" - }, - 
"node_modules/anymatch": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", - "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", - "dev": true, - "license": "ISC", - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/arg": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", - "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==", - "dev": true, - "license": "MIT" - }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, - "license": "Python-2.0" - }, - "node_modules/aria-hidden": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.6.tgz", - "integrity": "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==", - "dev": true, - "license": "MIT", - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/aria-query": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", - "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "dequal": "^2.0.3" - } - }, - "node_modules/assertion-error": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", - "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - } - }, - 
"node_modules/ast-types": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.16.1.tgz", - "integrity": "sha512-6t10qk83GOG8p0vKmaCr8eiilZwO171AvbROMtvvNiwrTly62t+7XkA8RdIIVbpMhCASAsxgAzdRSwh6nw/5Dg==", - "dev": true, - "license": "MIT", - "dependencies": { - "tslib": "^2.0.1" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/autoprefixer": { - "version": "10.4.21", - "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.21.tgz", - "integrity": "sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/autoprefixer" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "browserslist": "^4.24.4", - "caniuse-lite": "^1.0.30001702", - "fraction.js": "^4.3.7", - "normalize-range": "^0.1.2", - "picocolors": "^1.1.1", - "postcss-value-parser": "^4.2.0" - }, - "bin": { - "autoprefixer": "bin/autoprefixer" - }, - "engines": { - "node": "^10 || ^12 || >=14" - }, - "peerDependencies": { - "postcss": "^8.1.0" - } - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true, - "license": "MIT" - }, - "node_modules/better-opn": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/better-opn/-/better-opn-3.0.2.tgz", - "integrity": "sha512-aVNobHnJqLiUelTaHat9DZ1qM2w0C0Eym4LPI/3JxOnSokGVdsl1T1kN7TFvsEAD8G47A6VKQ0TVHqbBnYMJlQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "open": "^8.0.4" - }, - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/binary-extensions": { - 
"version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/braces": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", - "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", - "dev": true, - "license": "MIT", - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/browserslist": { - "version": "4.25.2", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.2.tgz", - "integrity": "sha512-0si2SJK3ooGzIawRu61ZdPCO1IncZwS8IzuX73sPZsXW6EQ/w/DAfPyKI8l1ETTCr2MnvqWitmlCUxgdul45jA==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "caniuse-lite": "^1.0.30001733", - "electron-to-chromium": "^1.5.199", - "node-releases": "^2.0.19", - "update-browserslist-db": "^1.1.3" - }, - "bin": { - "browserslist": "cli.js" - }, - "engines": { - "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" - } - }, - "node_modules/cac": { - "version": "6.7.14", - 
"resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", - "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/camelcase-css": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", - "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/caniuse-lite": { - "version": "1.0.30001735", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001735.tgz", - "integrity": "sha512-EV/laoX7Wq2J9TQlyIXRxTJqIw4sxfXS4OYgudGxBYRuTv0q7AM6yMEpU/Vo1I94thg9U6EZ2NfZx9GJq83u7w==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/caniuse-lite" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "CC-BY-4.0" - }, - "node_modules/chai": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/chai/-/chai-5.2.1.tgz", - "integrity": "sha512-5nFxhUrX0PqtyogoYOA8IPswy5sZFTOsBFl/9bNsmDLgsxYTzSZQJDPppDnZPTQbzSEm0hqGjWPzRemQCYbD6A==", - "dev": true, - "license": "MIT", - "dependencies": { - "assertion-error": "^2.0.1", - "check-error": "^2.1.1", - "deep-eql": "^5.0.1", - "loupe": "^3.1.0", - "pathval": "^2.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/chalk": { - "version": "5.6.2", - "resolved": 
"https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", - "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^12.17.0 || ^14.13 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/chalk-template": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-1.1.2.tgz", - "integrity": "sha512-2bxTP2yUH7AJj/VAXfcA+4IcWGdQ87HwBANLt5XxGTeomo8yG0y95N1um9i5StvhT/Bl0/2cARA5v1PpPXUxUA==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^5.2.0" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/chalk/chalk-template?sponsor=1" - } - }, - "node_modules/check-error": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.1.tgz", - "integrity": "sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 16" - } - }, - "node_modules/chokidar": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", - "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", - "dev": true, - "license": "MIT", - "dependencies": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" - }, - "engines": { - "node": ">= 8.10.0" - }, - "funding": { - "url": "https://paulmillr.com/funding/" - }, - "optionalDependencies": { - "fsevents": "~2.3.2" - } - }, - "node_modules/chokidar/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": 
"sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/class-variance-authority": { - "version": "0.7.1", - "resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.1.tgz", - "integrity": "sha512-Ka+9Trutv7G8M6WT6SeiRWz792K5qEqIGEGzXKhAE6xOWAY6pPH8U+9IY3oCMv6kqTmLsv7Xh/2w2RigkePMsg==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "clsx": "^2.1.1" - }, - "funding": { - "url": "https://polar.sh/cva" - } - }, - "node_modules/classcat": { - "version": "5.0.5", - "resolved": "https://registry.npmjs.org/classcat/-/classcat-5.0.5.tgz", - "integrity": "sha512-JhZUT7JFcQy/EzW605k/ktHtncoo9vnyW/2GspNYwFlN1C/WmjuV/xtS04e9SOkL2sTdw0VAZ2UGCcQ9lR6p6w==", - "dev": true, - "license": "MIT" - }, - "node_modules/cli-cursor": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-5.0.0.tgz", - "integrity": "sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw==", - "dev": true, - "license": "MIT", - "dependencies": { - "restore-cursor": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-spinners": { - "version": "2.9.2", - "resolved": "https://registry.npmjs.org/cli-spinners/-/cli-spinners-2.9.2.tgz", - "integrity": "sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/clsx": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", - "integrity": 
"sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/cmdk": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/cmdk/-/cmdk-1.1.1.tgz", - "integrity": "sha512-Vsv7kFaXm+ptHDMZ7izaRsP70GgrW9NBNGswt9OZaVBLlE0SNpDq8eu/VGXyF9r7M0azK3Wy7OlYXsuyYLFzHg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "^1.1.1", - "@radix-ui/react-dialog": "^1.1.6", - "@radix-ui/react-id": "^1.1.0", - "@radix-ui/react-primitive": "^2.0.2" - }, - "peerDependencies": { - "react": "^18 || ^19 || ^19.0.0-rc", - "react-dom": "^18 || ^19 || ^19.0.0-rc" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true, - "license": "MIT" - }, - "node_modules/commander": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", - "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/convert-source-map": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", - "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", - "dev": true, 
- "license": "MIT" - }, - "node_modules/cosmiconfig": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz", - "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", - "dev": true, - "license": "MIT", - "dependencies": { - "env-paths": "^2.2.1", - "import-fresh": "^3.3.0", - "js-yaml": "^4.1.0", - "parse-json": "^5.2.0" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/d-fischer" - }, - "peerDependencies": { - "typescript": ">=4.9.5" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/cross-spawn": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/css.escape": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/css.escape/-/css.escape-1.5.1.tgz", - "integrity": "sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==", - "dev": true, - "license": "MIT" - }, - "node_modules/cssesc": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", - "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", - "dev": true, - "license": "MIT", - "bin": { - "cssesc": "bin/cssesc" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/csstype": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", - "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "dev": true, - "license": "MIT" 
- }, - "node_modules/d3-color": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", - "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-dispatch": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-dispatch/-/d3-dispatch-3.0.1.tgz", - "integrity": "sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-drag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-drag/-/d3-drag-3.0.0.tgz", - "integrity": "sha512-pWbUJLdETVA8lQNJecMxoXfH6x+mO2UQo8rSmZ+QqxcbyA3hfeprFgIT//HW2nlHChWeIIMwS2Fq+gEARkhTkg==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-selection": "3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-ease": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-ease/-/d3-ease-3.0.1.tgz", - "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-interpolate": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", - "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-selection": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", - "dev": true, - 
"license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-timer": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz", - "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-transition": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-transition/-/d3-transition-3.0.1.tgz", - "integrity": "sha512-ApKvfjsSR6tg06xrL434C0WydLr7JewBB3V+/39RMHsaXTOG0zmt/OAXeng5M5LBm0ojmxJrpomQVZ1aPvBL4w==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3", - "d3-dispatch": "1 - 3", - "d3-ease": "1 - 3", - "d3-interpolate": "1 - 3", - "d3-timer": "1 - 3" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "d3-selection": "2 - 3" - } - }, - "node_modules/d3-zoom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-zoom/-/d3-zoom-3.0.0.tgz", - "integrity": "sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==", - "dev": true, - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-drag": "2 - 3", - "d3-interpolate": "1 - 3", - "d3-selection": "2 - 3", - "d3-transition": "2 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/debug": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", - "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/deep-eql": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", - "integrity": 
"sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/define-lazy-prop": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz", - "integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/dequal": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", - "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/detect-node-es": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", - "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/didyoumean": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", - "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==", - "dev": true, - "license": "Apache-2.0" - }, - "node_modules/dlv": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", - "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==", - "dev": true, - "license": "MIT" - }, - "node_modules/doctrine": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", - "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "esutils": "^2.0.2" - }, - 
"engines": { - "node": ">=6.0.0" - } - }, - "node_modules/dom-accessibility-api": { - "version": "0.5.16", - "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz", - "integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==", - "dev": true, - "license": "MIT" - }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "dev": true, - "license": "MIT" - }, - "node_modules/effect": { - "version": "3.17.13", - "resolved": "https://registry.npmjs.org/effect/-/effect-3.17.13.tgz", - "integrity": "sha512-JMz5oBxs/6mu4FP9Csjub4jYMUwMLrp+IzUmSDVIzn2NoeoyOXMl7x1lghfr3dLKWffWrdnv/d8nFFdgrHXPqw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@standard-schema/spec": "^1.0.0", - "fast-check": "^3.23.1" - } - }, - "node_modules/electron-to-chromium": { - "version": "1.5.201", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.201.tgz", - "integrity": "sha512-ZG65vsrLClodGqywuigc+7m0gr4ISoTQttfVh7nfpLv0M7SIwF4WbFNEOywcqTiujs12AUeeXbFyQieDICAIxg==", - "dev": true, - "license": "ISC" - }, - "node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true, - "license": "MIT" - }, - "node_modules/enquirer": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.4.1.tgz", - "integrity": "sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-colors": "^4.1.1", - "strip-ansi": "^6.0.1" - }, - "engines": { - 
"node": ">=8.6" - } - }, - "node_modules/enquirer/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/env-paths": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", - "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/error-ex": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", - "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-arrayish": "^0.2.1" - } - }, - "node_modules/es-module-lexer": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", - "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==", - "dev": true, - "license": "MIT" - }, - "node_modules/esbuild": { - "version": "0.25.9", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.9.tgz", - "integrity": "sha512-CRbODhYyQx3qp7ZEwzxOk4JBqmD/seJrzPa/cGjY1VtIn5E09Oi9/dB4JwctnfZ8Q8iT7rioVv5k/FNT/uf54g==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.25.9", - "@esbuild/android-arm": "0.25.9", - "@esbuild/android-arm64": "0.25.9", - "@esbuild/android-x64": "0.25.9", - "@esbuild/darwin-arm64": "0.25.9", - "@esbuild/darwin-x64": "0.25.9", - 
"@esbuild/freebsd-arm64": "0.25.9", - "@esbuild/freebsd-x64": "0.25.9", - "@esbuild/linux-arm": "0.25.9", - "@esbuild/linux-arm64": "0.25.9", - "@esbuild/linux-ia32": "0.25.9", - "@esbuild/linux-loong64": "0.25.9", - "@esbuild/linux-mips64el": "0.25.9", - "@esbuild/linux-ppc64": "0.25.9", - "@esbuild/linux-riscv64": "0.25.9", - "@esbuild/linux-s390x": "0.25.9", - "@esbuild/linux-x64": "0.25.9", - "@esbuild/netbsd-arm64": "0.25.9", - "@esbuild/netbsd-x64": "0.25.9", - "@esbuild/openbsd-arm64": "0.25.9", - "@esbuild/openbsd-x64": "0.25.9", - "@esbuild/openharmony-arm64": "0.25.9", - "@esbuild/sunos-x64": "0.25.9", - "@esbuild/win32-arm64": "0.25.9", - "@esbuild/win32-ia32": "0.25.9", - "@esbuild/win32-x64": "0.25.9" - } - }, - "node_modules/esbuild-register": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/esbuild-register/-/esbuild-register-3.6.0.tgz", - "integrity": "sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==", - "dev": true, - "license": "MIT", - "dependencies": { - "debug": "^4.3.4" - }, - "peerDependencies": { - "esbuild": ">=0.12 <1" - } - }, - "node_modules/escalade": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", - "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/eslint": { - "resolved": "../../node_modules/.pnpm/eslint@9.31.0_jiti@2.4.2/node_modules/eslint", - "link": true - }, - "node_modules/eslint-plugin-react-hooks": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-5.2.0.tgz", - "integrity": "sha512-+f15FfK64YQwZdJNELETdn5ibXEUQmW1DZL6KXhNnc2heoy/sg9VJJeT7n8TlMWouzWqSWavFkIhHyIbIAEapg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "eslint": "^3.0.0 || ^4.0.0 
|| ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" - } - }, - "node_modules/eslint-plugin-storybook": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-9.1.5.tgz", - "integrity": "sha512-vCfaZ2Wk1N1vvK4vmNZoA6y2CYxJwbgIs6BE8/toPf4Z6hCAipoobP6a/30Rs0g/B2TSxTSj41TfrJKJrowpjQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/utils": "^8.8.1" - }, - "engines": { - "node": ">=20.0.0" - }, - "peerDependencies": { - "eslint": ">=8", - "storybook": "^9.1.5" - } - }, - "node_modules/eslint-visitor-keys": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", - "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, - "license": "BSD-2-Clause", - "bin": { - "esparse": "bin/esparse.js", - "esvalidate": "bin/esvalidate.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/estree-walker": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", - "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - } - }, - "node_modules/esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - 
"dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/expect-type": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz", - "integrity": "sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/fast-check": { - "version": "3.23.2", - "resolved": "https://registry.npmjs.org/fast-check/-/fast-check-3.23.2.tgz", - "integrity": "sha512-h5+1OzzfCC3Ef7VbtKdcv7zsstUQwUDlYpUTvjeUsJAssPgLn7QzbboPtL5ro04Mq0rPOsMzl7q5hIbRs2wD1A==", - "dev": true, - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/dubzzz" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/fast-check" - } - ], - "license": "MIT", - "dependencies": { - "pure-rand": "^6.1.0" - }, - "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/fast-glob": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", - "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.8" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/fast-glob/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/fastq": { - "version": "1.19.1", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", - "integrity": 
"sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "reusify": "^1.0.4" - } - }, - "node_modules/fill-range": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", - "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", - "dev": true, - "license": "MIT", - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/find-up": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-7.0.0.tgz", - "integrity": "sha512-YyZM99iHrqLKjmt4LJDj58KI+fYyufRLBSYcqycxf//KpBk9FoewoGX0450m9nB44qrZnovzC2oeP5hUibxc/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "locate-path": "^7.2.0", - "path-exists": "^5.0.0", - "unicorn-magic": "^0.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/foreground-child": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", - "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", - "dev": true, - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.6", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/fraction.js": { - "version": "4.3.7", - "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", - "integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==", - "dev": true, - "license": "MIT", - "engines": { - "node": "*" - }, - "funding": { - "type": "patreon", - "url": "https://github.com/sponsors/rawify" - } - }, - "node_modules/fs-extra": { - "version": "11.3.1", - 
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.1.tgz", - "integrity": "sha512-eXvGGwZ5CL17ZSwHWd3bbgk7UUpF6IFHtP57NYYakPvHOs8GDgDe5KJI36jIJzDkJ6eJjuzRA8eBQb6SkKue0g==", - "dev": true, - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=14.14" - } - }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/function-bind": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", - "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/fuse.js": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.1.0.tgz", - "integrity": "sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=10" - } - }, - "node_modules/gensync": { - "version": "1.0.0-beta.2", - "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", - "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/get-east-asian-width": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.3.1.tgz", - "integrity": 
"sha512-R1QfovbPsKmosqTnPoRFiJ7CF9MLRgb53ChvMZm+r4p76/+8yKDy17qLL2PKInORy2RkZZekuK0efYgmzTkXyQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/get-nonce": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz", - "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "dev": true, - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/glob-parent": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", - "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.3" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/globals": { - "version": "16.3.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-16.3.0.tgz", - "integrity": "sha512-bqWEnJ1Nt3neqx2q5SFfGS8r/ahumIakg3HcwtNlrVlwXIeNumWn/c7Pn/wKzGhf6SaW6H6uWXLqC30STCMchQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/globby": { - "version": "14.1.0", - "resolved": 
"https://registry.npmjs.org/globby/-/globby-14.1.0.tgz", - "integrity": "sha512-0Ia46fDOaT7k4og1PDW4YbodWWr3scS2vAr2lTbsplOt2WkKp0vQbkI9wKis/T5LV/dqPjO3bpS/z6GTJB82LA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@sindresorhus/merge-streams": "^2.1.0", - "fast-glob": "^3.3.3", - "ignore": "^7.0.3", - "path-type": "^6.0.0", - "slash": "^5.1.0", - "unicorn-magic": "^0.3.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/globby/node_modules/unicorn-magic": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.3.0.tgz", - "integrity": "sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", - "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/hosted-git-info": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-8.1.0.tgz", - "integrity": "sha512-Rw/B2DNQaPBICNXEm8balFz9a6WpZrkCGpcWFpy7nCj+NyhSdqXipmfvtmWt9xGfp0wZnBxB+iVpLmQMYt47Tw==", - "dev": true, - "license": "ISC", - "dependencies": { - "lru-cache": "^10.0.1" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - 
"node_modules/ignore": { - "version": "7.0.5", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", - "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/import-fresh": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", - "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "parent-module": "^1.0.0", - "resolve-from": "^4.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/indent-string": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", - "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/is-arrayish": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", - "dev": true, - "license": "MIT" - }, - "node_modules/is-binary-path": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", - "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", - "dev": true, - "license": "MIT", - "dependencies": { - "binary-extensions": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-core-module": { - "version": "2.16.1", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", - "integrity": 
"sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==", - "dev": true, - "license": "MIT", - "dependencies": { - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-docker": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz", - "integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==", - "dev": true, - "license": "MIT", - "bin": { - "is-docker": "cli.js" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/is-glob": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", - "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-extglob": "^2.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-interactive": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-interactive/-/is-interactive-2.0.0.tgz", - "integrity": 
"sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/is-unicode-supported": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-2.1.0.tgz", - "integrity": "sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-wsl": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-2.2.0.tgz", - "integrity": "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-docker": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true, - "license": "ISC" - }, - "node_modules/jackspeak": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", - "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": 
"https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, - "node_modules/jiti": { - "version": "1.21.7", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz", - "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==", - "dev": true, - "license": "MIT", - "bin": { - "jiti": "bin/jiti.js" - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "dev": true, - "license": "MIT", - "dependencies": { - "argparse": "^2.0.1" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/jsesc": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", - "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", - "dev": true, - "license": "MIT", - "bin": { - "jsesc": "bin/jsesc" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/json-parse-even-better-errors": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", - "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", - "dev": true, - "license": "MIT" - }, - "node_modules/json5": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", - "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", - "dev": true, 
- "license": "MIT", - "bin": { - "json5": "lib/cli.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/jsonc-parser": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", - "integrity": "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/jsonfile": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz", - "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==", - "dev": true, - "license": "MIT", - "dependencies": { - "universalify": "^2.0.0" - }, - "optionalDependencies": { - "graceful-fs": "^4.1.6" - } - }, - "node_modules/kleur": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", - "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/lilconfig": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", - "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antonk52" - } - }, - "node_modules/lines-and-columns": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", - "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", - "dev": true, - "license": "MIT" - }, - "node_modules/locate-path": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-7.2.0.tgz", - "integrity": 
"sha512-gvVijfZvn7R+2qyPX8mAuKcFGDf6Nc61GdvGafQsHL0sBIxfKzA+usWn4GFC/bk+QdwPUD4kWFJLhElipq+0VA==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-locate": "^6.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/lodash.castarray": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/lodash.castarray/-/lodash.castarray-4.4.0.tgz", - "integrity": "sha512-aVx8ztPv7/2ULbArGJ2Y42bG1mEQ5mGjpdvrbJcJFU3TbYybe+QlLS4pst9zV52ymy2in1KpFPiZnAOATxD4+Q==", - "dev": true, - "license": "MIT" - }, - "node_modules/lodash.isplainobject": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", - "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", - "dev": true, - "license": "MIT" - }, - "node_modules/lodash.merge": { - "version": "4.6.2", - "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", - "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/log-symbols": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-6.0.0.tgz", - "integrity": "sha512-i24m8rpwhmPIS4zscNzK6MSEhk0DUWa/8iYQWxhffV8jkI4Phvs3F+quL5xvS0gdQR0FyTCMMH33Y78dDTzzIw==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^5.3.0", - "is-unicode-supported": "^1.3.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/log-symbols/node_modules/is-unicode-supported": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-1.3.0.tgz", - "integrity": 
"sha512-43r2mRvz+8JRIKnWJ+3j8JtjRKZ6GmjzfaE/qiBJnikNnYv/6bagRJ1kUhNk8R5EX/GkobD+r+sfxCPJsiKBLQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, - "node_modules/loupe": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.0.tgz", - "integrity": "sha512-2NCfZcT5VGVNX9mSZIxLRkEAegDGBpuQZBy13desuHeVORmBDyAET4TkJr4SjqQy3A8JDofMN6LpkK8Xcm/dlw==", - "dev": true, - "license": "MIT" - }, - "node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/lucide-react": { - "version": "0.542.0", - "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.542.0.tgz", - "integrity": "sha512-w3hD8/SQB7+lzU2r4VdFyzzOzKnUjTZIF/MQJGSSvni7Llewni4vuViRppfRAa2guOsY5k4jZyxw/i9DQHv+dw==", - "dev": true, - "license": "ISC", - "peerDependencies": { - "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/lz-string": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", - "integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==", - "dev": true, - "license": "MIT", - "bin": { - "lz-string": "bin/bin.js" - } - }, - "node_modules/magic-string": { - "version": "0.30.17", - "resolved": 
"https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", - "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0" - } - }, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/micromatch": { - "version": "4.0.8", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", - "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", - "dev": true, - "license": "MIT", - "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/mimic-function": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/mimic-function/-/mimic-function-5.0.1.tgz", - "integrity": "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/min-indent": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", - "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, - "license": "ISC", - 
"dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/mrmime": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/mrmime/-/mrmime-2.0.1.tgz", - "integrity": "sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true, - "license": "MIT" - }, - "node_modules/mz": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", - "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0", - "object-assign": "^4.0.1", - "thenify-all": "^1.0.0" - } - }, - "node_modules/nanoid": { - "version": "3.3.11", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", - "integrity": 
"sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/node-releases": { - "version": "2.0.19", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", - "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", - "dev": true, - "license": "MIT" - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/normalize-range": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", - "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/npm-package-arg": { - "version": "12.0.2", - "resolved": "https://registry.npmjs.org/npm-package-arg/-/npm-package-arg-12.0.2.tgz", - "integrity": "sha512-f1NpFjNI9O4VbKMOlA5QoBq/vSQPORHcTZ2feJpFkTHJ9eQkdlmZEKSjcAhxTGInC7RlEyScT9ui67NaOsjFWA==", - "dev": true, - "license": "ISC", - "dependencies": { - "hosted-git-info": "^8.0.0", - "proc-log": "^5.0.0", - "semver": "^7.3.5", - "validate-npm-package-name": "^6.0.0" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": 
"sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/onetime": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/onetime/-/onetime-7.0.0.tgz", - "integrity": "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "mimic-function": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/open": { - "version": "8.4.2", - "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz", - "integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-lazy-prop": "^2.0.0", - "is-docker": "^2.1.1", - "is-wsl": "^2.2.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/ora": { - "version": "8.2.0", - "resolved": "https://registry.npmjs.org/ora/-/ora-8.2.0.tgz", - "integrity": "sha512-weP+BZ8MVNnlCm8c0Qdc1WSWq4Qn7I+9CJGm7Qali6g44e/PUzbjNqJX5NJ9ljlNMosfJvg1fKEGILklK9cwnw==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^5.3.0", - "cli-cursor": "^5.0.0", - "cli-spinners": "^2.9.2", - "is-interactive": "^2.0.0", - "is-unicode-supported": "^2.0.0", - "log-symbols": "^6.0.0", - "stdin-discarder": "^0.2.2", - "string-width": "^7.2.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - 
"funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/ora/node_modules/emoji-regex": { - "version": "10.5.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.5.0.tgz", - "integrity": "sha512-lb49vf1Xzfx080OKA0o6l8DQQpV+6Vg95zyCJX9VB/BqKYlhG7N4wgROUUHRA+ZPUefLnteQOad7z1kT2bV7bg==", - "dev": true, - "license": "MIT" - }, - "node_modules/ora/node_modules/string-width": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz", - "integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^10.3.0", - "get-east-asian-width": "^1.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-limit": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-4.0.0.tgz", - "integrity": "sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "yocto-queue": "^1.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-locate": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-6.0.0.tgz", - "integrity": "sha512-wPrq66Llhl7/4AGC6I+cqxT07LhXvWL08LNXz1fENOw0Ap4sRZZ/gZpTTJ5jpurzzzfS2W/Ge9BY3LgLjCShcw==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-limit": "^4.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-map": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.3.tgz", - "integrity": 
"sha512-VkndIv2fIB99swvQoA65bm+fsmt6UNdGeIB0oxBs+WhAhdh08QA04JXpI7rbB9r08/nkbysKoya9rtDERYOYMA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "dev": true, - "license": "BlueOak-1.0.0" - }, - "node_modules/parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "dev": true, - "license": "MIT", - "dependencies": { - "callsites": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/parse-json": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", - "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.0.0", - "error-ex": "^1.3.1", - "json-parse-even-better-errors": "^2.3.0", - "lines-and-columns": "^1.1.6" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/path-exists": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-5.0.0.tgz", - "integrity": "sha512-RjhtfwJOxzcFmNOi6ltcbcu4Iu+FL3zEj83dk4kAS+fVpTxXLO1b38RvJgT/0QwvV/L3aY9TAnyv0EOqW4GoMQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - } - }, - "node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": 
"sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/path-parse": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", - "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", - "dev": true, - "license": "MIT" - }, - "node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/path-type": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-6.0.0.tgz", - "integrity": "sha512-Vj7sf++t5pBD637NSfkxpHSMfWaeig5+DKWLhcqIYx6mWQz5hdJTGDVMQiJcw1ZYkhs7AazKDGpRVji1LJCZUQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/pathe": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", - "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", - "dev": true, - "license": "MIT" - }, - "node_modules/pathval": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz", - "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 14.16" - } - }, - "node_modules/picocolors": { - "version": "1.1.1", - 
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", - "dev": true, - "license": "ISC" - }, - "node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/pify": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/pirates": { - "version": "4.0.7", - "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz", - "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/playwright": { - "version": "1.55.0", - "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.55.0.tgz", - "integrity": "sha512-sdCWStblvV1YU909Xqx0DhOjPZE4/5lJsIS84IfN9dAZfcl/CIZ5O8l3o0j7hPMjDvqoTF8ZUcc+i/GL5erstA==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "playwright-core": "1.55.0" - }, - "bin": { - "playwright": "cli.js" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "fsevents": "2.3.2" - } - }, - "node_modules/playwright-core": { - "version": "1.55.0", - "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.55.0.tgz", - "integrity": "sha512-GvZs4vU3U5ro2nZpeiwyb0zuFaqb9sUiAJuyrWpcGouD8y9/HLgGbNRjIph7zU9D3hnPaisMl9zG9CgFi/biIg==", - "dev": true, - 
"license": "Apache-2.0", - "bin": { - "playwright-core": "cli.js" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/playwright/node_modules/fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/postcss": { - "version": "8.5.6", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", - "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.11", - "picocolors": "^1.1.1", - "source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/postcss-import": { - "version": "15.1.0", - "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz", - "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==", - "dev": true, - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.0.0", - "read-cache": "^1.0.0", - "resolve": "^1.1.7" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "postcss": "^8.0.0" - } - }, - "node_modules/postcss-js": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.0.1.tgz", - "integrity": "sha512-dDLF8pEO191hJMtlHFPRa8xsizHaM82MLfNkUHdUtVEV3tgTp5oj+8qbEqYM57SLfc74KSbw//4SeJma2LRVIw==", - "dev": 
true, - "license": "MIT", - "dependencies": { - "camelcase-css": "^2.0.1" - }, - "engines": { - "node": "^12 || ^14 || >= 16" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - "peerDependencies": { - "postcss": "^8.4.21" - } - }, - "node_modules/postcss-load-config": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", - "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "lilconfig": "^3.0.0", - "yaml": "^2.3.4" - }, - "engines": { - "node": ">= 14" - }, - "peerDependencies": { - "postcss": ">=8.0.9", - "ts-node": ">=9.0.0" - }, - "peerDependenciesMeta": { - "postcss": { - "optional": true - }, - "ts-node": { - "optional": true - } - } - }, - "node_modules/postcss-nested": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz", - "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "postcss-selector-parser": "^6.1.1" - }, - "engines": { - "node": ">=12.0" - }, - "peerDependencies": { - "postcss": "^8.2.14" - } - }, - "node_modules/postcss-nested/node_modules/postcss-selector-parser": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", - "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", - 
"dev": true, - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-selector-parser": { - "version": "6.0.10", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.10.tgz", - "integrity": "sha512-IQ7TZdoaqbT+LCpShg46jnZVlhWD2w6iQYAcYXfHARZ7X1t/UGhhceQDs5X0cGqKvYlHNOuv7Oa1xmb0oQuA3w==", - "dev": true, - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-value-parser": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", - "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/pretty-format": { - "version": "27.5.1", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz", - "integrity": "sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1", - "ansi-styles": "^5.0.0", - "react-is": "^17.0.1" - }, - "engines": { - "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" - } - }, - "node_modules/prism-react-renderer": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/prism-react-renderer/-/prism-react-renderer-2.4.1.tgz", - "integrity": "sha512-ey8Ls/+Di31eqzUxC46h8MksNuGx/n0AAC8uKpwFau4RPDYLuE3EXTp8N8G2vX2N7UC/+IXeNUnlWBGGcAG+Ig==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/prismjs": "^1.26.0", - "clsx": "^2.0.0" - }, - "peerDependencies": { - "react": ">=16.0.0" - } - }, - "node_modules/proc-log": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/proc-log/-/proc-log-5.0.0.tgz", - "integrity": 
"sha512-Azwzvl90HaF0aCz1JrDdXQykFakSSNPaPoiZ9fm5qJIMHioDZEi7OAdRwSm6rSoPtY3Qutnm3L7ogmg3dc+wbQ==", - "dev": true, - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/prompts": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", - "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "kleur": "^3.0.3", - "sisteransi": "^1.0.5" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/pure-rand": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz", - "integrity": "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==", - "dev": true, - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/dubzzz" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/fast-check" - } - ], - "license": "MIT" - }, - "node_modules/queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react-docgen": { - "version": "8.0.0", - 
"resolved": "https://registry.npmjs.org/react-docgen/-/react-docgen-8.0.0.tgz", - "integrity": "sha512-kmob/FOTwep7DUWf9KjuenKX0vyvChr3oTdvvPt09V60Iz75FJp+T/0ZeHMbAfJj2WaVWqAPP5Hmm3PYzSPPKg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.18.9", - "@babel/traverse": "^7.18.9", - "@babel/types": "^7.18.9", - "@types/babel__core": "^7.18.0", - "@types/babel__traverse": "^7.18.0", - "@types/doctrine": "^0.0.9", - "@types/resolve": "^1.20.2", - "doctrine": "^3.0.0", - "resolve": "^1.22.1", - "strip-indent": "^4.0.0" - }, - "engines": { - "node": "^20.9.0 || >=22" - } - }, - "node_modules/react-docgen-typescript": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/react-docgen-typescript/-/react-docgen-typescript-2.4.0.tgz", - "integrity": "sha512-ZtAp5XTO5HRzQctjPU0ybY0RRCQO19X/8fxn3w7y2VVTUbGHDKULPTL4ky3vB05euSgG5NpALhEhDPvQ56wvXg==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "typescript": ">= 4.3.x" - } - }, - "node_modules/react-docgen/node_modules/strip-indent": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-4.0.0.tgz", - "integrity": "sha512-mnVSV2l+Zv6BLpSD/8V87CW/y9EmmbYzGCIavsnsI6/nwn26DwffM/yztm30Z/I2DY9wdS3vXVCMnHDgZaVNoA==", - "dev": true, - "license": "MIT", - "dependencies": { - "min-indent": "^1.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/react-dom": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", - "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", - "dev": true, - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0", - "scheduler": "^0.23.2" - }, - "peerDependencies": { - "react": "^18.3.1" - } - }, - "node_modules/react-is": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", - 
"integrity": "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==", - "dev": true, - "license": "MIT" - }, - "node_modules/react-remove-scroll": { - "version": "2.7.1", - "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.7.1.tgz", - "integrity": "sha512-HpMh8+oahmIdOuS5aFKKY6Pyog+FNaZV/XyJOq7b4YFwsFHe5yYfdbIalI4k3vU2nSDql7YskmUseHsRrJqIPA==", - "dev": true, - "license": "MIT", - "dependencies": { - "react-remove-scroll-bar": "^2.3.7", - "react-style-singleton": "^2.2.3", - "tslib": "^2.1.0", - "use-callback-ref": "^1.3.3", - "use-sidecar": "^1.1.3" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-remove-scroll-bar": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz", - "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "react-style-singleton": "^2.2.2", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-style-singleton": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz", - "integrity": "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "get-nonce": "^1.0.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || 
^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/read-cache": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", - "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", - "dev": true, - "license": "MIT", - "dependencies": { - "pify": "^2.3.0" - } - }, - "node_modules/read-yaml-file": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/read-yaml-file/-/read-yaml-file-2.1.0.tgz", - "integrity": "sha512-UkRNRIwnhG+y7hpqnycCL/xbTk7+ia9VuVTC0S+zVbwd65DI9eUpRMfsWIGrCWxTU/mi+JW8cHQCrv+zfCbEPQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "js-yaml": "^4.0.0", - "strip-bom": "^4.0.0" - }, - "engines": { - "node": ">=10.13" - } - }, - "node_modules/read-yaml-file/node_modules/strip-bom": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", - "integrity": "sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/readdirp": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", - "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "picomatch": "^2.2.1" - }, - "engines": { - "node": ">=8.10.0" - } - }, - "node_modules/recast": { - "version": "0.23.11", - "resolved": "https://registry.npmjs.org/recast/-/recast-0.23.11.tgz", - "integrity": "sha512-YTUo+Flmw4ZXiWfQKGcwwc11KnoRAYgzAE2E7mXKCjSviTKShtxBsN6YUUBB2gtaBzKzeKunxhUwNHQuRryhWA==", - "dev": true, - "license": "MIT", - "dependencies": { - "ast-types": "^0.16.1", - "esprima": "~4.0.0", - "source-map": "~0.6.1", - "tiny-invariant": "^1.3.3", - "tslib": "^2.0.1" - }, - 
"engines": { - "node": ">= 4" - } - }, - "node_modules/redent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz", - "integrity": "sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==", - "dev": true, - "license": "MIT", - "dependencies": { - "indent-string": "^4.0.0", - "strip-indent": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/resolve": { - "version": "1.22.10", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.10.tgz", - "integrity": "sha512-NPRy+/ncIMeDlTAsuqwKIiferiawhefFJtkNSW0qZJEqMEb+qBt/77B/jGeeek+F0uOeN05CDa6HXbbIgtVX4w==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-core-module": "^2.16.0", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/restore-cursor": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-5.1.0.tgz", - "integrity": "sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA==", - "dev": true, - "license": "MIT", - "dependencies": { - "onetime": "^7.0.0", - "signal-exit": "^4.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/reusify": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", - "integrity": 
"sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", - "dev": true, - "license": "MIT", - "engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" - } - }, - "node_modules/run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "queue-microtask": "^1.2.2" - } - }, - "node_modules/scheduler": { - "version": "0.23.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", - "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0" - } - }, - "node_modules/semver": { - "version": "7.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", - "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - 
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/siginfo": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", - "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", - "dev": true, - "license": "ISC" - }, - "node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sirv": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/sirv/-/sirv-3.0.2.tgz", - "integrity": "sha512-2wcC/oGxHis/BoHkkPwldgiPSYcpZK3JU28WoMVv55yHJgcZ8rlXvuG9iZggz+sU1d4bRgIGASwyWqjxu3FM0g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@polka/url": "^1.0.0-next.24", - "mrmime": "^2.0.0", - "totalist": "^3.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/sisteransi": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", - "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", - "dev": true, - "license": "MIT" - }, - "node_modules/slash": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-5.1.0.tgz", - "integrity": "sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/source-map": { - "version": "0.6.1", - "resolved": 
"https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/source-map-js": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/stackback": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", - "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", - "dev": true, - "license": "MIT" - }, - "node_modules/std-env": { - "version": "3.9.0", - "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.9.0.tgz", - "integrity": "sha512-UGvjygr6F6tpH7o2qyqR6QYpwraIjKSdtzyBdyytFOHmPZY917kwdwLG0RbOjWOnKmnm3PeHjaoLLMie7kPLQw==", - "dev": true, - "license": "MIT" - }, - "node_modules/stdin-discarder": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/stdin-discarder/-/stdin-discarder-0.2.2.tgz", - "integrity": "sha512-UhDfHmA92YAlNnCfhmq0VeNL5bDbiZGg7sZ2IvPsXubGkiNa9EC+tUTsjBRsYUAz87btI6/1wf4XoVvQ3uRnmQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/storybook": { - "version": "9.1.5", - "resolved": "https://registry.npmjs.org/storybook/-/storybook-9.1.5.tgz", - "integrity": "sha512-cGwJ2AE6nxlwqQlOiI+HKX5qa7+FOV7Ha7Qa+GoASBIQSSnLfbY6UldgAxHCJGJOFtgW/wuqfDtNvni6sj1/OQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@storybook/global": "^5.0.0", - "@testing-library/jest-dom": "^6.6.3", - "@testing-library/user-event": "^14.6.1", - 
"@vitest/expect": "3.2.4", - "@vitest/mocker": "3.2.4", - "@vitest/spy": "3.2.4", - "better-opn": "^3.0.2", - "esbuild": "^0.18.0 || ^0.19.0 || ^0.20.0 || ^0.21.0 || ^0.22.0 || ^0.23.0 || ^0.24.0 || ^0.25.0", - "esbuild-register": "^3.5.0", - "recast": "^0.23.5", - "semver": "^7.6.2", - "ws": "^8.18.0" - }, - "bin": { - "storybook": "bin/index.cjs" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/storybook" - }, - "peerDependencies": { - "prettier": "^2 || ^3" - }, - "peerDependenciesMeta": { - "prettier": { - "optional": true - } - } - }, - "node_modules/string-width": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", - "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "dev": true, - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/string-width-cjs": { - "name": "string-width", - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": 
"https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", - "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi/node_modules/ansi-regex": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", - "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/strip-indent": { - "version": "3.0.0", - "resolved": 
"https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", - "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "min-indent": "^1.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-literal": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.0.0.tgz", - "integrity": "sha512-TcccoMhJOM3OebGhSBEmp3UZ2SfDMZUEBdRA/9ynfLi8yYajyWX3JiXArcJt4Umh4vISpspkQIY8ZZoCqjbviA==", - "dev": true, - "license": "MIT", - "dependencies": { - "js-tokens": "^9.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/strip-literal/node_modules/js-tokens": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", - "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/sucrase": { - "version": "3.35.0", - "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", - "integrity": "sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.2", - "commander": "^4.0.0", - "glob": "^10.3.10", - "lines-and-columns": "^1.1.6", - "mz": "^2.7.0", - "pirates": "^4.0.1", - "ts-interface-checker": "^0.1.9" - }, - "bin": { - "sucrase": "bin/sucrase", - "sucrase-node": "bin/sucrase-node" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/supports-preserve-symlinks-flag": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", - "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", - "dev": true, - "license": "MIT", - 
"engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/syncpack": { - "version": "13.0.4", - "resolved": "https://registry.npmjs.org/syncpack/-/syncpack-13.0.4.tgz", - "integrity": "sha512-kJ9VlRxNCsBD5pJAE29oXeBYbPLhEySQmK4HdpsLv81I6fcDDW17xeJqMwiU3H7/woAVsbgq25DJNS8BeiN5+w==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^5.4.1", - "chalk-template": "^1.1.0", - "commander": "^13.1.0", - "cosmiconfig": "^9.0.0", - "effect": "^3.13.7", - "enquirer": "^2.4.1", - "fast-check": "^3.23.2", - "globby": "^14.1.0", - "jsonc-parser": "^3.3.1", - "minimatch": "9.0.5", - "npm-package-arg": "^12.0.2", - "ora": "^8.2.0", - "prompts": "^2.4.2", - "read-yaml-file": "^2.1.0", - "semver": "^7.7.1", - "tightrope": "0.2.0", - "ts-toolbelt": "^9.6.0" - }, - "bin": { - "syncpack": "dist/bin.js", - "syncpack-fix-mismatches": "dist/bin-fix-mismatches/index.js", - "syncpack-format": "dist/bin-format/index.js", - "syncpack-lint": "dist/bin-lint/index.js", - "syncpack-lint-semver-ranges": "dist/bin-lint-semver-ranges/index.js", - "syncpack-list": "dist/bin-list/index.js", - "syncpack-list-mismatches": "dist/bin-list-mismatches/index.js", - "syncpack-prompt": "dist/bin-prompt/index.js", - "syncpack-set-semver-ranges": "dist/bin-set-semver-ranges/index.js", - "syncpack-update": "dist/bin-update/index.js" - }, - "engines": { - "node": ">=18.18.0" - } - }, - "node_modules/syncpack/node_modules/commander": { - "version": "13.1.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-13.1.0.tgz", - "integrity": "sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/tailwind-merge": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.3.1.tgz", - "integrity": 
"sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g==", - "dev": true, - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/dcastil" - } - }, - "node_modules/tailwind-scrollbar": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/tailwind-scrollbar/-/tailwind-scrollbar-4.0.2.tgz", - "integrity": "sha512-wAQiIxAPqk0MNTPptVe/xoyWi27y+NRGnTwvn4PQnbvB9kp8QUBiGl/wsfoVBHnQxTmhXJSNt9NHTmcz9EivFA==", - "dev": true, - "license": "MIT", - "dependencies": { - "prism-react-renderer": "^2.4.1" - }, - "engines": { - "node": ">=12.13.0" - }, - "peerDependencies": { - "tailwindcss": "4.x" - } - }, - "node_modules/tailwindcss": { - "version": "3.4.17", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.17.tgz", - "integrity": "sha512-w33E2aCvSDP0tW9RZuNXadXlkHXqFzSkQew/aIa2i/Sj8fThxwovwlXHSPXTbAHwEIhBFXAedUhP2tueAKP8Og==", - "dev": true, - "license": "MIT", - "dependencies": { - "@alloc/quick-lru": "^5.2.0", - "arg": "^5.0.2", - "chokidar": "^3.6.0", - "didyoumean": "^1.2.2", - "dlv": "^1.1.3", - "fast-glob": "^3.3.2", - "glob-parent": "^6.0.2", - "is-glob": "^4.0.3", - "jiti": "^1.21.6", - "lilconfig": "^3.1.3", - "micromatch": "^4.0.8", - "normalize-path": "^3.0.0", - "object-hash": "^3.0.0", - "picocolors": "^1.1.1", - "postcss": "^8.4.47", - "postcss-import": "^15.1.0", - "postcss-js": "^4.0.1", - "postcss-load-config": "^4.0.2", - "postcss-nested": "^6.2.0", - "postcss-selector-parser": "^6.1.2", - "resolve": "^1.22.8", - "sucrase": "^3.35.0" - }, - "bin": { - "tailwind": "lib/cli.js", - "tailwindcss": "lib/cli.js" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/tailwindcss/node_modules/postcss-selector-parser": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", - "integrity": 
"sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/thenify": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", - "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", - "dev": true, - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0" - } - }, - "node_modules/thenify-all": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", - "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", - "dev": true, - "license": "MIT", - "dependencies": { - "thenify": ">= 3.1.0 < 4" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/tightrope": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/tightrope/-/tightrope-0.2.0.tgz", - "integrity": "sha512-Kw36UHxJEELq2VUqdaSGR2/8cAsPgMtvX8uGVU6Jk26O66PhXec0A5ZnRYs47btbtwPDpXXF66+Fo3vimCM9aQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=16" - } - }, - "node_modules/tiny-invariant": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", - "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", - "dev": true, - "license": "MIT" - }, - "node_modules/tinybench": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", - "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", - "dev": true, - "license": "MIT" - }, - "node_modules/tinyexec": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", - "integrity": 
"sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==", - "dev": true, - "license": "MIT" - }, - "node_modules/tinyglobby": { - "version": "0.2.14", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz", - "integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "fdir": "^6.4.4", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=12.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/SuperchupuDev" - } - }, - "node_modules/tinyglobby/node_modules/fdir": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", - "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12.0.0" - }, - "peerDependencies": { - "picomatch": "^3 || ^4" - }, - "peerDependenciesMeta": { - "picomatch": { - "optional": true - } - } - }, - "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/tinypool": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", - "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.0.0 || >=20.0.0" - } - }, - "node_modules/tinyrainbow": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-2.0.0.tgz", - "integrity": 
"sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/tinyspy": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.3.tgz", - "integrity": "sha512-t2T/WLB2WRgZ9EpE4jgPJ9w+i66UZfDc8wHh0xrwiRNN+UwH98GIJkTeZqX9rg0i0ptwzqW+uYeIF0T4F8LR7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/totalist": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/totalist/-/totalist-3.0.1.tgz", - "integrity": "sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/ts-api-utils": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.1.0.tgz", - "integrity": "sha512-CUgTZL1irw8u29bzrOD/nH85jqyc74D6SshFgujOIA7osm2Rz7dYH77agkx7H4FBNxDq7Cjf+IjaX/8zwFW+ZQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18.12" - }, - "peerDependencies": { - "typescript": ">=4.8.4" - } - }, - "node_modules/ts-dedent": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/ts-dedent/-/ts-dedent-2.2.0.tgz", - "integrity": "sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.10" - } - }, - "node_modules/ts-interface-checker": { - "version": "0.1.13", - "resolved": 
"https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", - "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==", - "dev": true, - "license": "Apache-2.0" - }, - "node_modules/ts-toolbelt": { - "version": "9.6.0", - "resolved": "https://registry.npmjs.org/ts-toolbelt/-/ts-toolbelt-9.6.0.tgz", - "integrity": "sha512-nsZd8ZeNUzukXPlJmTBwUAuABDe/9qtVDelJeT/qW0ow3ZS3BsQJtNkan1802aM9Uf68/Y8ljw86Hu0h5IUW3w==", - "dev": true, - "license": "Apache-2.0" - }, - "node_modules/tsconfig-paths": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-4.2.0.tgz", - "integrity": "sha512-NoZ4roiN7LnbKn9QqE1amc9DJfzvZXxF4xDavcOWt1BPkdx+m+0gJuPM+S0vCe7zTJMYUP0R8pO2XMr+Y8oLIg==", - "dev": true, - "license": "MIT", - "dependencies": { - "json5": "^2.2.2", - "minimist": "^1.2.6", - "strip-bom": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "dev": true, - "license": "0BSD" - }, - "node_modules/typescript": { - "resolved": "../../node_modules/.pnpm/typescript@5.8.3/node_modules/typescript", - "link": true - }, - "node_modules/typescript-eslint": { - "resolved": "../../node_modules/.pnpm/typescript-eslint@8.38.0_eslint@9.31.0_jiti@2.4.2__typescript@5.8.3/node_modules/typescript-eslint", - "link": true - }, - "node_modules/undici-types": { - "version": "6.21.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", - "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/unicorn-magic": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.1.0.tgz", 
- "integrity": "sha512-lRfVq8fE8gz6QMBuDM6a+LO3IAzTi05H6gCVaUpir2E1Rwpo4ZUog45KpNXKC/Mn3Yb9UDuHumeFTo9iV/D9FQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/universalify": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", - "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 10.0.0" - } - }, - "node_modules/unplugin": { - "version": "1.16.1", - "resolved": "https://registry.npmjs.org/unplugin/-/unplugin-1.16.1.tgz", - "integrity": "sha512-4/u/j4FrCKdi17jaxuJA0jClGxB1AvU2hw/IuayPc4ay1XGaJs/rbb4v5WKwAjNifjmXK9PIFyuPiaK8azyR9w==", - "dev": true, - "license": "MIT", - "dependencies": { - "acorn": "^8.14.0", - "webpack-virtual-modules": "^0.6.2" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/update-browserslist-db": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz", - "integrity": "sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "escalade": "^3.2.0", - "picocolors": "^1.1.1" - }, - "bin": { - "update-browserslist-db": "cli.js" - }, - "peerDependencies": { - "browserslist": ">= 4.21.0" - } - }, - "node_modules/use-callback-ref": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz", - "integrity": 
"sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==", - "dev": true, - "license": "MIT", - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sidecar": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz", - "integrity": "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "detect-node-es": "^1.1.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sync-external-store": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.5.0.tgz", - "integrity": "sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "dev": true, - "license": "MIT" - }, - "node_modules/validate-npm-package-name": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/validate-npm-package-name/-/validate-npm-package-name-6.0.2.tgz", - "integrity": 
"sha512-IUoow1YUtvoBBC06dXs8bR8B9vuA3aJfmQNKMoaPG/OFsPmoQvw8xh+6Ye25Gx9DQhoEom3Pcu9MKHerm/NpUQ==", - "dev": true, - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/vite": { - "resolved": "../../node_modules/.pnpm/vite@6.3.5_@types+node@24.1.0_jiti@2.4.2_lightningcss@1.30.1_terser@5.43.1_tsx@4.20.3_yaml@2.8.0/node_modules/vite", - "link": true - }, - "node_modules/vite-node": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz", - "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cac": "^6.7.14", - "debug": "^4.4.1", - "es-module-lexer": "^1.7.0", - "pathe": "^2.0.3", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" - }, - "bin": { - "vite-node": "vite-node.mjs" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/vite-plugin-dts": { - "resolved": "../../node_modules/.pnpm/vite-plugin-dts@4.5.4_@types+node@24.1.0_rollup@4.45.1_typescript@5.8.3_vite@6.3.5_@types+nod_ddgp24sr5pf6ze3b5hs7mrzr5e/node_modules/vite-plugin-dts", - "link": true - }, - "node_modules/vite-plugin-static-copy": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/vite-plugin-static-copy/-/vite-plugin-static-copy-3.1.2.tgz", - "integrity": "sha512-aVmYOzptLVOI2b1jL+cmkF7O6uhRv1u5fvOkQgbohWZp2CbR22kn9ZqkCUIt9umKF7UhdbsEpshn1rf4720QFg==", - "dev": true, - "license": "MIT", - "dependencies": { - "chokidar": "^3.6.0", - "fs-extra": "^11.3.0", - "p-map": "^7.0.3", - "picocolors": "^1.1.1", - "tinyglobby": "^0.2.14" - }, - "engines": { - "node": "^18.0.0 || >=20.0.0" - }, - "peerDependencies": { - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/vitest": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", - "integrity": 
"sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/chai": "^5.2.2", - "@vitest/expect": "3.2.4", - "@vitest/mocker": "3.2.4", - "@vitest/pretty-format": "^3.2.4", - "@vitest/runner": "3.2.4", - "@vitest/snapshot": "3.2.4", - "@vitest/spy": "3.2.4", - "@vitest/utils": "3.2.4", - "chai": "^5.2.0", - "debug": "^4.4.1", - "expect-type": "^1.2.1", - "magic-string": "^0.30.17", - "pathe": "^2.0.3", - "picomatch": "^4.0.2", - "std-env": "^3.9.0", - "tinybench": "^2.9.0", - "tinyexec": "^0.3.2", - "tinyglobby": "^0.2.14", - "tinypool": "^1.1.1", - "tinyrainbow": "^2.0.0", - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0", - "vite-node": "3.2.4", - "why-is-node-running": "^2.3.0" - }, - "bin": { - "vitest": "vitest.mjs" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "@edge-runtime/vm": "*", - "@types/debug": "^4.1.12", - "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", - "@vitest/browser": "3.2.4", - "@vitest/ui": "3.2.4", - "happy-dom": "*", - "jsdom": "*" - }, - "peerDependenciesMeta": { - "@edge-runtime/vm": { - "optional": true - }, - "@types/debug": { - "optional": true - }, - "@types/node": { - "optional": true - }, - "@vitest/browser": { - "optional": true - }, - "@vitest/ui": { - "optional": true - }, - "happy-dom": { - "optional": true - }, - "jsdom": { - "optional": true - } - } - }, - "node_modules/vitest/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/webpack-virtual-modules": { - "version": "0.6.2", 
- "resolved": "https://registry.npmjs.org/webpack-virtual-modules/-/webpack-virtual-modules-0.6.2.tgz", - "integrity": "sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/why-is-node-running": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", - "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", - "dev": true, - "license": "MIT", - "dependencies": { - "siginfo": "^2.0.0", - "stackback": "0.0.2" - }, - "bin": { - "why-is-node-running": "cli.js" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", - "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": 
"^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/wrap-ansi-cjs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-styles": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", - "integrity": 
"sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/ws": { - "version": "8.18.3", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", - "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/yallist": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", - "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", - "dev": true, - "license": "ISC" - }, - "node_modules/yaml": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", - "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", - "dev": true, - "license": "ISC", - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - } - }, - "node_modules/yocto-queue": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.1.tgz", - "integrity": "sha512-AyeEbWOu/TAXdxlV9wmGcR0+yh2j3vYPGOECcIj2S7MkrLyC7ne+oye2BKTItt0ii2PHk4cDy+95+LshzbXnGg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12.20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/zustand": { - "version": "4.5.7", - "resolved": "https://registry.npmjs.org/zustand/-/zustand-4.5.7.tgz", - "integrity": 
"sha512-CHOUy7mu3lbD6o6LJLfllpjkzhHXSBlX8B9+qPddUsIfeF5S/UZ5q0kmCsnRqT1UHFQZchNFDDzMbQsuesHWlw==", - "dev": true, - "license": "MIT", - "dependencies": { - "use-sync-external-store": "^1.2.2" - }, - "engines": { - "node": ">=12.7.0" - }, - "peerDependencies": { - "@types/react": ">=16.8", - "immer": ">=9.0.6", - "react": ">=16.8" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "immer": { - "optional": true - }, - "react": { - "optional": true - } - } - } - } -} diff --git a/web/common/package.json b/web/common/package.json index f576696b61..924bbaa883 100644 --- a/web/common/package.json +++ b/web/common/package.json @@ -2,46 +2,53 @@ "name": "@tobikodata/sqlmesh-common", "version": "0.0.1", "devDependencies": { - "@eslint/js": "^9.31.0", - "@radix-ui/react-slot": "^1.2.3", - "@radix-ui/react-tooltip": "^1.2.8", - "@storybook/addon-docs": "^9.1.5", - "@storybook/react-vite": "^9.1.5", - "@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@testing-library/dom": "^10.4.1", - "@testing-library/jest-dom": "^6.6.3", - "@testing-library/react": "^16.3.0", - "@types/node": "^20.11.25", - "@types/react": "^18.3.23", - "@types/react-dom": "^18.3.7", - "@vitejs/plugin-react": "^4.7.0", - "@vitest/browser": "^3.2.4", - "@xyflow/react": "^12.8.4", - "autoprefixer": "^10.4.21", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "eslint": "^9.31.0", - "eslint-plugin-react-hooks": "^5.2.0", - "eslint-plugin-storybook": "^9.1.5", - "fuse.js": "^7.1.0", - "globals": "^16.3.0", - "lucide-react": "^0.542.0", - "playwright": "^1.54.1", - "postcss": "^8.5.6", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "storybook": "^9.1.5", - "syncpack": "^13.0.4", - "tailwind-merge": "^3.3.1", - "tailwind-scrollbar": "^3.1.0", - "tailwindcss": "^3.4.17", - "typescript": "^5.8.3", - "typescript-eslint": "^8.38.0", - "vite": "^6.3.5", - "vite-plugin-dts": "^4.5.4", - "vite-plugin-static-copy": "^3.1.1", - "vitest": "^3.2.4" + 
"@eslint/js": "9.31.0", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-tooltip": "1.2.8", + "@storybook/addon-docs": "9.1.5", + "@storybook/react-vite": "9.1.5", + "@tailwindcss/typography": "0.5.16", + "@tanstack/react-virtual": "3.13.12", + "@testing-library/dom": "10.4.1", + "@testing-library/jest-dom": "6.6.3", + "@testing-library/react": "16.3.0", + "@testing-library/user-event": "14.6.1", + "@types/dagre": "0.7.53", + "@types/node": "20.11.25", + "@types/react": "18.3.23", + "@types/react-dom": "18.3.7", + "@vitejs/plugin-react": "4.7.0", + "@vitest/browser": "3.2.4", + "@xyflow/react": "12.8.4", + "autoprefixer": "10.4.21", + "browserslist": "4.26.2", + "caniuse-lite": "1.0.30001746", + "class-variance-authority": "0.7.1", + "clsx": "2.1.1", + "cronstrue": "3.3.0", + "dagre": "0.8.5", + "deepmerge": "4.3.1", + "eslint": "9.31.0", + "eslint-plugin-react-hooks": "5.2.0", + "eslint-plugin-storybook": "9.1.5", + "fuse.js": "7.1.0", + "globals": "16.3.0", + "lucide-react": "0.542.0", + "playwright": "1.54.1", + "postcss": "8.5.6", + "react": "18.3.1", + "react-dom": "18.3.1", + "storybook": "9.1.5", + "syncpack": "13.0.4", + "tailwind-merge": "3.3.1", + "tailwind-scrollbar": "3.1.0", + "tailwindcss": "3.4.17", + "typescript": "5.8.3", + "typescript-eslint": "8.38.0", + "vite": "6.3.5", + "vite-plugin-dts": "4.5.4", + "vite-plugin-static-copy": "3.1.1", + "vitest": "3.2.4" }, "exports": { ".": { @@ -56,7 +63,17 @@ }, "./styles/*": "./dist/styles/*", "./design/*": "./dist/styles/design/*", - "./configs/*": "./dist/configs/*" + "./configs/*": "./dist/configs/*", + "./lineage": { + "import": { + "types": "./dist/lineage/index.d.ts", + "default": "./dist/lineage/index.es.js" + }, + "require": { + "types": "./dist/lineage/index.d.ts", + "default": "./dist/lineage/index.umd.js" + } + } }, "files": [ "/dist" @@ -65,22 +82,26 @@ "main": "dist/sqlmesh-common.umd.js", "module": "dist/sqlmesh-common.es.js", "peerDependencies": { - "@radix-ui/react-slot": "^1.2.3", - 
"@radix-ui/react-tooltip": "^1.2.8", - "@tailwindcss/typography": "^0.5.16", - "@tanstack/react-virtual": "^3.13.12", - "@xyflow/react": "^12.8.4", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "fuse.js": "^7.1.0", - "lucide-react": "^0.542.0", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "tailwind-merge": "^3.3.1", - "tailwindcss": "^3.4.17" + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-tooltip": "1.2.8", + "@tailwindcss/typography": "0.5.16", + "@tanstack/react-virtual": "3.13.12", + "@xyflow/react": "12.8.4", + "class-variance-authority": "0.7.1", + "clsx": "2.1.1", + "cronstrue": "3.3.0", + "dagre": "0.8.5", + "deepmerge": "4.3.1", + "fuse.js": "7.1.0", + "lucide-react": "0.542.0", + "react": "18.3.1", + "react-dom": "18.3.1", + "tailwind-merge": "3.3.1", + "tailwind-scrollbar": "3.1.0", + "tailwindcss": "3.4.17" }, "private": false, - "repository": "TobikoData/sqlmesh", + "repository": "SQLMesh/sqlmesh", "scripts": { "build": "tsc -p tsconfig.build.json && vite build --base './' && pnpm run build:css", "build-storybook": "storybook build", @@ -92,6 +113,7 @@ "storybook": "storybook dev -p 6006", "syncpack": "syncpack lint", "syncpack:fix": "syncpack fix-mismatches", + "syncpack:format": "syncpack format", "syncpack:list": "syncpack list-mismatches", "test": "vitest", "test:ui": "vitest --ui", diff --git a/web/common/src/components/Badge/Badge.css b/web/common/src/components/Badge/Badge.css index 582a1264fb..0efef35e41 100644 --- a/web/common/src/components/Badge/Badge.css +++ b/web/common/src/components/Badge/Badge.css @@ -1,4 +1,4 @@ -:root { +:where(:root) { --color-badge-background: var(--color-neutral-100); --color-badge-foreground: var(--color-prose); } diff --git a/web/common/src/components/Badge/Badge.stories.tsx b/web/common/src/components/Badge/Badge.stories.tsx index 09754d29a8..143440037e 100644 --- a/web/common/src/components/Badge/Badge.stories.tsx +++ b/web/common/src/components/Badge/Badge.stories.tsx @@ -1,6 +1,6 @@ 
import type { Meta, StoryObj } from '@storybook/react-vite' -import type { Shape, Size } from '@/types' +import type { Shape, Size } from '@sqlmesh-common/types' import { Badge } from './Badge' const meta: Meta = { diff --git a/web/common/src/components/Badge/Badge.tsx b/web/common/src/components/Badge/Badge.tsx index cd6df21c26..2bc23940ad 100644 --- a/web/common/src/components/Badge/Badge.tsx +++ b/web/common/src/components/Badge/Badge.tsx @@ -1,8 +1,8 @@ import { Slot } from '@radix-ui/react-slot' import React from 'react' -import type { Shape, Size } from '@/types' -import { cn } from '@/utils' +import type { Shape, Size } from '@sqlmesh-common/types' +import { cn } from '@sqlmesh-common/utils' import { cva } from 'class-variance-authority' import './Badge.css' diff --git a/web/common/src/components/Button/Button.css b/web/common/src/components/Button/Button.css index 7e8b856bf3..a95397dabb 100644 --- a/web/common/src/components/Button/Button.css +++ b/web/common/src/components/Button/Button.css @@ -1,4 +1,4 @@ -:root { +:where(:root) { --color-button-primary-background: var(--color-action); --color-button-primary-foreground: var(--color-light); --color-button-primary-hover: var(--color-action-hover); diff --git a/web/common/src/components/Button/Button.stories.tsx b/web/common/src/components/Button/Button.stories.tsx index 57fb9f26e2..8836a35a5c 100644 --- a/web/common/src/components/Button/Button.stories.tsx +++ b/web/common/src/components/Button/Button.stories.tsx @@ -1,5 +1,5 @@ import type { Meta, StoryObj } from '@storybook/react-vite' -import type { Size } from '@/types' +import type { Size } from '@sqlmesh-common/types' import { Button, type ButtonVariant } from './Button' import { fn, expect, userEvent, within } from 'storybook/test' diff --git a/web/common/src/components/Button/Button.tsx b/web/common/src/components/Button/Button.tsx index cc34ce192a..fd9baebdf2 100644 --- a/web/common/src/components/Button/Button.tsx +++ 
b/web/common/src/components/Button/Button.tsx @@ -2,8 +2,8 @@ import React from 'react' import { Slot } from '@radix-ui/react-slot' import { cva } from 'class-variance-authority' -import { cn } from '@/utils' -import type { Shape, Size } from '@/types' +import { cn } from '@sqlmesh-common/utils' +import type { Shape, Size } from '@sqlmesh-common/types' import './Button.css' diff --git a/web/common/src/components/CopyButton/CopyButton.tsx b/web/common/src/components/CopyButton/CopyButton.tsx index 45aae3d817..1e5ba2580e 100644 --- a/web/common/src/components/CopyButton/CopyButton.tsx +++ b/web/common/src/components/CopyButton/CopyButton.tsx @@ -1,7 +1,10 @@ import React from 'react' -import { Button, type ButtonProps } from '@/components/Button/Button' -import { useCopyClipboard } from '@/hooks/useCopyClipboard' +import { + Button, + type ButtonProps, +} from '@sqlmesh-common/components/Button/Button' +import { useCopyClipboard } from '@sqlmesh-common/hooks/useCopyClipboard' export interface CopyButtonProps extends Omit { text: string @@ -36,6 +39,7 @@ export const CopyButton = React.forwardRef( onClick={e => { e.stopPropagation() copyToClipboard(text) + onClick?.(e) }} disabled={disabled || !!isCopied} {...props} diff --git a/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx b/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx index c1e2c66ed0..b92eaa418b 100644 --- a/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx +++ b/web/common/src/components/HorizontalContainer/HorizontalContainer.tsx @@ -1,6 +1,6 @@ import React from 'react' -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import { ScrollContainer } from '../ScrollContainer/ScrollContainer' export interface HorizontalContainerProps diff --git a/web/common/src/components/Input/Input.css b/web/common/src/components/Input/Input.css new file mode 100644 index 0000000000..2cb6ab9695 --- /dev/null +++ 
b/web/common/src/components/Input/Input.css @@ -0,0 +1,7 @@ +:where(:root) { + --color-input-background: var(--color-light); + --color-input-background-translucid: var(--color-neutral-5); + --color-input-foreground: var(--color-prose); + --color-input-placeholder: var(--color-neutral-400); + --color-input-border: var(--color-neutral-300); +} diff --git a/web/common/src/components/Input/Input.tsx b/web/common/src/components/Input/Input.tsx index 5c25b0a698..8d5c6fc7e4 100644 --- a/web/common/src/components/Input/Input.tsx +++ b/web/common/src/components/Input/Input.tsx @@ -1,8 +1,10 @@ import * as React from 'react' -import { cn } from '@/utils' -import type { Size } from '@/types' +import { cn } from '@sqlmesh-common/utils' +import type { Size } from '@sqlmesh-common/types' import { cva } from 'class-variance-authority' +import './Input.css' + export interface InputProps extends React.ComponentProps<'input'> { inputSize?: Size } @@ -15,9 +17,9 @@ export const Input = React.forwardRef( className={cn( inputVariants({ size: inputSize }), 'border items-center border-input-border bg-input-background text-input-foreground transition-colors placeholder:text-input-placeholder', - 'file:border-0 file:h-fit file:bg-background-lucid file:rounded-sm file:flex-col file:mt-0.5', + 'file:border-0 file:h-fit file:bg-background-translucid file:rounded-sm file:flex-col file:mt-0.5', type === 'file' && - 'bg-input-background-lucid border-[transparent] pl-1', + 'bg-input-background-translucid border-[transparent] pl-1', className, )} ref={ref} diff --git a/web/common/src/components/Lineage/Lineage.css b/web/common/src/components/Lineage/Lineage.css new file mode 100644 index 0000000000..7855ced10a --- /dev/null +++ b/web/common/src/components/Lineage/Lineage.css @@ -0,0 +1,3 @@ +.react-flow__node { + height: auto !important; +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts 
b/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts new file mode 100644 index 0000000000..4dd6ca93ef --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/ColumnLevelLineageContext.ts @@ -0,0 +1,82 @@ +import React from 'react' + +import { type PortId } from '../utils' + +export type ColumnLevelLineageAdjacencyList< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, +> = { + [K in TAdjacencyListKey]: { + [C in TAdjacencyListColumnKey]: { + source?: string | null + expression?: string | null + models: Record + } + } +} + +export type ColumnLevelLineageContextValue< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +> = { + adjacencyListColumnLevel: TColumnLevelLineageAdjacencyList + selectedColumns: Set + columnLevelLineage: Map + setColumnLevelLineage: React.Dispatch< + React.SetStateAction> + > + showColumns: boolean + setShowColumns: React.Dispatch> + fetchingColumns: Set + setFetchingColumns: React.Dispatch>> +} + +export function getColumnLevelLineageContextInitial< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +>() { + return { + adjacencyListColumnLevel: {} as TColumnLevelLineageAdjacencyList, + columnLevelLineage: new Map(), + setColumnLevelLineage: () => {}, + showColumns: false, + setShowColumns: () => {}, + selectedColumns: new Set(), + fetchingColumns: new Set(), + setFetchingColumns: () => {}, + } as const +} + +export type 
ColumnLevelLineageContextHook< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +> = () => ColumnLevelLineageContextValue< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnID, + TColumnLevelLineageAdjacencyList +> diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css new file mode 100644 index 0000000000..8da848c684 --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.css @@ -0,0 +1,31 @@ +:where(:root) { + --color-lineage-model-column-badge-background: var( + --color-lineage-node-badge-background + ); + --color-lineage-model-column-badge-foreground: var( + --color-lineage-node-badge-foreground + ); + + --color-lineage-model-column-metadata-label: var(--color-metadata-label); + --color-lineage-model-column-metadata-value: var(--color-metadata-value); + + --color-lineage-model-column-information-info: var(--color-information-info); +} + +.FactoryColumn__Metadata { + --color-metadata-label: var(--color-lineage-model-column-metadata-label); + --color-metadata-value: var(--color-lineage-model-column-metadata-value); +} + +.FactoryColumn__NodeBadge { + --color-lineage-node-badge-background: var( + --color-lineage-model-column-badge-background + ); + --color-lineage-node-badge-foreground: var( + --color-lineage-model-column-badge-foreground + ); +} + +.FactoryColumn__Information { + --color-typography-info: var(--color-lineage-model-column-information-info); +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx new file mode 100644 index 
0000000000..3b83288615 --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/FactoryColumn.tsx @@ -0,0 +1,292 @@ +import { + AlertCircle, + CircleOff, + FileCode, + FileMinus, + Workflow, +} from 'lucide-react' +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' +import { NodeBadge } from '../node/NodeBadge' +import { NodePort } from '../node/NodePort' +import { type NodeId, type PortHandleId, type PortId } from '../utils' +import { + type ColumnLevelLineageAdjacencyList, + type ColumnLevelLineageContextHook, +} from './ColumnLevelLineageContext' +import { Tooltip } from '@sqlmesh-common/components/Tooltip/Tooltip' +import { Metadata } from '@sqlmesh-common/components/Metadata/Metadata' +import { HorizontalContainer } from '@sqlmesh-common/components/HorizontalContainer/HorizontalContainer' +import { Information } from '@sqlmesh-common/components/Typography/Information' +import { LoadingContainer } from '@sqlmesh-common/components/LoadingContainer/LoadingContainer' + +import './FactoryColumn.css' + +export function FactoryColumn< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TNodeID extends string = NodeId, + TColumnID extends string = PortId, + TLeftPortHandleId extends string = PortHandleId, + TRightPortHandleId extends string = PortHandleId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +>( + useLineage: ColumnLevelLineageContextHook< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnID, + TColumnLevelLineageAdjacencyList + >, +) { + return React.memo(function FactoryColumn({ + id, + nodeId, + modelName, + name, + description, + type, + className, + data, + isFetching = false, + error, + renderError, + renderExpression, + renderSource, + onClick, + onCancel, + }: { + id: TColumnID + nodeId: TNodeID + modelName: 
TAdjacencyListKey + name: TAdjacencyListColumnKey + type: string + description?: string | null + className?: string + data?: TColumnLevelLineageAdjacencyList + isFetching?: boolean + error?: Error | null + renderError?: (error: Error) => React.ReactNode + renderExpression?: (expression: string) => React.ReactNode + renderSource?: ( + source: string, + expression?: string | null, + ) => React.ReactNode + onClick?: () => void + onCancel?: () => void + }) { + const { selectedColumns, adjacencyListColumnLevel, columnLevelLineage } = + useLineage() + + const column = adjacencyListColumnLevel?.[modelName]?.[name] + const currentColumnLineage = columnLevelLineage.get(id) + const isSelectedColumn = selectedColumns.has(id) + const isTriggeredColumn = + column != null && currentColumnLineage != null && isSelectedColumn + + // Column that has no upstream connections + const isSourceColumn = React.useMemo(() => { + if (data == null) return false + + const models = Object.values(data) + + console.assert( + data[modelName], + `Model: ${modelName} not found in column lineage data`, + ) + console.assert( + data[modelName][name], + `Column: ${name} for model: ${modelName} not found in column lineage data`, + ) + + const columns = Object.values(data[modelName]) + + if (models.length > 1 || columns.length > 1) return false + + const columnModels = data[modelName][name].models + + return Object.keys(columnModels).length === 0 + }, [data, modelName, name]) + + const isDisabledColumn = isSourceColumn && !isSelectedColumn + + function renderColumnStates() { + if (isFetching) return <> + if (error && renderError) + return ( + + } + side="left" + sideOffset={20} + delayDuration={0} + className="bg-lineage-model-column-error-background p-0" + > + {renderError(error)} + + ) + + return ( + <> + {isSourceColumn ? 
( + + ) : ( + + )} + {column?.source && renderSource && ( + + } + side="left" + sideOffset={20} + className="p-0 min-w-[30rem] max-w-xl bg-lineage-model-column-source-background" + delayDuration={0} + > + {renderSource(column.source, column.expression)} + + )} + {column?.expression && renderExpression && ( + + } + side="left" + sideOffset={20} + className="p-0 min-w-[30rem] max-w-xl bg-lineage-model-column-expression-background" + delayDuration={0} + > + {renderExpression(column.expression)} + + )} + + ) + } + + function renderColumn() { + return ( + + + {renderColumnStates()} + {description ? ( + + + + ) : ( + + )} + + + } + value={ + {type} + } + className={cn( + 'FactoryColumn__Metadata relative overflow-visible', + isDisabledColumn && 'cursor-not-allowed', + className, + )} + /> + ) + } + + function handleSelectColumn(e: React.MouseEvent) { + e.stopPropagation() + e.preventDefault() + + if (isFetching) { + onCancel?.() + } else if ((isSelectedColumn || isSourceColumn) && !isTriggeredColumn) { + return + } else { + onClick?.() + } + } + + return isSelectedColumn ? 
( + + id={id} + nodeId={nodeId} + className={cn( + 'border-t border-lineage-divider first:border-t-0', + isTriggeredColumn && 'bg-lineage-model-column-active-background', + )} + > + {renderColumn()} + + ) : ( + renderColumn() + ) + }) +} + +function DisplayColumName({ + name, + className, +}: { + name: string + className?: string +}) { + return ( + + {name} + + ) +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/help.ts b/web/common/src/components/Lineage/LineageColumnLevel/help.ts new file mode 100644 index 0000000000..0966308a72 --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/help.ts @@ -0,0 +1,253 @@ +import { + toEdgeID, + toNodeID, + toPortID, + type LineageEdge, + type LineageEdgeData, + type EdgeId, + type NodeId, + type PortId, + type TransformEdgeFn, +} from '../utils' +import { type ColumnLevelLineageAdjacencyList } from './ColumnLevelLineageContext' + +export const MAX_COLUMNS_TO_DISPLAY = 5 + +export function getAdjacencyListKeysFromColumnLineage< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +>(columnLineage: TColumnLevelLineageAdjacencyList) { + const adjacencyListKeys = new Set() + + const targets = Object.entries(columnLineage) as [ + TAdjacencyListKey, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey], + ][] + + for (const [sourceModelName, targetColumns] of targets) { + adjacencyListKeys.add(sourceModelName) + + const targetConnections = Object.entries(targetColumns) as [ + TAdjacencyListColumnKey, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey][TAdjacencyListColumnKey], + ][] + + for (const [, { models: sourceModels }] of targetConnections) { + for (const targetModelName of Object.keys( + sourceModels, + ) as TAdjacencyListKey[]) { + 
adjacencyListKeys.add(targetModelName) + } + } + } + + return Array.from(adjacencyListKeys) +} + +export function getEdgesFromColumnLineage< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +>({ + columnLineage, + transformEdge, +}: { + columnLineage: TColumnLevelLineageAdjacencyList + transformEdge: TransformEdgeFn< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > +}) { + const edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] = [] + const modelLevelEdgeIDs = new Map() + const targets = Object.entries(columnLineage || {}) as [ + TAdjacencyListKey, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey], + ][] + + for (const [targetModelName, targetColumns] of targets) { + const targetConnections = Object.entries(targetColumns) as [ + TAdjacencyListColumnKey, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey][TAdjacencyListColumnKey], + ][] + + const targetNodeId = toNodeID(targetModelName) + + for (const [ + targetColumnName, + { models: sourceModels }, + ] of targetConnections) { + const sources = Object.entries(sourceModels) as [ + TAdjacencyListKey, + TAdjacencyListColumnKey[], + ][] + + for (const [sourceModelName, sourceColumns] of sources) { + const sourceNodeId = toNodeID(sourceModelName) + + modelLevelEdgeIDs.set( + toEdgeID(sourceModelName, targetModelName), + [sourceNodeId, targetNodeId], + ) + + sourceColumns.forEach(sourceColumnName => { + const edgeId = toEdgeID( 
+ sourceModelName, + sourceColumnName, + targetModelName, + targetColumnName, + ) + const sourceColumnId = toPortID( + sourceModelName, + sourceColumnName, + ) + const targetColumnId = toPortID( + targetModelName, + targetColumnName, + ) + + edges.push( + transformEdge( + 'port', + edgeId, + sourceNodeId, + targetNodeId, + sourceColumnId, + targetColumnId, + ), + ) + }) + } + } + } + + Array.from(modelLevelEdgeIDs.entries()).forEach( + ([edgeId, [sourceNodeId, targetNodeId]]) => { + edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) + }, + ) + return edges +} + +export function getConnectedColumnsIDs< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +>(adjacencyList: TColumnLevelLineageAdjacencyList) { + const connectedColumns = new Set() + const targets = Object.entries(adjacencyList) as [ + TAdjacencyListKey, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey], + ][] + + for (const [sourceModelName, targetColumns] of targets) { + const targetConnections = Object.entries(targetColumns) as [ + TAdjacencyListColumnKey, + TColumnLevelLineageAdjacencyList[TAdjacencyListKey][TAdjacencyListColumnKey], + ][] + + for (const [ + sourceColumnName, + { models: sourceModels }, + ] of targetConnections) { + connectedColumns.add(toPortID(sourceModelName, sourceColumnName)) + + const sources = Object.entries(sourceModels) as [ + TAdjacencyListKey, + TAdjacencyListColumnKey[], + ][] + + for (const [targetModelName, sourceColumns] of sources) { + sourceColumns.forEach(sourceColumnName => { + connectedColumns.add(toPortID(targetModelName, sourceColumnName)) + }) + } + } + } + return connectedColumns +} + +export function calculateNodeColumnsCount(columnsCount: number = 0) { + return 
Math.min(columnsCount, MAX_COLUMNS_TO_DISPLAY) +} + +export function calculateSelectedColumnsHeight( + selectedColumnsCount: number = 0, +) { + const selectedColumnsTopSeparatorHeight = 1 + const selectedColumnSeparatorHeight = 1 + const selectedColumnHeight = 24 // tailwind h-6 + const selectedColumnsSeparators = + selectedColumnsCount > 1 ? selectedColumnsCount - 1 : 0 + + return [ + selectedColumnsCount > 0 ? selectedColumnsTopSeparatorHeight : 0, + selectedColumnsCount * selectedColumnHeight, + selectedColumnsCount > 0 + ? selectedColumnsSeparators * selectedColumnSeparatorHeight + : 0, + ].reduce((acc, h) => acc + h, 0) +} + +export function calculateColumnsHeight({ + columnsCount = 0, + hasColumnsFilter = true, +}: { + columnsCount: number + hasColumnsFilter?: boolean +}) { + const hasColumns = columnsCount > 0 + const columnHeight = 24 // tailwind h-6 + const columnsTopSeparator = 1 + const columnSeparator = 0 + const columnsContainerPadding = 4 + const columnsPadding = 0 + const columnsFilterHeight = hasColumnsFilter && hasColumns ? columnHeight : 0 + const columnsSeparators = columnsCount > 1 ? columnsCount - 1 : 0 + + return [ + hasColumns ? columnsSeparators * columnSeparator : 0, + columnsCount * columnHeight, + hasColumns ? columnsPadding * 2 : 0, + hasColumns ? columnsContainerPadding * 2 : 0, + hasColumns ? columnsFilterHeight : 0, + hasColumns ? 
columnsTopSeparator : 0, + ].reduce((acc, height) => acc + height, 0) +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts b/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts new file mode 100644 index 0000000000..53032c2c12 --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/useColumnLevelLineage.ts @@ -0,0 +1,52 @@ +import merge from 'deepmerge' +import React from 'react' + +import { type PortId } from '../utils' +import { type ColumnLevelLineageAdjacencyList } from './ColumnLevelLineageContext' +import { + getAdjacencyListKeysFromColumnLineage, + getConnectedColumnsIDs, +} from './help' + +export function useColumnLevelLineage< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumnID extends string = PortId, + TColumnLevelLineageAdjacencyList extends ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + > = ColumnLevelLineageAdjacencyList< + TAdjacencyListKey, + TAdjacencyListColumnKey + >, +>(columnLevelLineage: Map) { + const adjacencyListColumnLevel = React.useMemo(() => { + return merge.all(Array.from(columnLevelLineage.values()), { + arrayMerge: (dest, source) => Array.from(new Set([...dest, ...source])), + }) as TColumnLevelLineageAdjacencyList + }, [columnLevelLineage]) + + const selectedColumns = React.useMemo(() => { + return getConnectedColumnsIDs< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnID + >(adjacencyListColumnLevel) + }, [adjacencyListColumnLevel]) + + const adjacencyListKeysColumnLevel = React.useMemo(() => { + return adjacencyListColumnLevel != null + ? 
getAdjacencyListKeysFromColumnLineage< + TAdjacencyListKey, + TAdjacencyListColumnKey, + TColumnLevelLineageAdjacencyList + >(adjacencyListColumnLevel) + : [] + }, [adjacencyListColumnLevel]) + + return { + adjacencyListColumnLevel, + selectedColumns, + adjacencyListKeysColumnLevel, + } +} diff --git a/web/common/src/components/Lineage/LineageColumnLevel/useColumns.tsx b/web/common/src/components/Lineage/LineageColumnLevel/useColumns.tsx new file mode 100644 index 0000000000..3ed1278a5c --- /dev/null +++ b/web/common/src/components/Lineage/LineageColumnLevel/useColumns.tsx @@ -0,0 +1,58 @@ +import React from 'react' + +import { toPortID } from '../utils' +import { type PortId } from '../utils' + +export interface Column { + data_type: string + description?: string | null +} + +export function useColumns< + TAdjacencyListKey extends string, + TAdjacencyListColumnKey extends string, + TColumn extends Column, + TColumnID extends string = PortId, +>( + selectedPorts: Set, + adjacencyListKey: TAdjacencyListKey, + rawColumns?: Record, +) { + const columnNames = React.useMemo(() => { + return new Set( + Object.keys(rawColumns ?? {}).map(column => + toPortID(adjacencyListKey, column), + ), + ) + }, [rawColumns, adjacencyListKey]) + + const [selectedColumns, columns] = React.useMemo(() => { + const selected = [] + const output = [] + + for (const [column, info] of Object.entries(rawColumns ?? 
{}) as [ + TAdjacencyListColumnKey, + TColumn, + ][]) { + const columnId = toPortID(adjacencyListKey, column) + const nodeColumn = { + name: column, + ...info, + id: columnId, + } + + if (selectedPorts.has(columnId)) { + selected.push(nodeColumn) + } else { + output.push(nodeColumn) + } + } + return [selected, output] + }, [rawColumns, adjacencyListKey, selectedPorts]) + + return { + columns, + columnNames, + selectedColumns, + } +} diff --git a/web/common/src/components/Lineage/LineageContext.ts b/web/common/src/components/Lineage/LineageContext.ts new file mode 100644 index 0000000000..10ed0a1e1a --- /dev/null +++ b/web/common/src/components/Lineage/LineageContext.ts @@ -0,0 +1,149 @@ +import React from 'react' + +import { + type EdgeId, + type LineageEdge, + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + type LineageNodesMap, + type NodeId, + type PortId, + ZOOM_THRESHOLD, +} from './utils' + +export interface LineageContextValue< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +> { + // Node selection + showOnlySelectedNodes: boolean + setShowOnlySelectedNodes: React.Dispatch> + selectedNodes: Set + setSelectedNodes: React.Dispatch>> + selectedEdges: Set + setSelectedEdges: React.Dispatch>> + selectedNodeId: TNodeID | null + setSelectedNodeId: React.Dispatch> + + // Layout + zoom: number + setZoom: React.Dispatch> + + // Nodes and Edges + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] + setEdges: React.Dispatch< + React.SetStateAction< + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] + > + > + nodes: LineageNode[] + nodesMap: 
LineageNodesMap + setNodesMap: React.Dispatch< + React.SetStateAction> + > + currentNodeId: TNodeID | null + selectedNode: LineageNode | null +} + +export function getInitial< + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, +>() { + return { + showOnlySelectedNodes: false, + setShowOnlySelectedNodes: () => {}, + selectedNodes: new Set(), + setSelectedNodes: () => {}, + selectedEdges: new Set(), + setSelectedEdges: () => {}, + setSelectedNodeId: () => {}, + zoom: ZOOM_THRESHOLD, + setZoom: () => {}, + edges: [], + setEdges: () => {}, + nodes: [], + nodesMap: {}, + setNodesMap: () => {}, + selectedNodeId: null, + selectedNode: null, + currentNodeId: null, + } +} + +export type LineageContextHook< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +> = () => LineageContextValue< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID +> + +export function createLineageContext< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, + TLineageContextValue extends LineageContextValue< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > = LineageContextValue< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >, +>(initial: TLineageContextValue) { + const LineageContext = React.createContext(initial) + + 
return { + Provider: LineageContext.Provider, + useLineage: () => React.useContext(LineageContext), + } +} diff --git a/web/common/src/components/Lineage/LineageControlButton.tsx b/web/common/src/components/Lineage/LineageControlButton.tsx new file mode 100644 index 0000000000..fff79e822f --- /dev/null +++ b/web/common/src/components/Lineage/LineageControlButton.tsx @@ -0,0 +1,44 @@ +import { ControlButton } from '@xyflow/react' + +import { cn } from '@sqlmesh-common/utils' +import { Tooltip } from '../Tooltip/Tooltip' + +export function LineageControlButton({ + text, + onClick, + disabled = false, + className, + children, +}: { + text: string + children: React.ReactNode + onClick?: (e: React.MouseEvent) => void + disabled?: boolean + className?: string +}) { + return ( + + + {children} + + + } + > + {text} + + ) +} diff --git a/web/common/src/components/Lineage/LineageControlIcon.tsx b/web/common/src/components/Lineage/LineageControlIcon.tsx new file mode 100644 index 0000000000..a16f611a63 --- /dev/null +++ b/web/common/src/components/Lineage/LineageControlIcon.tsx @@ -0,0 +1,43 @@ +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' + +export interface LineageControlIconProps extends React.SVGProps { + Icon: React.ElementType + size?: number + className?: string +} + +export const LineageControlIcon = React.forwardRef< + HTMLSpanElement, + LineageControlIconProps +>( + ( + { + Icon, + size = 16, + className, + ...props + }: { + Icon: React.ElementType + size?: number + className?: string + }, + ref, + ) => { + return ( + + ) + }, +) + +LineageControlIcon.displayName = 'LineageControlIcon' diff --git a/web/common/src/components/Lineage/LineageLayout.tsx b/web/common/src/components/Lineage/LineageLayout.tsx new file mode 100644 index 0000000000..32cb989d31 --- /dev/null +++ b/web/common/src/components/Lineage/LineageLayout.tsx @@ -0,0 +1,92 @@ +import { + type EdgeTypes, + type NodeTypes, + ReactFlowProvider, + type SetCenter, +} from 
'@xyflow/react' + +import React from 'react' + +import { type LineageContextHook } from './LineageContext' + +import { + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + type NodeId, + type EdgeId, + type PortId, +} from './utils' + +import { LineageLayoutBase } from './LineageLayoutBase' +import { LineageLayoutContainer } from './LineageLayoutContainer' + +export function LineageLayout< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>({ + nodeTypes, + edgeTypes, + className, + controls, + isBuildingLayout, + useLineage, + onNodeClick, + onNodeDoubleClick, + showControlOnlySelectedNodes, + showControlZoomToSelectedNode, +}: { + useLineage: LineageContextHook< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > + isBuildingLayout?: boolean + nodeTypes?: NodeTypes + edgeTypes?: EdgeTypes + className?: string + showControlOnlySelectedNodes?: boolean + showControlZoomToSelectedNode?: boolean + controls?: + | React.ReactNode + | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode) + onNodeClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void + onNodeDoubleClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void +}) { + return ( + + + + + + ) +} diff --git a/web/common/src/components/Lineage/LineageLayoutBase.tsx b/web/common/src/components/Lineage/LineageLayoutBase.tsx new file mode 100644 index 0000000000..612bdcbb15 --- /dev/null +++ b/web/common/src/components/Lineage/LineageLayoutBase.tsx @@ -0,0 +1,308 @@ +import { + Background, + BackgroundVariant, + Controls, + type EdgeTypes, + type NodeTypes, + ReactFlow, + type SetCenter, + getConnectedEdges, + 
getIncomers, + getOutgoers, + useReactFlow, + useViewport, +} from '@xyflow/react' + +import '@xyflow/react/dist/style.css' +import './Lineage.css' + +import { CircuitBoard, LocateFixed, RotateCcw } from 'lucide-react' +import React from 'react' + +import { type LineageContextHook } from './LineageContext' +import { LineageControlButton } from './LineageControlButton' +import { LineageControlIcon } from './LineageControlIcon' +import { + DEFAULT_ZOOM, + type LineageEdge, + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + MAX_ZOOM, + MIN_ZOOM, + NODES_TRESHOLD, + NODES_TRESHOLD_ZOOM, + type NodeId, + type EdgeId, + type PortId, + ZOOM_THRESHOLD, +} from './utils' + +import '@xyflow/react/dist/style.css' +import './Lineage.css' +import { cn } from '@sqlmesh-common/utils' + +export function LineageLayoutBase< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TNodeID extends string = NodeId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>({ + nodeTypes, + edgeTypes, + className, + controls, + useLineage, + onNodeClick, + onNodeDoubleClick, + showControlOnlySelectedNodes = true, + showControlZoomToSelectedNode = true, +}: { + useLineage: LineageContextHook< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > + nodeTypes?: NodeTypes + edgeTypes?: EdgeTypes + className?: string + showControlOnlySelectedNodes?: boolean + showControlZoomToSelectedNode?: boolean + controls?: + | React.ReactNode + | (({ setCenter }: { setCenter: SetCenter }) => React.ReactNode) + onNodeClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void + onNodeDoubleClick?: ( + event: React.MouseEvent, + node: LineageNode, + ) => void +}) { + const { zoom: viewportZoom } = useViewport() + const { 
setCenter } = useReactFlow() + + const { + zoom, + nodes, + edges, + selectedNode, + showOnlySelectedNodes, + selectedNodeId, + setZoom, + setShowOnlySelectedNodes, + setSelectedNodes, + setSelectedEdges, + } = useLineage() + + const updateZoom = React.useMemo(() => debounce(setZoom, 200), [setZoom]) + + const zoomToSelectedNode = React.useCallback( + (zoom: number = DEFAULT_ZOOM) => { + if (selectedNode) { + setCenter(selectedNode.position.x, selectedNode.position.y, { + zoom, + duration: 0, + }) + } + }, + [selectedNode?.position.x, selectedNode?.position.y], + ) + + const getAllIncomers = React.useCallback( + ( + node: LineageNode, + visited: Set = new Set(), + ): LineageNode[] => { + if (visited.has(node.id)) return [] + + visited.add(node.id) + + return Array.from( + new Set>([ + node, + ...getIncomers(node, nodes, edges) + .map(n => getAllIncomers(n, visited)) + .flat(), + ]), + ) + }, + [nodes, edges], + ) + + const getAllOutgoers = React.useCallback( + ( + node: LineageNode, + visited: Set = new Set(), + ): LineageNode[] => { + if (visited.has(node.id)) return [] + + visited.add(node.id) + + return Array.from( + new Set>([ + node, + ...getOutgoers(node, nodes, edges) + .map(n => getAllOutgoers(n, visited)) + .flat(), + ]), + ) + }, + [nodes, edges], + ) + + const connectedNodes = React.useMemo(() => { + if (selectedNode == null) return [] + + const all = [ + ...getAllIncomers(selectedNode), + ...getAllOutgoers(selectedNode), + ] + + return all + }, [selectedNode, getAllIncomers, getAllOutgoers]) + + const connectedEdges = React.useMemo(() => { + return getConnectedEdges< + LineageNode, + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > + >(connectedNodes, edges) + }, [connectedNodes, edges]) + + React.useEffect(() => { + if (selectedNodeId == null) { + setShowOnlySelectedNodes(false) + setSelectedNodes(new Set()) + setSelectedEdges(new Set()) + } + }, [selectedNodeId, selectedNode]) + + 
React.useLayoutEffect(() => { + const selectedNodes = new Set(connectedNodes.map(node => node.id)) + const selectedEdges = new Set( + connectedEdges.reduce((acc, edge) => { + if ( + [edge.source, edge.target].every(id => + selectedNodes.has(id as unknown as TNodeID), + ) + ) { + edge.zIndex = 2 + acc.add(edge.id) + } else { + edge.zIndex = 1 + } + return acc + }, new Set()), + ) + + setSelectedNodes(selectedNodes) + setSelectedEdges(selectedEdges) + }, [connectedNodes, connectedEdges]) + + React.useLayoutEffect(() => { + zoomToSelectedNode() + }, [zoomToSelectedNode]) + + React.useEffect(() => { + updateZoom(viewportZoom) + }, [updateZoom, viewportZoom]) + + return ( + , + LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > + > + className={cn('shrink-0', className)} + nodes={nodes} + edges={edges} + nodeTypes={nodeTypes} + edgeTypes={edgeTypes} + zoomOnDoubleClick={false} + panOnScroll={true} + zoomOnScroll={true} + minZoom={nodes.length > NODES_TRESHOLD ? NODES_TRESHOLD_ZOOM : MIN_ZOOM} + maxZoom={MAX_ZOOM} + fitView={false} + onlyRenderVisibleElements + onNodeClick={onNodeClick} + onNodeDoubleClick={onNodeDoubleClick} + > + {zoom > ZOOM_THRESHOLD && ( + + )} + + {selectedNodeId && ( + <> + {showControlOnlySelectedNodes && ( + setShowOnlySelectedNodes(!showOnlySelectedNodes)} + > + + + )} + {showControlZoomToSelectedNode && ( + zoomToSelectedNode(DEFAULT_ZOOM)} + > + + + )} + + )} + {controls && typeof controls === 'function' + ? 
controls({ setCenter }) + : controls} + + + ) +} + +function debounce(func: T, wait: number) { + let timeout: NodeJS.Timeout + return (...args: unknown[]) => { + clearTimeout(timeout) + timeout = setTimeout(() => func(...args), wait) + } +} diff --git a/web/common/src/components/Lineage/LineageLayoutContainer.tsx b/web/common/src/components/Lineage/LineageLayoutContainer.tsx new file mode 100644 index 0000000000..2ba0e00d56 --- /dev/null +++ b/web/common/src/components/Lineage/LineageLayoutContainer.tsx @@ -0,0 +1,43 @@ +import { cn } from '@sqlmesh-common/utils' + +import React from 'react' + +import { VerticalContainer } from '../VerticalContainer/VerticalContainer' +import { MessageContainer } from '../MessageContainer/MessageContainer' +import { LoadingContainer } from '../LoadingContainer/LoadingContainer' + +export function LineageLayoutContainer({ + isBuildingLayout, + loadingMessage = 'Building layout...', + className, + children, +}: { + isBuildingLayout?: boolean + loadingMessage?: string + className?: string + children: React.ReactNode +}) { + return ( + + {isBuildingLayout && ( + + + {loadingMessage} + + + )} + {children} + + ) +} diff --git a/web/common/src/components/Lineage/edge/EdgeWithGradient.tsx b/web/common/src/components/Lineage/edge/EdgeWithGradient.tsx new file mode 100644 index 0000000000..2a1da5eed1 --- /dev/null +++ b/web/common/src/components/Lineage/edge/EdgeWithGradient.tsx @@ -0,0 +1,114 @@ +import { + type Edge, + type EdgeProps, + getBezierPath, + getSmoothStepPath, + getStraightPath, +} from '@xyflow/react' +import React, { useId } from 'react' + +import { type EdgeId, type LineageEdgeData, type PathType } from '../utils' + +export interface EdgeData extends LineageEdgeData { + startColor?: string + endColor?: string + strokeWidth?: number + pathType?: PathType +} + +export const EdgeWithGradient = React.memo( + ({ + id, + sourceX, + sourceY, + targetX, + targetY, + sourcePosition, + targetPosition, + style, + data, + markerEnd, + 
}: EdgeProps>) => { + const edgeId = id as EdgeId + + const gradientId = useId() + const startColor = data?.startColor || 'var(--color-lineage-edge)' + const endColor = data?.endColor || 'var(--color-lineage-edge)' + const pathType = data?.pathType || 'bezier' + const strokeWidth = data?.strokeWidth || 4 + const edgePath = getEdgePath(pathType) + + function getEdgePath(pathType: PathType) { + return { + straight: getStraightPath({ + sourceX, + sourceY, + targetX, + targetY, + }), + smoothstep: getSmoothStepPath({ + sourceX, + sourceY, + sourcePosition, + targetX, + targetY, + targetPosition, + borderRadius: 10, + }), + bezier: getBezierPath({ + sourceX, + sourceY, + sourcePosition, + targetX, + targetY, + targetPosition, + }), + step: getSmoothStepPath({ + sourceX, + sourceY, + sourcePosition, + targetX, + targetY, + targetPosition, + borderRadius: 0, + }), + }[pathType] + } + + return ( + <> + + + + + + + + + ) + }, +) diff --git a/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx b/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx new file mode 100644 index 0000000000..aee8790b35 --- /dev/null +++ b/web/common/src/components/Lineage/edge/FactoryEdgeWithGradient.tsx @@ -0,0 +1,64 @@ +import React from 'react' + +import { type LineageContextHook } from '../LineageContext' +import { + type EdgeId, + type LineageNodeData, + type NodeId, + type PortId, +} from '../utils' +import { EdgeWithGradient, type EdgeData } from './EdgeWithGradient' +import type { Edge, EdgeProps } from '@xyflow/react' + +export function FactoryEdgeWithGradient< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends EdgeData = EdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TSourceID extends string = TNodeID, + TTargetID extends string = TNodeID, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>( + useLineage: LineageContextHook< + TNodeData, + TEdgeData, + TNodeID, + 
TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >, +) { + return React.memo(({ data, id, ...props }: EdgeProps>) => { + const edgeId = id as TEdgeID + + const { selectedEdges } = useLineage() + + const isActive = selectedEdges.has(edgeId) + + let startColor = 'var(--color-lineage-edge)' + let endColor = 'var(--color-lineage-edge)' + + if (isActive && data?.startColor) { + startColor = data?.startColor + } + + if (isActive && data?.endColor) { + endColor = data?.endColor + } + + return ( + + ) + }) +} diff --git a/web/common/src/components/Lineage/help.test.ts b/web/common/src/components/Lineage/help.test.ts new file mode 100644 index 0000000000..14e921cd49 --- /dev/null +++ b/web/common/src/components/Lineage/help.test.ts @@ -0,0 +1,626 @@ +import { describe, expect, test } from 'vitest' +import { Position } from '@xyflow/react' + +import { + getOnlySelectedNodes, + getTransformedNodes, + getTransformedModelEdgesSourceTargets, + getTransformedModelEdgesTargetSources, + createNode, + calculateNodeBaseHeight, + calculateNodeDetailsHeight, + createEdge, +} from './help' +import type { + LineageNode, + LineageNodesMap, + LineageNodeData, + LineageDetails, + LineageAdjacencyList, + NodeId, + EdgeId, + PortId, +} from './utils' +import { toNodeID, toEdgeID } from './utils' + +describe('Lineage Help Functions', () => { + describe('getOnlySelectedNodes', () => { + test('should return only selected nodes from the node map', () => { + const nodesMap = { + node1: { + id: 'node1' as NodeId, + position: { x: 0, y: 0 }, + data: {}, + }, + node2: { + id: 'node2' as NodeId, + position: { x: 100, y: 100 }, + data: {}, + }, + node3: { + id: 'node3' as NodeId, + position: { x: 200, y: 200 }, + data: {}, + }, + } + + const selectedNodes = new Set([ + 'node1' as NodeId, + 'node3' as NodeId, + ]) + const result = getOnlySelectedNodes(nodesMap, selectedNodes) + + expect(Object.keys(result)).toHaveLength(2) + expect(result).toHaveProperty('node1') + 
expect(result).toHaveProperty('node3') + expect(result).not.toHaveProperty('node2') + }) + + test('should return empty object when no nodes are selected', () => { + const nodesMap = { + node1: { + id: 'node1' as NodeId, + position: { x: 0, y: 0 }, + data: {}, + }, + } + + const selectedNodes = new Set() + const result = getOnlySelectedNodes(nodesMap, selectedNodes) + + expect(Object.keys(result)).toHaveLength(0) + }) + + test('should handle empty node map', () => { + const nodesMap: LineageNodesMap = {} + const selectedNodes = new Set(['node1' as NodeId]) + const result = getOnlySelectedNodes(nodesMap, selectedNodes) + + expect(Object.keys(result)).toHaveLength(0) + }) + }) + + describe('getTransformedNodes', () => { + test('should transform nodes using the provided transform function', () => { + const adjacencyListKeys = ['model1', 'model2'] + const lineageDetails: LineageDetails< + string, + { name: string; type: string } + > = { + model1: { name: 'Model 1', type: 'table' }, + model2: { name: 'Model 2', type: 'view' }, + } + + const transformNode = ( + nodeId: NodeId, + data: { name: string; type: string }, + ) => + ({ + id: nodeId, + position: { x: 0, y: 0 }, + data: { label: data.name, nodeType: data.type }, + }) as LineageNode<{ label: string; nodeType: string }> + + const result = getTransformedNodes( + adjacencyListKeys, + lineageDetails, + transformNode, + ) + + const encodedModel1 = toNodeID('model1') + const encodedModel2 = toNodeID('model2') + + expect(Object.keys(result)).toHaveLength(2) + expect(result[encodedModel1]).toEqual({ + id: encodedModel1, + position: { x: 0, y: 0 }, + data: { label: 'Model 1', nodeType: 'table' }, + }) + expect(result[encodedModel2]).toEqual({ + id: encodedModel2, + position: { x: 0, y: 0 }, + data: { label: 'Model 2', nodeType: 'view' }, + }) + }) + + test('should handle empty adjacency list', () => { + const adjacencyListKeys: string[] = [] + const lineageDetails: LineageDetails = {} + const transformNode = (nodeId: NodeId, 
data: { name: string }) => + ({ + id: nodeId, + position: { x: 0, y: 0 }, + data: { label: data.name }, + }) as LineageNode<{ label: string }> + + const result = getTransformedNodes( + adjacencyListKeys, + lineageDetails, + transformNode, + ) + + expect(Object.keys(result)).toHaveLength(0) + }) + }) + + describe('getTransformedModelEdgesSourceTargets', () => { + test('should transform edges from source to targets using the provided transform function', () => { + const adjacencyListKeys = ['model1', 'model2', 'model3'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: ['model2', 'model3'], + model2: ['model3'], + model3: [], + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(3) + + const model1Id = toNodeID('model1') + const model2Id = toNodeID('model2') + const model3Id = toNodeID('model3') + + expect(result[0]).toEqual({ + id: toEdgeID('model1', 'model2'), + source: model1Id, + target: model2Id, + type: 'edge', + zIndex: 1, + }) + expect(result[1]).toEqual({ + id: toEdgeID('model1', 'model3'), + source: model1Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + expect(result[2]).toEqual({ + id: toEdgeID('model2', 'model3'), + source: model2Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + }) + + test('should skip edges where target is not in adjacency list', () => { + const adjacencyListKeys = ['model1'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: ['model2'], // model2 is not in the adjacency list + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = 
getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle empty adjacency list', () => { + const adjacencyListKeys: string[] = [] + const lineageAdjacencyList: LineageAdjacencyList = {} + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle nodes with no targets', () => { + const adjacencyListKeys = ['model1', 'model2'] + const lineageAdjacencyList = { + model1: [], + model2: null, + } as unknown as LineageAdjacencyList + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesSourceTargets( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + }) + + describe('getTransformedModelEdgesTargetSources', () => { + test('should transform edges from target to sources using the provided transform function', () => { + const adjacencyListKeys = ['model1', 'model2', 'model3'] + const lineageAdjacencyList: LineageAdjacencyList = { + model1: [], + model2: ['model1'], + model3: ['model1', 'model2'], + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(3) + + const model1Id = toNodeID('model1') + const model2Id = toNodeID('model2') 
+ const model3Id = toNodeID('model3') + + expect(result[0]).toEqual({ + id: toEdgeID('model1', 'model2'), + source: model1Id, + target: model2Id, + type: 'edge', + zIndex: 1, + }) + expect(result[1]).toEqual({ + id: toEdgeID('model1', 'model3'), + source: model1Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + expect(result[2]).toEqual({ + id: toEdgeID('model2', 'model3'), + source: model2Id, + target: model3Id, + type: 'edge', + zIndex: 1, + }) + }) + + test('should skip edges where source is not in adjacency list', () => { + const adjacencyListKeys = ['model2'] + const lineageAdjacencyList: LineageAdjacencyList = { + model2: ['model1'], // model1 is not in the adjacency list + } + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle empty adjacency list', () => { + const adjacencyListKeys: string[] = [] + const lineageAdjacencyList: LineageAdjacencyList = {} + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + + test('should handle nodes with no sources', () => { + const adjacencyListKeys = ['model1', 'model2'] + const lineageAdjacencyList = { + model1: [], + model2: null, + } as unknown as LineageAdjacencyList + + const transformEdge = ( + type: string, + edgeId: EdgeId, + sourceId: NodeId, + targetId: NodeId, + ) => ({ + id: edgeId, + source: sourceId, + target: targetId, + type, + zIndex: 1, + }) + + const result = 
getTransformedModelEdgesTargetSources( + adjacencyListKeys, + lineageAdjacencyList, + transformEdge, + ) + + expect(result).toHaveLength(0) + }) + }) + + describe('createNode', () => { + test('should create a node with provided data', () => { + const nodeId = 'test-node' as NodeId + const data = { label: 'Test Node', value: 42 } + const node = createNode('custom', nodeId, data) + + expect(node).toEqual({ + id: nodeId, + sourcePosition: Position.Right, + targetPosition: Position.Left, + width: 300, // DEFAULT_NODE_WIDTH + height: 32, // DEFAULT_NODE_HEIGHT + data, + type: 'custom', + hidden: false, + position: { x: 0, y: 0 }, + zIndex: 10, + }) + }) + + test('should create a node with minimal data', () => { + const nodeId = 'minimal' as NodeId + const data = {} + const node = createNode('default', nodeId, data) + + expect(node.id).toBe(nodeId) + expect(node.type).toBe('default') + expect(node.data).toEqual({}) + expect(node.hidden).toBe(false) + }) + }) + + describe('calculateNodeBaseHeight', () => { + test('should calculate base height with no additional components', () => { + const height = calculateNodeBaseHeight({}) + // border (2*2) + base (28) = 32 + expect(height).toBe(32) + }) + + test('should include footer height when specified', () => { + const height = calculateNodeBaseHeight({ includeNodeFooterHeight: true }) + // border (2*2) + base (28) + footer (20) = 52 + expect(height).toBe(52) + }) + + test('should include ceiling height when specified', () => { + const height = calculateNodeBaseHeight({ includeCeilingHeight: true }) + // border (2*2) + base (28) + ceiling (20) + ceilingGap (4) = 56 + expect(height).toBe(56) + }) + + test('should include floor height when specified', () => { + const height = calculateNodeBaseHeight({ includeFloorHeight: true }) + // border (2*2) + base (28) + floor (20) + floorGap (4) = 56 + expect(height).toBe(56) + }) + + test('should include all components when specified', () => { + const height = calculateNodeBaseHeight({ + 
includeNodeFooterHeight: true, + includeCeilingHeight: true, + includeFloorHeight: true, + }) + // border (2*2) + base (28) + footer (20) + ceiling (20) + ceilingGap (4) + floor (20) + floorGap (4) = 100 + expect(height).toBe(100) + }) + }) + + describe('calculateNodeDetailsHeight', () => { + test('should return 0 when no details', () => { + const height = calculateNodeDetailsHeight({}) + expect(height).toBe(0) + }) + + test('should calculate height for single detail', () => { + const height = calculateNodeDetailsHeight({ nodeDetailsCount: 1 }) + // 1 * 24 (nodeOptionHeight) = 24 + expect(height).toBe(24) + }) + + test('should calculate height for multiple details with separators', () => { + const height = calculateNodeDetailsHeight({ nodeDetailsCount: 3 }) + // 3 * 24 (nodeOptionHeight) + 2 * 1 (separators between items) = 74 + expect(height).toBe(74) + }) + + test('should handle zero details count', () => { + const height = calculateNodeDetailsHeight({ nodeDetailsCount: 0 }) + expect(height).toBe(0) + }) + }) + + describe('createEdge', () => { + test('should create edge with basic parameters', () => { + const edgeId = 'edge1' as EdgeId + const sourceId = 'source1' as NodeId + const targetId = 'target1' as NodeId + + const edge = createEdge('straight', edgeId, sourceId, targetId) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'straight', + sourceHandle: undefined, + targetHandle: undefined, + data: undefined, + zIndex: 1, + }) + }) + + test('should create edge with handles', () => { + const edgeId = 'edge2' as EdgeId + const sourceId = 'source2' as NodeId + const targetId = 'target2' as NodeId + const sourceHandleId = 'handle1' as PortId + const targetHandleId = 'handle2' as PortId + + const edge = createEdge( + 'bezier', + edgeId, + sourceId, + targetId, + sourceHandleId, + targetHandleId, + ) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'bezier', + sourceHandle: sourceHandleId, 
+ targetHandle: targetHandleId, + data: undefined, + zIndex: 1, + }) + }) + + test('should create edge with data', () => { + const edgeId = 'edge3' as EdgeId + const sourceId = 'source3' as NodeId + const targetId = 'target3' as NodeId + const data = { label: 'Connection', weight: 5 } + + const edge = createEdge( + 'smoothstep', + edgeId, + sourceId, + targetId, + undefined, + undefined, + data, + ) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'smoothstep', + sourceHandle: undefined, + targetHandle: undefined, + data, + zIndex: 1, + }) + }) + + test('should create edge with all parameters', () => { + const edgeId = 'edge4' as EdgeId + const sourceId = 'source4' as NodeId + const targetId = 'target4' as NodeId + const sourceHandleId = 'handle3' as PortId + const targetHandleId = 'handle4' as PortId + const data = { animated: true } + + const edge = createEdge( + 'step', + edgeId, + sourceId, + targetId, + sourceHandleId, + targetHandleId, + data, + ) + + expect(edge).toEqual({ + id: edgeId, + source: sourceId, + target: targetId, + type: 'step', + sourceHandle: sourceHandleId, + targetHandle: targetHandleId, + data, + zIndex: 1, + }) + }) + }) +}) diff --git a/web/common/src/components/Lineage/help.ts b/web/common/src/components/Lineage/help.ts new file mode 100644 index 0000000000..63a7c049c5 --- /dev/null +++ b/web/common/src/components/Lineage/help.ts @@ -0,0 +1,258 @@ +import { Position } from '@xyflow/react' + +import { + DEFAULT_NODE_HEIGHT, + DEFAULT_NODE_WIDTH, + type EdgeId, + type LineageAdjacencyList, + type LineageDetails, + type LineageEdge, + type LineageEdgeData, + type LineageNode, + type LineageNodeData, + type LineageNodesMap, + type NodeId, + type PortId, + toEdgeID, + toNodeID, + type TransformEdgeFn, + type TransformNodeFn, +} from './utils' + +export function getOnlySelectedNodes< + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +>(nodeMaps: LineageNodesMap, 
selectedNodes: Set) { + return ( + Object.values(nodeMaps) satisfies LineageNode[] + ).reduce( + (acc, node) => + selectedNodes.has(node.id) ? { ...acc, [node.id]: node } : acc, + {} as LineageNodesMap, + ) +} + +export function getTransformedNodes< + TAdjacencyListKey extends string, + TDetailsNode, + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +>( + adjacencyListKeys: TAdjacencyListKey[], + lineageDetails: LineageDetails, + transformNode: TransformNodeFn, + allNodesMap?: LineageNodesMap, +): LineageNodesMap { + const nodesCount = adjacencyListKeys.length + const nodesMap: LineageNodesMap = Object.create(null) + + for (let i = 0; i < nodesCount; i++) { + const adjacencyListKey = adjacencyListKeys[i] + const nodeId = toNodeID(adjacencyListKey) + nodesMap[nodeId] = + allNodesMap?.[nodeId] || + transformNode(nodeId, lineageDetails[adjacencyListKey]) + } + + return nodesMap +} + +export function getTransformedModelEdgesSourceTargets< + TAdjacencyListKey extends string, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>( + adjacencyListKeys: TAdjacencyListKey[], + lineageAdjacencyList: LineageAdjacencyList, + transformEdge: TransformEdgeFn< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >, +) { + const nodesCount = adjacencyListKeys.length + + if (nodesCount === 0) return [] + + const edges = [] + + for (let i = 0; i < nodesCount; i++) { + const sourceAdjacencyListKey = adjacencyListKeys[i] + const sourceNodeId = toNodeID(sourceAdjacencyListKey) + const targets = lineageAdjacencyList[sourceAdjacencyListKey] + const targetsCount = targets?.length || 0 + + if (targets == null || targetsCount < 1) continue + + for (let j = 0; j < targetsCount; j++) { + const targetAdjacencyListKey = 
targets[j] + + if (!(targetAdjacencyListKey in lineageAdjacencyList)) continue + + const edgeId = toEdgeID( + sourceAdjacencyListKey, + targetAdjacencyListKey, + ) + const targetNodeId = toNodeID(targetAdjacencyListKey) + + edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) + } + } + + return edges +} + +export function getTransformedModelEdgesTargetSources< + TAdjacencyListKey extends string, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>( + adjacencyListKeys: TAdjacencyListKey[], + lineageAdjacencyList: LineageAdjacencyList, + transformEdge: TransformEdgeFn< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >, +) { + const nodesCount = adjacencyListKeys.length + + if (nodesCount === 0) return [] + + const edges = [] + + for (let i = 0; i < nodesCount; i++) { + const targetAdjacencyListKey = adjacencyListKeys[i] + const targetNodeId = toNodeID(targetAdjacencyListKey) + const sources = lineageAdjacencyList[targetAdjacencyListKey] + const sourcesCount = sources?.length || 0 + + if (sources == null || sourcesCount < 1) continue + + for (let j = 0; j < sourcesCount; j++) { + const sourceAdjacencyListKey = sources[j] + + if (!(sourceAdjacencyListKey in lineageAdjacencyList)) continue + + const edgeId = toEdgeID( + sourceAdjacencyListKey, + targetAdjacencyListKey, + ) + const sourceNodeId = toNodeID(sourceAdjacencyListKey) + + edges.push(transformEdge('edge', edgeId, sourceNodeId, targetNodeId)) + } + } + + return edges +} + +export function createNode< + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +>(type: string, nodeId: TNodeID, data: TNodeData) { + return { + id: nodeId, + sourcePosition: Position.Right, + targetPosition: Position.Left, + width: 
DEFAULT_NODE_WIDTH, + height: DEFAULT_NODE_HEIGHT, + data, + type, + hidden: false, + position: { x: 0, y: 0 }, + zIndex: 10, + } +} + +export function calculateNodeBaseHeight({ + includeNodeFooterHeight = false, + includeCeilingHeight = false, + includeFloorHeight = false, +}: { + includeNodeFooterHeight?: boolean + includeCeilingHeight?: boolean + includeFloorHeight?: boolean +}) { + const border = 2 + const footerHeight = 20 // tailwind h-5 + const base = 28 // tailwind h-7 + const ceilingHeight = 20 // tailwind h-5 + const floorHeight = 20 // tailwind h-5 + + const ceilingGap = 4 + const floorGap = 4 + + return [ + border * 2, + base, + includeNodeFooterHeight ? footerHeight : 0, + includeCeilingHeight ? ceilingHeight + ceilingGap : 0, + includeFloorHeight ? floorHeight + floorGap : 0, + ].reduce((acc, h) => acc + h, 0) +} + +export function calculateNodeDetailsHeight({ + nodeDetailsCount = 0, +}: { + nodeDetailsCount?: number +}) { + const nodeOptionHeight = 24 // tailwind h-6 + + const nodeOptionsSeparator = 1 + const nodeOptionsSeparators = nodeDetailsCount > 1 ? nodeDetailsCount - 1 : 0 + + return [ + nodeOptionsSeparators * nodeOptionsSeparator, + nodeDetailsCount * nodeOptionHeight, + ].reduce((acc, h) => acc + h, 0) +} + +export function createEdge< + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>( + type: string, + edgeId: TEdgeID, + sourceId: TSourceID, + targetId: TTargetID, + sourceHandleId?: TSourceHandleID, + targetHandleId?: TTargetHandleID, + data?: TEdgeData, +): LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID +> { + return { + id: edgeId, + source: sourceId, + target: targetId, + type, + sourceHandle: sourceHandleId ? sourceHandleId : undefined, + targetHandle: targetHandleId ? 
targetHandleId : undefined, + data, + zIndex: 1, + } +} diff --git a/web/common/src/components/Lineage/index.ts b/web/common/src/components/Lineage/index.ts new file mode 100644 index 0000000000..4a0b6eccc7 --- /dev/null +++ b/web/common/src/components/Lineage/index.ts @@ -0,0 +1,29 @@ +export * from './utils' +export * from './LineageLayout' +export * from './LineageContext' +export * from './LineageControlButton' +export * from './LineageControlIcon' +export * from './help' +export * from './node/base-handle' +export * from './node/base-node' +export * from './node/NodeContainer' +export * from './node/NodeBase' +export * from './node/NodeDivider' +export * from './node/NodeHandleIcon' +export * from './node/NodeHandles' +export * from './node/NodeHandle' +export * from './node/NodeHeader' +export * from './node/NodePorts' +export * from './node/NodePort' +export * from './node/NodeAppendix' +export * from './node/NodeBadge' +export * from './node/useNodeMetadata' +export * from './edge/EdgeWithGradient' +export * from './edge/FactoryEdgeWithGradient' +export * from './layout/dagreLayout' +export * from './layout/help' +export * from './LineageColumnLevel/ColumnLevelLineageContext' +export * from './LineageColumnLevel/FactoryColumn' +export * from './LineageColumnLevel/useColumns' +export * from './LineageColumnLevel/useColumnLevelLineage' +export * from './LineageColumnLevel/help' diff --git a/web/common/src/components/Lineage/layout/dagreLayout.ts b/web/common/src/components/Lineage/layout/dagreLayout.ts new file mode 100644 index 0000000000..16f59f495a --- /dev/null +++ b/web/common/src/components/Lineage/layout/dagreLayout.ts @@ -0,0 +1,101 @@ +import { + DEFAULT_NODE_WIDTH, + type EdgeId, + type LineageEdge, + type LineageEdgeData, + type LineageNodeData, + type LineageNodesMap, + type NodeId, + type PortId, +} from '../utils' +import dagre from 'dagre' + +export function buildLayout< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends 
LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>({ + edges, + nodesMap, + shouldReuseExistingPosition = true, +}: { + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] + nodesMap: LineageNodesMap + shouldReuseExistingPosition?: boolean +}) { + const nodes = Object.values(nodesMap) + const nodeCount = nodes.length + const edgeCount = edges.length + + if (nodeCount === 0) return {} + + const g = new dagre.graphlib.Graph({ + compound: true, + multigraph: true, + directed: true, + }) + + g.setGraph({ + rankdir: 'LR', + nodesep: 12, + ranksep: 48, + edgesep: 0, + ranker: 'longest-path', + }) + + g.setDefaultEdgeLabel(() => ({})) + + // Building layout already heavy operation, so trying to optimize it a bit + for (let i = 0; i < edgeCount; i++) { + g.setEdge(edges[i].source, edges[i].target) + } + + for (let i = 0; i < nodeCount; i++) { + const node = nodes[i] + g.setNode(node.id, { + width: node.width || DEFAULT_NODE_WIDTH, + height: node.height || 0, + }) + } + + dagre.layout(g) + + // Building layout already heavy operation, so trying to optimize it a bit + for (let i = 0; i < nodeCount; i++) { + const node = nodes[i] + const width = node.width || DEFAULT_NODE_WIDTH + const height = node.height || 0 + const nodeId = node.id as NodeId + const nodeWithPosition = g.node(nodeId) + const halfWidth = width / 2 + const halfHeight = height / 2 + const isDefaultPosition = node.position.x === 0 && node.position.y === 0 + + nodesMap[nodeId] = { + ...node, + position: { + x: + shouldReuseExistingPosition && isDefaultPosition + ? nodeWithPosition.x - halfWidth + : node.position.x, + y: + shouldReuseExistingPosition && isDefaultPosition + ? 
nodeWithPosition.y - halfHeight + : node.position.y, + }, + } + } + + return { ...nodesMap } +} diff --git a/web/common/src/components/Lineage/layout/help.ts b/web/common/src/components/Lineage/layout/help.ts new file mode 100644 index 0000000000..d0dada83f5 --- /dev/null +++ b/web/common/src/components/Lineage/layout/help.ts @@ -0,0 +1,129 @@ +import { + type LineageEdge, + type LineageEdgeData, + type LineageNodeData, + type LineageNodesMap, + type NodeId, + type PortId, + type LayoutedGraph, + type EdgeId, +} from '../utils' + +const DEFAULT_TIMEOUT = 1000 * 60 // 1 minute + +let workerInstance: Worker | null = null + +export function getWorker(url: URL): Worker { + if (workerInstance) return workerInstance + + workerInstance = new Worker(url, { type: 'module' }) + + return workerInstance +} + +export async function getLayoutedGraph< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +>( + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[], + nodesMap: LineageNodesMap, + workerUrl: URL, +): Promise< + LayoutedGraph< + TNodeData, + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > +> { + let timeoutId: NodeJS.Timeout | null = null + + return new Promise((resolve, reject) => { + const nodes = Object.values(nodesMap) + + if (nodes.length === 0) return resolve({ edges: [], nodesMap: {} }) + + const worker = getWorker(workerUrl) + + if (worker == null) + return errorHandler(new ErrorEvent('Failed to create worker')) + + timeoutId = setTimeout( + () => errorHandler(new ErrorEvent('Layout calculation timed out')), + DEFAULT_TIMEOUT, + ) + + worker.addEventListener('message', handler) + worker.addEventListener('error', 
errorHandler) + + try { + worker.postMessage({ edges, nodesMap } as LayoutedGraph< + TNodeData, + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >) + } catch (postError) { + errorHandler(postError as ErrorEvent) + } + + function handler( + event: MessageEvent< + LayoutedGraph< + TNodeData, + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + > & { + error: ErrorEvent + } + >, + ) { + cleanup() + + if (event.data.error) return errorHandler(event.data.error) + + resolve(event.data) + } + + function errorHandler(error: ErrorEvent) { + cleanup() + reject(error) + } + + function cleanup() { + if (timeoutId) { + clearTimeout(timeoutId) + timeoutId = null + } + worker?.removeEventListener('message', handler) + worker?.removeEventListener('error', errorHandler) + } + }) +} + +export function cleanupLayoutWorker(): void { + workerInstance?.terminate() + workerInstance = null +} diff --git a/web/common/src/components/Lineage/node/NodeAppendix.tsx b/web/common/src/components/Lineage/node/NodeAppendix.tsx new file mode 100644 index 0000000000..48194c1442 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeAppendix.tsx @@ -0,0 +1,45 @@ +import { cva, type VariantProps } from 'class-variance-authority' +import { forwardRef, type HTMLAttributes } from 'react' + +import { cn } from '@sqlmesh-common/utils' + +const appendixVariants = cva( + 'node-appendix absolute flex w-full flex-col items-center', + { + variants: { + position: { + top: '-translate-y-[100%] -my-1', + bottom: 'top-[100%] my-1', + left: '-left-[100%] -mx-1', + right: 'left-[100%] mx-1', + }, + }, + defaultVariants: { + position: 'top', + }, + }, +) + +export interface NodeAppendixProps + extends HTMLAttributes, + VariantProps { + className?: string + position?: 'top' | 'bottom' | 'left' | 'right' +} + +export const NodeAppendix = forwardRef( + ({ children, className, position, ...props }, ref) => { + return ( +
+ {children} +
+ ) + }, +) + +NodeAppendix.displayName = 'NodeAppendix' diff --git a/web/common/src/components/Lineage/node/NodeBadge.tsx b/web/common/src/components/Lineage/node/NodeBadge.tsx new file mode 100644 index 0000000000..b05283dfa8 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeBadge.tsx @@ -0,0 +1,24 @@ +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' +import { Badge, type BadgeProps } from '@sqlmesh-common/components/Badge/Badge' + +export const NodeBadge = React.forwardRef( + ({ className, children, ...props }, ref) => { + return ( + + {children} + + ) + }, +) +NodeBadge.displayName = 'NodeBadge' diff --git a/web/common/src/components/Lineage/node/NodeBase.tsx b/web/common/src/components/Lineage/node/NodeBase.tsx new file mode 100644 index 0000000000..89342d83c8 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeBase.tsx @@ -0,0 +1,31 @@ +import { type NodeProps } from '@xyflow/react' +import React from 'react' + +import { BaseNode } from '@sqlmesh-common/components/Lineage/node/base-node' +import { cn } from '@sqlmesh-common/utils' + +export interface NodeBaseProps extends NodeProps { + className?: string + children?: React.ReactNode +} + +export const NodeBase = React.memo( + React.forwardRef( + ({ className, children }, ref) => { + return ( + + {children} + + ) + }, + ), +) +NodeBase.displayName = 'NodeBase' diff --git a/web/common/src/components/Lineage/node/NodeContainer.tsx b/web/common/src/components/Lineage/node/NodeContainer.tsx new file mode 100644 index 0000000000..c72d60e4ed --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeContainer.tsx @@ -0,0 +1,21 @@ +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' +import { VerticalContainer } from '@sqlmesh-common/components/VerticalContainer/VerticalContainer' + +export const NodeContainer = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, children, ...props }, ref) => { + return ( + + {children} + + 
) +}) +NodeContainer.displayName = 'NodeContainer' diff --git a/web/common/src/components/Lineage/node/NodeDetail.tsx b/web/common/src/components/Lineage/node/NodeDetail.tsx new file mode 100644 index 0000000000..f57978d865 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeDetail.tsx @@ -0,0 +1,27 @@ +import { Metadata, cn } from '@sqlmesh-common/index' + +import { NodeDivider } from './NodeDivider' + +export function NodeDetail({ + label, + value, + hasDivider = true, + className, +}: { + label: string + value: string + hasDivider?: boolean + className?: string +}) { + return ( + <> + {hasDivider && } + + + ) +} diff --git a/web/common/src/components/Lineage/node/NodeDivider.tsx b/web/common/src/components/Lineage/node/NodeDivider.tsx new file mode 100644 index 0000000000..1b6af8cc91 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeDivider.tsx @@ -0,0 +1,8 @@ +export function NodeDivider() { + return ( +
+ ) +} diff --git a/web/common/src/components/Lineage/node/NodeHandle.tsx b/web/common/src/components/Lineage/node/NodeHandle.tsx new file mode 100644 index 0000000000..6e7aa4dd22 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHandle.tsx @@ -0,0 +1,33 @@ +import { Position } from '@xyflow/react' +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' +import { BaseHandle } from './base-handle' +import type { HandleId } from '../utils' + +export function NodeHandle({ + type, + id, + children, + className, + ...props +}: { + type: 'target' | 'source' + id: THandleId + children: React.ReactNode + className?: string +}) { + return ( + + {children} + + ) +} diff --git a/web/common/src/components/Lineage/node/NodeHandleIcon.tsx b/web/common/src/components/Lineage/node/NodeHandleIcon.tsx new file mode 100644 index 0000000000..caafa617a9 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHandleIcon.tsx @@ -0,0 +1,23 @@ +import { ArrowRight } from 'lucide-react' + +import { cn } from '@sqlmesh-common/utils' + +export function NodeHandleIcon({ + className, + iconSize = 20, +}: { + className?: string + iconSize?: number +}) { + return ( + + ) +} diff --git a/web/common/src/components/Lineage/node/NodeHandles.tsx b/web/common/src/components/Lineage/node/NodeHandles.tsx new file mode 100644 index 0000000000..3d7d6e08ab --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHandles.tsx @@ -0,0 +1,54 @@ +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' +import { HorizontalContainer } from '@sqlmesh-common/components/HorizontalContainer/HorizontalContainer' +import { NodeHandle } from './NodeHandle' +import type { HandleId } from '../utils' + +export function NodeHandles< + TLeftHandleId extends string = HandleId, + TRightHandleId extends string = HandleId, +>({ + leftIcon, + rightIcon, + leftId, + rightId, + className, + handleClassName, + children, +}: { + leftId?: TLeftHandleId + rightId?: 
TRightHandleId + className?: string + handleClassName?: string + children: React.ReactNode + leftIcon: React.ReactNode + rightIcon: React.ReactNode +}) { + return ( + + {leftId && ( + + type="target" + id={leftId} + className={cn('left-0', handleClassName)} + > + {leftIcon} + + )} + {children} + {rightId && ( + + type="source" + id={rightId} + className={cn('right-0', handleClassName)} + > + {rightIcon} + + )} + + ) +} diff --git a/web/common/src/components/Lineage/node/NodeHeader.tsx b/web/common/src/components/Lineage/node/NodeHeader.tsx new file mode 100644 index 0000000000..41e83aaa4e --- /dev/null +++ b/web/common/src/components/Lineage/node/NodeHeader.tsx @@ -0,0 +1,29 @@ +import { type HTMLAttributes, forwardRef } from 'react' + +import { cn } from '@sqlmesh-common/utils' + +/* NODE HEADER -------------------------------------------------------------- */ + +export type NodeHeaderProps = HTMLAttributes + +/** + * A container for a consistent header layout intended to be used inside the + * `` component. + */ +export const NodeHeader = forwardRef( + ({ className, ...props }, ref) => { + return ( +
+ ) + }, +) + +NodeHeader.displayName = 'NodeHeader' diff --git a/web/common/src/components/Lineage/node/NodePort.tsx b/web/common/src/components/Lineage/node/NodePort.tsx new file mode 100644 index 0000000000..207be24576 --- /dev/null +++ b/web/common/src/components/Lineage/node/NodePort.tsx @@ -0,0 +1,66 @@ +import { useNodeConnections } from '@xyflow/react' +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' +import { type NodeId, type PortHandleId } from '../utils' +import { NodeHandles } from './NodeHandles' + +export function NodePort< + TPortId extends string = PortHandleId, + TNodeID extends string = NodeId, + TLeftPortHandleId extends string = PortHandleId, + TRightPortHandleId extends string = PortHandleId, +>({ + id, + nodeId, + className, + children, +}: { + id: TPortId + nodeId: TNodeID + className?: string + children: React.ReactNode +}) { + const sources = useNodeConnections({ + id: nodeId, + handleType: 'source', + handleId: id, + }) + const targets = useNodeConnections({ + id: nodeId, + handleType: 'target', + handleId: id, + }) + + const isLeftHandleId = (id: TPortId): id is TPortId & TLeftPortHandleId => { + return id && targets.length > 0 + } + + const isRightHandleId = (id: TPortId): id is TPortId & TRightPortHandleId => { + return id && sources.length > 0 + } + + const leftId = isLeftHandleId(id) ? id : undefined + const rightId = isRightHandleId(id) ? 
id : undefined + + return ( + + data-component="NodePort" + leftIcon={ + + } + rightIcon={ + + } + leftId={leftId} + rightId={rightId} + className={cn( + 'relative overflow-visible group bg-lineage-node-port-background h-auto', + className, + )} + handleClassName="absolute" + > + {children} + + ) +} diff --git a/web/common/src/components/Lineage/node/NodePorts.tsx b/web/common/src/components/Lineage/node/NodePorts.tsx new file mode 100644 index 0000000000..1f40dc764f --- /dev/null +++ b/web/common/src/components/Lineage/node/NodePorts.tsx @@ -0,0 +1,44 @@ +import { cn } from '@sqlmesh-common/utils' +import { VirtualList } from '@sqlmesh-common/components/VirtualList/VirtualList' +import { FilterableList } from '@sqlmesh-common/components/VirtualList/FilterableList' +import type { IFuseOptions } from 'fuse.js' + +export function NodePorts({ + ports, + estimatedListItemHeight, + renderPort, + className, + isFilterable = true, + filterOptions, +}: { + ports: TPort[] + estimatedListItemHeight: number + renderPort: (port: TPort) => React.ReactNode + className?: string + isFilterable?: boolean + filterOptions?: IFuseOptions +}) { + function renderVirtualList(items: TPort[]) { + return ( + renderPort(item)} + className={cn(!isFilterable && className)} + /> + ) + } + return isFilterable ? 
( + + {renderVirtualList} + + ) : ( + renderVirtualList(ports) + ) +} diff --git a/web/common/src/components/Lineage/node/base-handle.tsx b/web/common/src/components/Lineage/node/base-handle.tsx new file mode 100644 index 0000000000..0ce6a98745 --- /dev/null +++ b/web/common/src/components/Lineage/node/base-handle.tsx @@ -0,0 +1,26 @@ +import { Handle, type HandleProps } from '@xyflow/react' +import { forwardRef } from 'react' +import type { ForwardRefExoticComponent, RefAttributes } from 'react' + +import { cn } from '@sqlmesh-common/utils' + +export const BaseHandle: ForwardRefExoticComponent< + HandleProps & RefAttributes +> = forwardRef( + ({ className, children, ...props }, ref) => { + return ( + + {children} + + ) + }, +) + +BaseHandle.displayName = 'BaseHandle' diff --git a/web/common/src/components/Lineage/node/base-node.tsx b/web/common/src/components/Lineage/node/base-node.tsx new file mode 100644 index 0000000000..f1b5c7d509 --- /dev/null +++ b/web/common/src/components/Lineage/node/base-node.tsx @@ -0,0 +1,17 @@ +import { type HTMLAttributes, forwardRef } from 'react' + +import { cn } from '@sqlmesh-common/utils' + +export const BaseNode = forwardRef< + HTMLDivElement, + HTMLAttributes & { selected?: boolean } +>(({ className, ...props }, ref) => ( +
+)) + +BaseNode.displayName = 'BaseNode' diff --git a/web/common/src/components/Lineage/node/useNodeMetadata.tsx b/web/common/src/components/Lineage/node/useNodeMetadata.tsx new file mode 100644 index 0000000000..72a8be90ba --- /dev/null +++ b/web/common/src/components/Lineage/node/useNodeMetadata.tsx @@ -0,0 +1,40 @@ +import { + type Node, + type NodeProps as ReactFlowNodeProps, + useNodeConnections, +} from '@xyflow/react' + +import { type LineageNodeData, type NodeId } from '../utils' + +export type NodeProps = + ReactFlowNodeProps> + +export function useNodeMetadata( + nodeId: TNodeID, + currentNodeId: TNodeID | null, + selectedNodeId: TNodeID | null, + selectedNodes: Set, +) { + const sources = useNodeConnections({ + id: nodeId, + handleType: 'source', + }) + const targets = useNodeConnections({ + id: nodeId, + handleType: 'target', + }) + + const leftId = targets.length > 0 ? nodeId : undefined + const rightId = sources.length > 0 ? nodeId : undefined + const isCurrent = currentNodeId === nodeId + const isSelected = selectedNodeId === nodeId + const isActive = selectedNodes.has(nodeId) + + return { + leftId, + rightId, + isCurrent, + isSelected, + isActive, + } +} diff --git a/web/common/src/components/Lineage/stories/Lineage.stories.tsx b/web/common/src/components/Lineage/stories/Lineage.stories.tsx new file mode 100644 index 0000000000..87a757bbc4 --- /dev/null +++ b/web/common/src/components/Lineage/stories/Lineage.stories.tsx @@ -0,0 +1,225 @@ +import { ModelLineage } from './ModelLineage' +import type { + BrandedLineageAdjacencyList, + BrandedLineageDetails, + ModelLineageNodeDetails, + ModelName, +} from './ModelLineageContext' + +export default { + title: 'Components/Lineage', +} + +const adjacencyList = { + 'sqlmesh.sushi.raw_orders': ['sqlmesh.sushi.orders'], + 'sqlmesh.sushi.orders': [], +} as Record + +const lineageDetails = { + 'sqlmesh.sushi.raw_orders': { + name: 'sqlmesh.sushi.raw_orders', + display_name: 'sushi.raw_orders', + identifier: 
'123456789', + version: '123456789', + dialect: 'bigquery', + cron: '0 0 * * *', + owner: 'admin', + kind: 'INCREMENTAL_BY_TIME', + model_type: 'python', + tags: ['test', 'tag', 'another tag'], + columns: { + user_id: { + data_type: 'STRING', + description: 'node', + }, + event_id: { + data_type: 'STRING', + description: 'node', + }, + created_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + }, + }, + 'sqlmesh.sushi.orders': { + name: 'sqlmesh.sushi.orders', + display_name: 'sushi.orders', + identifier: '123456789', + version: '123456789', + dialect: 'bigquery', + cron: '0 0 * * *', + owner: 'admin', + kind: 'INCREMENTAL_BY_TIME', + model_type: 'sql', + tags: ['test', 'tag', 'another tag'], + columns: { + user_id: { + data_type: 'STRING', + description: 'node', + columnLineageData: { + 'sqlmesh.sushi.orders': { + user_id: { + source: 'sqlmesh.sushi.raw_orders', + expression: 'select user_id from sqlmesh.sushi.raw_orders', + models: { + 'sqlmesh.sushi.raw_orders': ['user_id'], + }, + }, + }, + }, + }, + event_id: { + data_type: 'STRING', + description: 'node', + columnLineageData: { + 'sqlmesh.sushi.orders': { + event_id: { + models: { + 'sqlmesh.sushi.raw_orders': ['event_id'], + }, + }, + }, + }, + }, + product_id: { + data_type: 'STRING', + description: 'node', + }, + customer_id: { + data_type: 'STRING', + description: 'node', + }, + updated_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + deleted_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + expired_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + start_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + end_at: { + data_type: 'TIMESTAMP', + description: 'node', + }, + created_ts: { + data_type: 'TIMESTAMP', + description: 'node', + }, + }, + }, +} as Record + +export const LineageModel = () => { + return ( +
+ + +
+ ) +} diff --git a/web/common/src/components/Lineage/stories/ModelLineage.tsx b/web/common/src/components/Lineage/stories/ModelLineage.tsx new file mode 100644 index 0000000000..b4a65fd71e --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelLineage.tsx @@ -0,0 +1,438 @@ +import { Focus, Rows2, Rows3 } from 'lucide-react' +import React from 'react' + +import { + MAX_COLUMNS_TO_DISPLAY, + calculateColumnsHeight, + calculateNodeColumnsCount, + calculateSelectedColumnsHeight, + getEdgesFromColumnLineage, +} from '../LineageColumnLevel/help' +import { useColumnLevelLineage } from '../LineageColumnLevel/useColumnLevelLineage' +import { LineageLayout } from '../LineageLayout' +import { FactoryEdgeWithGradient } from '../edge/FactoryEdgeWithGradient' +import { + calculateNodeBaseHeight, + calculateNodeDetailsHeight, + createEdge, + createNode, + getOnlySelectedNodes, + getTransformedModelEdgesSourceTargets, + getTransformedNodes, +} from '../help' +import { + type LineageEdge, + type LineageNodesMap, + toNodeID, + toPortID, + ZOOM_THRESHOLD, +} from '../utils' +import { + type EdgeData, + ModelLineageContext, + type ModelLineageNodeDetails, + type ModelName, + type ColumnName, + type NodeData, + useModelLineage, + type ModelNodeId, + type ModelColumnID, + type ModelEdgeId, + type NodeType, + type BrandedLineageAdjacencyList, + type BrandedLineageDetails, + type BrandedColumnLevelLineageAdjacencyList, + type ModelColumn, + type ModelDisplayName, + type LeftHandleId, + type RightHandleId, + type LeftPortHandleId, + type RightPortHandleId, +} from './ModelLineageContext' +import { ModelNode } from './ModelNode' +import { getNodeTypeColorVar } from './help' +import { EdgeWithGradient } from '../edge/EdgeWithGradient' +import { cleanupLayoutWorker, getLayoutedGraph } from '../layout/help' +import { LineageControlButton } from '../LineageControlButton' +import { LineageControlIcon } from '../LineageControlIcon' +import type { BrandedRecord } from 
'@sqlmesh-common/types' + +const nodeTypes = { + node: ModelNode, +} +const edgeTypes = { + edge: FactoryEdgeWithGradient(useModelLineage), + port: EdgeWithGradient, +} + +export const ModelLineage = ({ + selectedModelName, + adjacencyList, + lineageDetails, + className, +}: { + adjacencyList: BrandedLineageAdjacencyList + lineageDetails: BrandedLineageDetails + selectedModelName?: ModelName + className?: string +}) => { + const currentNodeId = selectedModelName + ? toNodeID(selectedModelName) + : null + + const [zoom, setZoom] = React.useState(ZOOM_THRESHOLD) + const [isBuildingLayout, setIsBuildingLayout] = React.useState(false) + const [edges, setEdges] = React.useState< + LineageEdge< + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + >[] + >([]) + const [nodesMap, setNodesMap] = React.useState< + LineageNodesMap + >({}) + const [showOnlySelectedNodes, setShowOnlySelectedNodes] = + React.useState(false) + const [selectedNodes, setSelectedNodes] = React.useState>( + new Set(), + ) + const [selectedEdges, setSelectedEdges] = React.useState>( + new Set(), + ) + const [selectedNodeId, setSelectedNodeId] = + React.useState(currentNodeId) + + const [showColumns, setShowColumns] = React.useState(false) + const [columnLevelLineage, setColumnLevelLineage] = React.useState< + Map + >(new Map()) + const [fetchingColumns, setFetchingColumns] = React.useState< + Set + >(new Set()) + + const { + adjacencyListColumnLevel, + selectedColumns, + adjacencyListKeysColumnLevel, + } = useColumnLevelLineage< + ModelName, + ColumnName, + ModelColumnID, + BrandedColumnLevelLineageAdjacencyList + >(columnLevelLineage) + + const adjacencyListKeys = React.useMemo(() => { + let keys: ModelName[] = [] + + if (adjacencyListKeysColumnLevel.length > 0) { + keys = adjacencyListKeysColumnLevel + } else { + keys = Object.keys(adjacencyList) as ModelName[] + } + + return keys + }, [adjacencyListKeysColumnLevel, adjacencyList]) + + const 
transformNode = React.useCallback( + (nodeId: ModelNodeId, detail: ModelLineageNodeDetails) => { + const columns = detail.columns + + const node = createNode('node', nodeId, { + name: detail.name as ModelName, + displayName: detail.display_name as ModelDisplayName, + identifier: detail.identifier, + model_type: detail.model_type as NodeType, + kind: detail.kind!, + cron: detail.cron, + owner: detail.owner!, + dialect: detail.dialect, + version: detail.version, + tags: detail.tags || [], + columns: columns as BrandedRecord, + }) + const selectedColumnsCount = new Set( + Object.keys(columns ?? {}).map(k => toPortID(detail.name, k)), + ).intersection(selectedColumns).size + // We are trying to project the node hight so we are including the ceiling and floor heights + const nodeBaseHeight = calculateNodeBaseHeight({ + includeNodeFooterHeight: false, + includeCeilingHeight: true, + includeFloorHeight: true, + }) + const nodeDetailsHeight = calculateNodeDetailsHeight({ + nodeDetailsCount: 0, + }) + const selectedColumnsHeight = + calculateSelectedColumnsHeight(selectedColumnsCount) + + const columnsHeight = calculateColumnsHeight({ + columnsCount: calculateNodeColumnsCount( + Object.keys(columns ?? {}).length, + ), + hasColumnsFilter: + Object.keys(columns ?? 
{}).length > MAX_COLUMNS_TO_DISPLAY, + }) + + node.height = + nodeBaseHeight + + nodeDetailsHeight + + selectedColumnsHeight + + columnsHeight + + return node + }, + [selectedColumns], + ) + + const transformedNodesMap = React.useMemo(() => { + return getTransformedNodes< + ModelName, + ModelLineageNodeDetails, + NodeData, + ModelNodeId + >(adjacencyListKeys, lineageDetails, transformNode) + }, [adjacencyListKeys, lineageDetails, transformNode]) + + const transformEdge = React.useCallback( + ( + edgeType: string, + edgeId: ModelEdgeId, + sourceId: LeftHandleId, + targetId: RightHandleId, + sourceHandleId?: LeftPortHandleId, + targetHandleId?: RightPortHandleId, + ) => { + const sourceNode = transformedNodesMap[sourceId] + const targetNode = transformedNodesMap[targetId] + const data: EdgeData = {} + + if (sourceHandleId) { + data.startColor = 'var(--color-lineage-node-port-edge-source)' + } else { + if (sourceNode?.data?.model_type) { + data.startColor = getNodeTypeColorVar( + sourceNode.data.model_type as NodeType, + ) + } + } + + if (targetHandleId) { + data.endColor = 'var(--color-lineage-node-port-edge-target)' + } else { + if (targetNode?.data?.model_type) { + data.endColor = getNodeTypeColorVar( + targetNode.data.model_type as NodeType, + ) + } + } + + if (sourceHandleId && targetHandleId) { + data.strokeWidth = 2 + } + + return createEdge< + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + >( + edgeType, + edgeId, + sourceId, + targetId, + sourceHandleId, + targetHandleId, + data, + ) + }, + [transformedNodesMap], + ) + + const edgesColumnLevel = React.useMemo( + () => + getEdgesFromColumnLineage< + ModelName, + ColumnName, + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId, + BrandedColumnLevelLineageAdjacencyList + >({ + columnLineage: adjacencyListColumnLevel, + transformEdge, + }), + [adjacencyListColumnLevel, transformEdge], + ) + + const transformedEdges 
= React.useMemo(() => { + return edgesColumnLevel.length > 0 + ? edgesColumnLevel + : getTransformedModelEdgesSourceTargets< + ModelName, + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + >(adjacencyListKeys, adjacencyList, transformEdge) + }, [adjacencyListKeys, adjacencyList, transformEdge, edgesColumnLevel]) + + const calculateLayout = React.useCallback( + ( + eds: LineageEdge< + EdgeData, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + >[], + nds: LineageNodesMap, + ) => + getLayoutedGraph( + eds, + nds, + new URL('./dagreLayout.worker.ts', import.meta.url), + ) + .then(({ edges, nodesMap }) => { + setEdges(edges) + setNodesMap(nodesMap) + }) + .catch(error => { + console.error('Layout processing failed:', error) + + setEdges([]) + setNodesMap({}) + }) + .finally(() => { + setIsBuildingLayout(false) + }), + [setEdges, setNodesMap, setIsBuildingLayout], + ) + + const nodes = React.useMemo(() => { + return Object.values(nodesMap) + }, [nodesMap]) + + const selectedNode = selectedNodeId ? 
nodesMap[selectedNodeId] : null + + const handleReset = React.useCallback(() => { + setShowColumns(false) + setEdges([]) + setNodesMap({}) + setShowOnlySelectedNodes(false) + setSelectedNodes(new Set()) + setSelectedEdges(new Set()) + setSelectedNodeId(null) + setColumnLevelLineage(new Map()) + }, []) + + React.useEffect(() => { + setIsBuildingLayout(true) + + if (showOnlySelectedNodes) { + const onlySelectedNodesMap = getOnlySelectedNodes( + transformedNodesMap, + selectedNodes, + ) + const onlySelectedEdges = transformedEdges.filter(edge => + selectedEdges.has(edge.id), + ) + calculateLayout(onlySelectedEdges, onlySelectedNodesMap) + } else { + calculateLayout(transformedEdges, transformedNodesMap) + } + }, [showOnlySelectedNodes, transformedEdges, transformedNodesMap]) + + React.useEffect(() => { + setSelectedNodeId(currentNodeId) + }, [currentNodeId]) + + // Cleanup worker on unmount + React.useEffect(() => () => cleanupLayoutWorker(), []) + + function toggleColumns() { + setShowColumns(prev => !prev) + } + + return ( + + + isBuildingLayout={isBuildingLayout} + useLineage={useModelLineage} + nodeTypes={nodeTypes} + edgeTypes={edgeTypes} + className={className} + controls={ + <> + toggleColumns()} + disabled={isBuildingLayout} + > + {showColumns ? 
( + + ) : ( + + )} + + handleReset()} + disabled={isBuildingLayout} + > + + + + } + /> + + ) +} diff --git a/web/common/src/components/Lineage/stories/ModelLineageContext.ts b/web/common/src/components/Lineage/stories/ModelLineageContext.ts new file mode 100644 index 0000000000..00fa62563a --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelLineageContext.ts @@ -0,0 +1,132 @@ +import type { Branded, BrandedRecord } from '@sqlmesh-common/types' +import { + type ColumnLevelLineageAdjacencyList, + type ColumnLevelLineageContextValue, + getColumnLevelLineageContextInitial, +} from '../LineageColumnLevel/ColumnLevelLineageContext' +import { type Column } from '../LineageColumnLevel/useColumns' +import { + type LineageContextValue, + createLineageContext, + getInitial as getLineageContextInitial, +} from '../LineageContext' +import { + type LineageAdjacencyList, + type LineageDetails, + type PathType, +} from '../utils' + +export type ModelName = Branded +export type ModelDisplayName = Branded +export type ColumnName = Branded +export type ModelColumnID = Branded +export type ModelEdgeId = Branded +export type LeftHandleId = Branded +export type RightHandleId = Branded +export type ModelNodeId = LeftHandleId | RightHandleId +export type LeftPortHandleId = Branded +export type RightPortHandleId = Branded + +export type BrandedColumnLevelLineageAdjacencyList = + ColumnLevelLineageAdjacencyList & { + readonly __adjacencyListKeyBrand: ModelName + readonly __adjacencyListColumnKeyBrand: ColumnName + } + +export type BrandedLineageAdjacencyList = LineageAdjacencyList & { + readonly __adjacencyListKeyBrand: ModelName +} + +export type BrandedLineageDetails = LineageDetails< + ModelName, + ModelLineageNodeDetails +> & { + readonly __lineageDetailsKeyBrand: ModelName +} + +export type ModelColumn = Column & { + id: ModelColumnID + name: ColumnName + columnLineageData?: BrandedColumnLevelLineageAdjacencyList +} + +export type NodeType = 'sql' | 'python' +export type 
ModelLineageNodeDetails = { + name: string + display_name: string + identifier: string + version: string + dialect: string + cron: string + owner?: string + kind?: string + model_type?: string + tags?: string[] + columns?: BrandedRecord +} + +export type NodeData = { + name: ModelName + displayName: ModelDisplayName + model_type: NodeType + identifier: string + version: string + kind: string + cron: string + owner: string + dialect: string + tags: string[] + columns?: BrandedRecord +} + +export type EdgeData = { + pathType?: PathType + startColor?: string + endColor?: string + strokeWidth?: number +} + +export type ModelLineageContextValue = ColumnLevelLineageContextValue< + ModelName, + ColumnName, + ModelColumnID, + BrandedColumnLevelLineageAdjacencyList +> & + LineageContextValue< + NodeData, + EdgeData, + ModelNodeId, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId + > + +export const initial = { + ...getLineageContextInitial(), + ...getColumnLevelLineageContextInitial< + ModelName, + ColumnName, + ModelColumnID, + BrandedColumnLevelLineageAdjacencyList + >(), +} + +export const { Provider, useLineage } = createLineageContext< + NodeData, + EdgeData, + ModelNodeId, + ModelEdgeId, + LeftHandleId, + RightHandleId, + LeftPortHandleId, + RightPortHandleId, + ModelLineageContextValue +>(initial) + +export const ModelLineageContext = { + Provider, +} + +export const useModelLineage = useLineage diff --git a/web/common/src/components/Lineage/stories/ModelNode.tsx b/web/common/src/components/Lineage/stories/ModelNode.tsx new file mode 100644 index 0000000000..4af804aad2 --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelNode.tsx @@ -0,0 +1,337 @@ +import cronstrue from 'cronstrue' +import React from 'react' + +import { cn } from '@sqlmesh-common/utils' +import { HorizontalContainer } from '../../HorizontalContainer/HorizontalContainer' +import { VerticalContainer } from '../../VerticalContainer/VerticalContainer' 
+import { + MAX_COLUMNS_TO_DISPLAY, + calculateColumnsHeight, + calculateNodeColumnsCount, + calculateSelectedColumnsHeight, +} from '../LineageColumnLevel/help' +import { useColumns, type Column } from '../LineageColumnLevel/useColumns' +import { calculateNodeBaseHeight, calculateNodeDetailsHeight } from '../help' +import { NodeAppendix } from '../node/NodeAppendix' +import { NodeBadge } from '../node/NodeBadge' +import { NodeBase } from '../node/NodeBase' +import { NodeContainer } from '../node/NodeContainer' +import { NodeHandleIcon } from '../node/NodeHandleIcon' +import { NodeHandles } from '../node/NodeHandles' +import { NodeHeader } from '../node/NodeHeader' +import { useNodeMetadata, type NodeProps } from '../node/useNodeMetadata' +import { ZOOM_THRESHOLD } from '../utils' +import { + type ModelName as ModelNameType, + type ColumnName, + type NodeData, + useModelLineage, + type ModelColumn, + type ModelNodeId, + type ModelColumnID, + type NodeType, +} from './ModelLineageContext' +import { ModelNodeColumn } from './ModelNodeColumn' +import { + getNodeTypeBorderColor, + getNodeTypeColor, + getNodeTypeTextColor, +} from './help' +import { Tooltip } from '@sqlmesh-common/components/Tooltip/Tooltip' +import type { ColumnLevelLineageAdjacencyList } from '../LineageColumnLevel/ColumnLevelLineageContext' +import { ModelName } from '@sqlmesh-common/components/ModelName/ModelName' +import { Badge } from '@sqlmesh-common/components/Badge/Badge' +import { NodePorts } from '../node/NodePorts' + +export const ModelNode = React.memo(function ModelNode({ + id, + data, + ...props +}: NodeProps) { + const { + selectedColumns, + zoom, + currentNodeId, + selectedNodeId, + selectedNodes, + showColumns, + fetchingColumns, + setSelectedNodeId, + } = useModelLineage() + + const [showNodeColumns, setShowNodeColumns] = React.useState(showColumns) + const [isHovered, setIsHovered] = React.useState(false) + + const nodeId = id as ModelNodeId + + const { + leftId, + rightId, + 
isCurrent, + isSelected, // if selected from inside the lineage and node is selcted + isActive, // if selected from inside the lineage and node is not selected but in path + } = useNodeMetadata(nodeId, currentNodeId, selectedNodeId, selectedNodes) + + const { + columns, + selectedColumns: modelSelectedColumns, + columnNames, + } = useColumns( + selectedColumns, + data.name, + data.columns, + ) + + const hasSelectedColumns = selectedColumns.intersection(columnNames).size > 0 + const hasFetchingColumns = fetchingColumns.intersection(columnNames).size > 0 + + React.useEffect(() => { + setShowNodeColumns(showColumns || isSelected) + }, [columnNames, isSelected, showColumns]) + + function toggleSelectedNode() { + setSelectedNodeId(prev => (prev === nodeId ? null : nodeId)) + } + + const shouldShowColumns = + showNodeColumns || hasSelectedColumns || hasFetchingColumns || isHovered + const modelType = data.model_type?.toLowerCase() as NodeType + const hasColumnsFilter = + shouldShowColumns && columns.length > MAX_COLUMNS_TO_DISPLAY + // We are not including the footer, because we need actual height to dynamically adjust node container height + const nodeBaseHeight = calculateNodeBaseHeight({ + includeNodeFooterHeight: false, + includeCeilingHeight: false, + includeFloorHeight: false, + }) + const nodeDetailsHeight = + zoom > ZOOM_THRESHOLD + ? calculateNodeDetailsHeight({ + nodeDetailsCount: 0, + }) + : 0 + const selectedColumnsHeight = calculateSelectedColumnsHeight( + modelSelectedColumns.length, + ) + const columnsHeight = + zoom > ZOOM_THRESHOLD && shouldShowColumns + ? calculateColumnsHeight({ + columnsCount: calculateNodeColumnsCount(columns.length), + hasColumnsFilter, + }) + : 0 + + // If zoom is less than ZOOM_THRESHOLD, we are making node looks bigger + const nodeHeight = + (zoom > ZOOM_THRESHOLD ? 
nodeBaseHeight : nodeBaseHeight * 2) + + nodeDetailsHeight + + selectedColumnsHeight + + columnsHeight + + return ( + setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + > + + + {isCurrent && ( + + current + + )} + {zoom > ZOOM_THRESHOLD && ( + <> + {data.kind?.toUpperCase()} + {data.cron && ( + + {data.cron.toUpperCase()} + + } + className="text-xs p-2 rounded-md font-semibold" + > + + UTC Time + {cronstrue.toString(data.cron, { + dayOfWeekStartIndexZero: true, + use24HourTimeFormat: true, + verbose: true, + })} + + + )} + + )} + + + + ZOOM_THRESHOLD ? 'shrink-0 h-7' : 'h-full')} + onClick={toggleSelectedNode} + > + + } + rightIcon={ + + } + handleClassName="top-4" + > + + ZOOM_THRESHOLD + ? ' text-xs' + : 'text-2xl justify-center', + )} + /> + + + + {shouldShowColumns && ( + <> + {modelSelectedColumns.length > 0 && ( + + {modelSelectedColumns.map(column => ( + + } + ).columnLineageData + } + /> + ))} + + )} + {columns.length > 0 && zoom > ZOOM_THRESHOLD && ( + + ports={columns} + estimatedListItemHeight={24} + isFilterable={hasColumnsFilter} + filterOptions={{ + keys: ['name', 'description'], + threshold: 0.3, + }} + renderPort={column => ( + + } + ).columnLineageData + } + /> + )} + className="border-t border-lineage-divider" + /> + )} + + )} + + {modelType && ( + + ZOOM_THRESHOLD ? 'h-5' : 'h-8', + )} + > + ZOOM_THRESHOLD ? 
'2xs' : 'm'} + className={cn( + 'text-[white] font-black', + getNodeTypeColor(modelType), + )} + > + {modelType.toUpperCase()} + + + + )} + + ) +}) diff --git a/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx b/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx new file mode 100644 index 0000000000..dbb3f92dad --- /dev/null +++ b/web/common/src/components/Lineage/stories/ModelNodeColumn.tsx @@ -0,0 +1,81 @@ +import React from 'react' + +import { FactoryColumn } from '../LineageColumnLevel/FactoryColumn' + +import { + useModelLineage, + type ModelColumnID, + type ModelName, + type ModelNodeId, + type ColumnName, + type BrandedColumnLevelLineageAdjacencyList, + type LeftPortHandleId, + type RightPortHandleId, +} from './ModelLineageContext' + +const ModelColumn = FactoryColumn< + ModelName, + ColumnName, + ModelNodeId, + ModelColumnID, + LeftPortHandleId, + RightPortHandleId, + BrandedColumnLevelLineageAdjacencyList +>(useModelLineage) + +export const ModelNodeColumn = React.memo(function ModelNodeColumn({ + id, + nodeId, + modelName, + name, + description, + type, + className, + columnLineageData, +}: { + id: ModelColumnID + nodeId: ModelNodeId + modelName: ModelName + name: ColumnName + type: string + description?: string | null + className?: string + columnLineageData?: BrandedColumnLevelLineageAdjacencyList +}) { + const { selectedColumns, setColumnLevelLineage } = useModelLineage() + + const isSelectedColumn = selectedColumns.has(id) + + async function toggleSelectedColumn() { + if (isSelectedColumn) { + setColumnLevelLineage(prev => { + prev.delete(id) + return new Map(prev) + }) + } else { + if (columnLineageData != null) { + setColumnLevelLineage(prev => new Map(prev).set(id, columnLineageData)) + } + } + } + + return ( + console.log('cancel')} + renderError={error =>
Error: {error.message}
} + renderExpression={expression =>
{expression}
} + renderSource={source =>
{source}
} + /> + ) +}) diff --git a/web/common/src/components/Lineage/stories/dagreLayout.worker.ts b/web/common/src/components/Lineage/stories/dagreLayout.worker.ts new file mode 100644 index 0000000000..7ca75c60c2 --- /dev/null +++ b/web/common/src/components/Lineage/stories/dagreLayout.worker.ts @@ -0,0 +1,38 @@ +import { + type LayoutedGraph, + type LineageEdgeData, + type LineageNodeData, + type EdgeId, + type NodeId, + type PortId, +} from '../utils' +import { buildLayout } from '../layout/dagreLayout' + +self.onmessage = < + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TNodeID extends string = NodeId, + TEdgeID extends string = EdgeId, + TPortID extends string = PortId, +>( + event: MessageEvent< + LayoutedGraph + >, +) => { + try { + const nodesMap = buildLayout< + TNodeData, + TEdgeData, + TNodeID, + TEdgeID, + TPortID + >(event.data) + + self.postMessage({ + edges: event.data.edges, + nodesMap, + } satisfies LayoutedGraph) + } catch (error) { + self.postMessage({ error }) + } +} diff --git a/web/common/src/components/Lineage/stories/help.ts b/web/common/src/components/Lineage/stories/help.ts new file mode 100644 index 0000000000..f26c8c5752 --- /dev/null +++ b/web/common/src/components/Lineage/stories/help.ts @@ -0,0 +1,29 @@ +import { type NodeType } from './ModelLineageContext' + +export function getNodeTypeColorVar(nodeType: NodeType) { + return { + sql: 'var(--color-lineage-node-type-background-sql)', + python: 'var(--color-lineage-node-type-background-python)', + }[nodeType] +} + +export function getNodeTypeColor(nodeType: NodeType) { + return { + sql: 'bg-lineage-node-type-background-sql', + python: 'bg-lineage-node-type-background-python', + }[nodeType] +} + +export function getNodeTypeTextColor(nodeType: NodeType) { + return { + sql: 'text-lineage-node-type-foreground-sql', + python: 'text-lineage-node-type-foreground-python', + }[nodeType] +} + +export function 
getNodeTypeBorderColor(nodeType: NodeType) { + return { + sql: 'border-lineage-node-type-border-sql', + python: 'border-lineage-node-type-border-python', + }[nodeType] +} diff --git a/web/common/src/components/Lineage/utils.ts b/web/common/src/components/Lineage/utils.ts new file mode 100644 index 0000000000..5f59dbafe4 --- /dev/null +++ b/web/common/src/components/Lineage/utils.ts @@ -0,0 +1,130 @@ +import type { Branded } from '@sqlmesh-common/types' +import { type Edge, type Node } from '@xyflow/react' + +export type NodeId = Branded +export type EdgeId = Branded +export type PortId = Branded +export type HandleId = Branded +export type PortHandleId = Branded + +export type LineageNodeData = Record +export type LineageEdgeData = Record + +export type LineageAdjacencyList = + Record + +export type LineageDetails = Record< + TAdjacencyListKey, + TValue +> + +export type LineageNodesMap< + TNodeData extends LineageNodeData, + TNodeID extends string = NodeId, +> = Record> +export interface LineageNode< + TNodeData extends LineageNodeData, + TNodeID extends string = NodeId, +> extends Node { + id: TNodeID +} + +export interface LineageEdge< + TEdgeData extends LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +> extends Edge { + id: TEdgeID + source: TSourceID + target: TTargetID + sourceHandle?: TSourceHandleID + targetHandle?: TTargetHandleID +} + +export type LayoutedGraph< + TNodeData extends LineageNodeData = LineageNodeData, + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +> = { + edges: LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID + >[] + nodesMap: 
LineageNodesMap +} + +export type PathType = 'bezier' | 'smoothstep' | 'step' | 'straight' +export type TransformNodeFn< + TData, + TNodeData extends LineageNodeData = LineageNodeData, + TNodeID extends string = NodeId, +> = (nodeId: TNodeID, data: TData) => LineageNode + +export type TransformEdgeFn< + TEdgeData extends LineageEdgeData = LineageEdgeData, + TEdgeID extends string = EdgeId, + TSourceID extends string = NodeId, + TTargetID extends string = NodeId, + TSourceHandleID extends string = PortId, + TTargetHandleID extends string = PortId, +> = ( + edgeType: string, + edgeId: TEdgeID, + sourceId: TSourceID, + targetId: TTargetID, + sourceHandleId?: TSourceHandleID, + targetHandleId?: TTargetHandleID, +) => LineageEdge< + TEdgeData, + TEdgeID, + TSourceID, + TTargetID, + TSourceHandleID, + TTargetHandleID +> + +export const DEFAULT_NODE_HEIGHT = 32 +export const DEFAULT_NODE_WIDTH = 300 +export const DEFAULT_ZOOM = 0.85 +export const MIN_ZOOM = 0.01 +export const MAX_ZOOM = 1.75 +export const ZOOM_THRESHOLD = 0.5 +export const NODES_TRESHOLD = 200 +export const NODES_TRESHOLD_ZOOM = 0.1 + +// ID generated from toInternalID is meant to be used only internally to identify nodes, edges and ports within the graph +// Do not rely on the ID to be a valid URL, or anythjin outside of the graph +export function toInternalID( + ...args: string[] +): TReturn { + return encodeURI(args.filter(Boolean).join('.')) as TReturn +} + +export function toNodeID( + ...args: string[] +): TNodeID { + return toInternalID(...args) +} + +export function toEdgeID( + ...args: string[] +): TEdgeID { + return toInternalID(...args) +} + +export function toPortID( + ...args: string[] +): TPortId { + return toInternalID(...args) +} diff --git a/web/common/src/components/LoadingContainer/LoadingContainer.stories.tsx b/web/common/src/components/LoadingContainer/LoadingContainer.stories.tsx index 80c978a905..a37c26c337 100644 --- 
a/web/common/src/components/LoadingContainer/LoadingContainer.stories.tsx +++ b/web/common/src/components/LoadingContainer/LoadingContainer.stories.tsx @@ -1,5 +1,5 @@ import type { Meta, StoryObj } from '@storybook/react-vite' -import type { Side } from '@/types' +import type { Side } from '@sqlmesh-common/types' import { LoadingContainer } from './LoadingContainer' import { expect, within } from 'storybook/test' diff --git a/web/common/src/components/LoadingContainer/LoadingContainer.tsx b/web/common/src/components/LoadingContainer/LoadingContainer.tsx index af21ddd04c..3b7d84fdaa 100644 --- a/web/common/src/components/LoadingContainer/LoadingContainer.tsx +++ b/web/common/src/components/LoadingContainer/LoadingContainer.tsx @@ -1,5 +1,5 @@ -import type { Side } from '@/types' -import { cn } from '@/utils' +import type { Side } from '@sqlmesh-common/types' +import { cn } from '@sqlmesh-common/utils' import React from 'react' import { LoadingIcon } from './LoadingIcon' @@ -15,17 +15,20 @@ export const LoadingContainer = React.forwardRef< HTMLDivElement, LoadingContainerProps >( - ({ - isLoading = true, - side = 'left', - message, - children, - className, - }: LoadingContainerProps) => { + ( + { + isLoading = true, + side = 'left', + message, + children, + className, + }: LoadingContainerProps, + ref, + ) => { function renderLoading() { return ( <> - + {message && {message}} ) @@ -33,6 +36,7 @@ export const LoadingContainer = React.forwardRef< return isLoading ? (
diff --git a/web/common/src/components/LoadingContainer/LoadingIcon.tsx b/web/common/src/components/LoadingContainer/LoadingIcon.tsx index b683929f8f..57566b683e 100644 --- a/web/common/src/components/LoadingContainer/LoadingIcon.tsx +++ b/web/common/src/components/LoadingContainer/LoadingIcon.tsx @@ -1,4 +1,4 @@ -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import React from 'react' export const LoadingIcon = React.forwardRef< diff --git a/web/common/src/components/MessageContainer/MessageContainer.css b/web/common/src/components/MessageContainer/MessageContainer.css new file mode 100644 index 0000000000..7b438abda6 --- /dev/null +++ b/web/common/src/components/MessageContainer/MessageContainer.css @@ -0,0 +1,3 @@ +:where(:root) { + --color-message-translucid: var(--color-neutral-3); +} diff --git a/web/common/src/components/MessageContainer/MessageContainer.tsx b/web/common/src/components/MessageContainer/MessageContainer.tsx index d51213bfaf..84ba3b5245 100644 --- a/web/common/src/components/MessageContainer/MessageContainer.tsx +++ b/web/common/src/components/MessageContainer/MessageContainer.tsx @@ -1,7 +1,9 @@ -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import { LoadingContainer } from '../LoadingContainer/LoadingContainer' import { HorizontalContainer } from '../HorizontalContainer/HorizontalContainer' +import './MessageContainer.css' + export interface MessageContainerProps { children: React.ReactNode className?: string @@ -19,7 +21,7 @@ export function MessageContainer({ diff --git a/web/common/src/components/Metadata/Metadata.css b/web/common/src/components/Metadata/Metadata.css new file mode 100644 index 0000000000..5d3390ab46 --- /dev/null +++ b/web/common/src/components/Metadata/Metadata.css @@ -0,0 +1,4 @@ +:where(:root) { + --color-metadata-label: var(--color-neutral-600); + --color-metadata-value: var(--color-prose); +} diff --git a/web/common/src/components/Metadata/Metadata.tsx 
b/web/common/src/components/Metadata/Metadata.tsx index 9227844fd3..ff55c18e73 100644 --- a/web/common/src/components/Metadata/Metadata.tsx +++ b/web/common/src/components/Metadata/Metadata.tsx @@ -1,6 +1,6 @@ import React from 'react' import { HorizontalContainer } from '../HorizontalContainer/HorizontalContainer' -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' export interface MetadataProps extends React.HTMLAttributes { label: React.ReactNode @@ -14,18 +14,28 @@ export const Metadata = React.forwardRef( ref={ref} data-component="Metadata" className={cn( - 'justify-between gap-2 items-center whitespace-nowrap h-auto', + 'Metadata justify-between gap-2 items-center whitespace-nowrap h-auto', className, )} {...props} > {typeof label === 'string' ? ( -
{label}
+
+ {label} +
) : ( label )} {typeof value === 'string' ? ( -
{value}
+
+ {value} +
) : ( value )} diff --git a/web/common/src/components/ModelName/ModelName.css b/web/common/src/components/ModelName/ModelName.css index 42e11a061b..e2bffa927e 100644 --- a/web/common/src/components/ModelName/ModelName.css +++ b/web/common/src/components/ModelName/ModelName.css @@ -1,4 +1,4 @@ -:root { +:where(:root) { --color-model-name-grayscale-link-underline: var(--color-neutral-125); --color-model-name-grayscale-link-underline-hover: var(--color-neutral-500); --color-model-name-link-underline: var(--color-link-underline); @@ -6,7 +6,7 @@ --color-model-name-grayscale-catalog: var(--color-neutral-400); --color-model-name-grayscale-schema: var(--color-neutral-600); - --color-model-name-grayscale-model: var(--color-neutral-800); + /* --color-model-name-grayscale-model: var(--color-model-name-grayscale-model, var(--color-neutral-800)); */ --color-model-name-catalog: var(--color-catalog); --color-model-name-schema: var(--color-schema); @@ -14,4 +14,6 @@ --color-model-name-copy-icon: var(--color-neutral-600); --color-model-name-copy-icon-hover: var(--color-neutral-100); + --color-model-name-copy-icon-background: var(--color-neutral-100); + --color-model-name-copy-icon-background-hover: var(--color-neutral-200); } diff --git a/web/common/src/components/ModelName/ModelName.tsx b/web/common/src/components/ModelName/ModelName.tsx index 1c902018a1..dd86e5737d 100644 --- a/web/common/src/components/ModelName/ModelName.tsx +++ b/web/common/src/components/ModelName/ModelName.tsx @@ -1,8 +1,8 @@ import { Box, Check, Copy } from 'lucide-react' import { useMemo } from 'react' -import { cn, truncate } from '@/utils' -import { Tooltip } from '@/components/Tooltip/Tooltip' +import { cn, truncate } from '@sqlmesh-common/utils' +import { Tooltip } from '@sqlmesh-common/components/Tooltip/Tooltip' import React from 'react' import './ModelName.css' @@ -144,7 +144,13 @@ export const ModelName = React.forwardRef( : 'text-model-name-model', )} > - {truncate(model, truncateMaxCharsModel, 
15)} + {truncate( + model, + truncateMaxCharsModel, + truncateLimitBefore * 2, + '...', + truncateLimitBefore * 2, + )} ) @@ -192,8 +198,9 @@ export const ModelName = React.forwardRef( {showCopy && ( {copied => copied ? ( diff --git a/web/common/src/components/ScrollContainer/ScrollContainer.css b/web/common/src/components/ScrollContainer/ScrollContainer.css index 0ab7dd033f..d5c01a1c9f 100644 --- a/web/common/src/components/ScrollContainer/ScrollContainer.css +++ b/web/common/src/components/ScrollContainer/ScrollContainer.css @@ -1,4 +1,4 @@ -:root { +:where(:root) { --scrollbar-thumb: var(--color-neutral-300); --scrollbar-track: var(--color-transparent); } diff --git a/web/common/src/components/ScrollContainer/ScrollContainer.tsx b/web/common/src/components/ScrollContainer/ScrollContainer.tsx index 19ce969e2f..942b42d06b 100644 --- a/web/common/src/components/ScrollContainer/ScrollContainer.tsx +++ b/web/common/src/components/ScrollContainer/ScrollContainer.tsx @@ -1,7 +1,7 @@ import React from 'react' -import { cn } from '@/utils' -import type { LayoutDirection } from '@/types' +import { cn } from '@sqlmesh-common/utils' +import type { LayoutDirection } from '@sqlmesh-common/types' import './ScrollContainer.css' diff --git a/web/common/src/components/Tooltip/Tooltip.css b/web/common/src/components/Tooltip/Tooltip.css index ba080f6974..9a0da07b41 100644 --- a/web/common/src/components/Tooltip/Tooltip.css +++ b/web/common/src/components/Tooltip/Tooltip.css @@ -1,4 +1,4 @@ -:root { +:where(:root) { --color-tooltip-background: var(--color-dark); --color-tooltip-foreground: var(--color-light); } diff --git a/web/common/src/components/Tooltip/Tooltip.stories.tsx b/web/common/src/components/Tooltip/Tooltip.stories.tsx index 37f76f7e27..3a876f1da2 100644 --- a/web/common/src/components/Tooltip/Tooltip.stories.tsx +++ b/web/common/src/components/Tooltip/Tooltip.stories.tsx @@ -1,6 +1,6 @@ import type { Meta, StoryObj } from '@storybook/react-vite' -import { Tooltip } 
from '@/components/Tooltip/Tooltip' -import { Button } from '@/components/Button/Button' +import { Tooltip } from '@sqlmesh-common/components/Tooltip/Tooltip' +import { Button } from '@sqlmesh-common/components/Button/Button' const meta: Meta = { title: 'Components/Tooltip', diff --git a/web/common/src/components/Tooltip/Tooltip.tsx b/web/common/src/components/Tooltip/Tooltip.tsx index c28d960bc9..1417f9fe17 100644 --- a/web/common/src/components/Tooltip/Tooltip.tsx +++ b/web/common/src/components/Tooltip/Tooltip.tsx @@ -7,7 +7,7 @@ import { } from '@radix-ui/react-tooltip' import React from 'react' -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import './Tooltip.css' diff --git a/web/common/src/components/Typography/Description.tsx b/web/common/src/components/Typography/Description.tsx index 512a216fac..3cae951c30 100644 --- a/web/common/src/components/Typography/Description.tsx +++ b/web/common/src/components/Typography/Description.tsx @@ -1,4 +1,4 @@ -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import React from 'react' export interface DescriptionProps { diff --git a/web/common/src/components/Typography/Headline.tsx b/web/common/src/components/Typography/Headline.tsx index 1978751a44..6bead184fa 100644 --- a/web/common/src/components/Typography/Headline.tsx +++ b/web/common/src/components/Typography/Headline.tsx @@ -1,7 +1,7 @@ import React from 'react' import { getHeadlineTextSize } from './help' -import type { HeadlineLevel } from '@/types' -import { cn } from '@/utils' +import type { HeadlineLevel } from '@sqlmesh-common/types' +import { cn } from '@sqlmesh-common/utils' export interface HeadlineProps { level: HeadlineLevel diff --git a/web/common/src/components/Typography/Information.tsx b/web/common/src/components/Typography/Information.tsx index d0da7622d2..ce7301673c 100644 --- a/web/common/src/components/Typography/Information.tsx +++ b/web/common/src/components/Typography/Information.tsx @@ -1,9 
+1,9 @@ import { Info } from 'lucide-react' import React from 'react' -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import { getTextSize } from './help' -import type { Size } from '@/types' +import type { Size } from '@sqlmesh-common/types' import { Tooltip } from '../Tooltip/Tooltip' export interface InformationProps { @@ -47,7 +47,7 @@ export function Information({ sideOffset={sideOffset} side={side} className={cn( - 'z-50 select-none max-w-md whitespace-wrap rounded-md bg-dark text-light px-4 py-2 shadow-[hsl(206_22%_7%_/_35%)_0px_10px_38px_-10px,_hsl(206_22%_7%_/_20%)_0px_10px_20px_-15px] will-change-[transform,opacity] data-[state=delayed-open]:data-[side=bottom]:animate-slideUpAndFade data-[state=delayed-open]:data-[side=left]:animate-slideRightAndFade data-[state=delayed-open]:data-[side=right]:animate-slideLeftAndFade data-[state=delayed-open]:data-[side=top]:animate-slideDownAndFade', + 'z-50 select-none whitespace-wrap rounded-md', getTextSize(size), classNameTooltip, )} diff --git a/web/common/src/components/Typography/Tagline.tsx b/web/common/src/components/Typography/Tagline.tsx index 87a7ed29f6..46a69ee7ac 100644 --- a/web/common/src/components/Typography/Tagline.tsx +++ b/web/common/src/components/Typography/Tagline.tsx @@ -1,4 +1,4 @@ -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' export interface TaglineProps { className?: string diff --git a/web/common/src/components/Typography/Text.tsx b/web/common/src/components/Typography/Text.tsx index de9233e296..a06b0f6d24 100644 --- a/web/common/src/components/Typography/Text.tsx +++ b/web/common/src/components/Typography/Text.tsx @@ -1,4 +1,4 @@ -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' export interface TextProps { className?: string diff --git a/web/common/src/components/Typography/help.ts b/web/common/src/components/Typography/help.ts index 429c6f3852..1d18db2f06 100644 --- 
a/web/common/src/components/Typography/help.ts +++ b/web/common/src/components/Typography/help.ts @@ -1,4 +1,4 @@ -import type { HeadlineLevel, Size } from '@/types' +import type { HeadlineLevel, Size } from '@sqlmesh-common/types' export function getHeadlineTextSize(level: HeadlineLevel) { return { diff --git a/web/common/src/components/VerticalContainer/VerticalContainer.tsx b/web/common/src/components/VerticalContainer/VerticalContainer.tsx index e592265dca..2532bad847 100644 --- a/web/common/src/components/VerticalContainer/VerticalContainer.tsx +++ b/web/common/src/components/VerticalContainer/VerticalContainer.tsx @@ -1,6 +1,6 @@ import React from 'react' -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import { ScrollContainer } from '../ScrollContainer/ScrollContainer' export interface VerticalContainerProps diff --git a/web/common/src/components/VirtualList/FilterableList.css b/web/common/src/components/VirtualList/FilterableList.css new file mode 100644 index 0000000000..7cc60e51f9 --- /dev/null +++ b/web/common/src/components/VirtualList/FilterableList.css @@ -0,0 +1,16 @@ +:where(:root) { + --color-filterable-list-counter-background: var(--color-badge-background); + --color-filterable-list-counter-foreground: var(--color-badge-foreground); + + --color-filterable-list-input-background: var(--color-input-background); + --color-filterable-list-input-foreground: var(--color-input-foreground); + --color-filterable-list-input-placeholder: var(--color-input-placeholder); + --color-filterable-list-input-border: var(--color-input-border); +} + +.FilterableList__Input { + --color-input-background: var(--color-filterable-list-input-background); + --color-input-foreground: var(--color-filterable-list-input-foreground); + --color-input-placeholder: var(--color-filterable-list-input-placeholder); + --color-input-border: var(--color-filterable-list-input-border); +} diff --git a/web/common/src/components/VirtualList/FilterableList.tsx 
b/web/common/src/components/VirtualList/FilterableList.tsx index ba6c5950b5..169fb916d9 100644 --- a/web/common/src/components/VirtualList/FilterableList.tsx +++ b/web/common/src/components/VirtualList/FilterableList.tsx @@ -4,10 +4,12 @@ import React from 'react' import { VerticalContainer } from '../VerticalContainer/VerticalContainer' import { HorizontalContainer } from '../HorizontalContainer/HorizontalContainer' import { Badge } from '../Badge/Badge' -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import { MessageContainer } from '../MessageContainer/MessageContainer' import { Input } from '../Input/Input' +import './FilterableList.css' + export interface FilterableListProps { items: TItem[] filterOptions?: IFuseOptions @@ -55,7 +57,10 @@ export function FilterableList({ setSearch(e.target.value) } inputSize="xs" - className="w-full" + className="FilterableList__Input w-full" + onClick={(e: React.MouseEvent) => { + e.stopPropagation() + }} /> {itemsLength !== filteredItemsLength && ( <> diff --git a/web/common/src/components/VirtualList/VirtualList.tsx b/web/common/src/components/VirtualList/VirtualList.tsx index 94e5d93c05..32c6fa47c8 100644 --- a/web/common/src/components/VirtualList/VirtualList.tsx +++ b/web/common/src/components/VirtualList/VirtualList.tsx @@ -1,7 +1,11 @@ -import { useVirtualizer } from '@tanstack/react-virtual' +import { + useVirtualizer, + Virtualizer, + type VirtualItem, +} from '@tanstack/react-virtual' import React from 'react' import { HorizontalContainer } from '../HorizontalContainer/HorizontalContainer' -import { cn } from '@/utils' +import { cn } from '@sqlmesh-common/utils' import { Button } from '../Button/Button' import { ScrollContainer } from '../ScrollContainer/ScrollContainer' import { VerticalContainer } from '../VerticalContainer/VerticalContainer' @@ -9,7 +13,11 @@ import { VerticalContainer } from '../VerticalContainer/VerticalContainer' export interface VirtualListProps { items: TItem[] 
estimatedListItemHeight: number - renderListItem: (item: TItem) => React.ReactNode + renderListItem: ( + item: TItem, + virtualItem?: VirtualItem, + virtualizer?: Virtualizer, + ) => React.ReactNode isSelected?: (item: TItem) => boolean className?: string } @@ -98,7 +106,7 @@ export function VirtualList({ )}
= { [__brand]: B } + +/** + * Branded is a type that adds a brand to a type. It is a type that is used to + * ensure that the type is unique and that it is not possible to mix up types + * with the same brand. + * + * @example + * + * type UserId = Branded + * type UserName = Branded + * + * const userId = '123' as UserId + * const userName = 'John Doe' as UserName + * + * userId == userName -> compile error + */ export type Branded = T & Brand +/** + * Constraint that only accepts branded string types + */ +export type BrandedString = string & Brand + +/** + * BrandedRecord is a type that creates a branded Record type with strict key checking. + * This ensures that Record is NOT assignable to Record + * + * @example + * type ModelFQN = Branded + * type ModelName = Branded + * + * type FQNMap = BrandedRecord + * type NameMap = BrandedRecord + * + * const fqnMap: FQNMap = {} + * const nameMap: NameMap = fqnMap // TypeScript error! + */ +export type BrandedRecord = Record & { + readonly __recordKeyBrand: K +} + export type Callback = (data?: T) => void export type Size = '2xs' | 'xs' | 's' | 'm' | 'l' | 'xl' | '2xl' diff --git a/web/common/tailwind.base.config.js b/web/common/tailwind.base.config.js index cbba9768c2..8f385b53dc 100644 --- a/web/common/tailwind.base.config.js +++ b/web/common/tailwind.base.config.js @@ -1,5 +1,9 @@ -/** @type {import('tailwindcss').Config} */ -module.exports = { +import lineageConfig from './tailwind.lineage.config' +import typography from '@tailwindcss/typography' +import scrollbar from 'tailwind-scrollbar' + +export default { + presets: [lineageConfig], theme: { colors: {}, extend: { @@ -43,7 +47,7 @@ module.exports = { info: 'var(--color-typography-info)', }, message: { - lucid: 'var(--color-message-lucid)', + translucid: 'var(--color-message-translucid)', }, link: { underline: 'var(--color-link-underline)', @@ -67,13 +71,29 @@ module.exports = { model: 'var(--color-model-name-model)', 'copy-icon': 
'var(--color-model-name-copy-icon)', 'copy-icon-hover': 'var(--color-model-name-copy-icon-hover)', + 'copy-icon-background': + 'var(--color-model-name-copy-icon-background)', + 'copy-icon-background-hover': + 'var(--color-model-name-copy-icon-background-hover)', }, badge: { background: 'var(--color-badge-background)', foreground: 'var(--color-badge-foreground)', }, + 'filterable-list': { + counter: { + background: 'var(--color-filterable-list-counter-background)', + foreground: 'var(--color-filterable-list-counter-foreground)', + }, + input: { + background: 'var(--color-filterable-list-input-background)', + foreground: 'var(--color-filterable-list-input-foreground)', + placeholder: 'var(--color-filterable-list-input-placeholder)', + border: 'var(--color-filterable-list-input-border)', + }, + }, input: { - 'background-lucid': 'var(--color-input-background-lucid)', + 'background-translucid': 'var(--color-input-background-translucid)', background: 'var(--color-input-background)', foreground: 'var(--color-input-foreground)', placeholder: 'var(--color-input-placeholder)', @@ -121,6 +141,10 @@ module.exports = { background: 'var(--color-tooltip-background)', foreground: 'var(--color-tooltip-foreground)', }, + metadata: { + label: 'var(--color-metadata-label)', + value: 'var(--color-metadata-value)', + }, }, borderRadius: { '2xs': 'var(--radius-xs)', @@ -148,8 +172,8 @@ module.exports = { }, }, plugins: [ - require('@tailwindcss/typography'), - require('tailwind-scrollbar')({ + typography, + scrollbar({ nocompatible: true, preferredStrategy: 'pseudoelements', }), diff --git a/web/common/tailwind.config.js b/web/common/tailwind.config.js index 67fe2ac528..4e7eee7f2f 100644 --- a/web/common/tailwind.config.js +++ b/web/common/tailwind.config.js @@ -1,5 +1,6 @@ -/** @type {import('tailwindcss').Config} */ -module.exports = { +import baseConfig from './tailwind.base.config' + +export default { + presets: [baseConfig], content: ['./src/**/*.{js,ts,jsx,tsx}', 
'./src/**/*.stories.{js,ts,jsx,tsx}'], - presets: [require('./tailwind.base.config')], } diff --git a/web/common/tailwind.lineage.config.js b/web/common/tailwind.lineage.config.js new file mode 100644 index 0000000000..b615ea756f --- /dev/null +++ b/web/common/tailwind.lineage.config.js @@ -0,0 +1,102 @@ +export default { + theme: { + colors: {}, + extend: { + colors: { + lineage: { + background: 'var(--color-lineage-background)', + divider: 'var(--color-lineage-divider)', + border: 'var(--color-lineage-border)', + control: { + border: 'var(--color-lineage-control-border)', + background: { + DEFAULT: 'var(--color-lineage-control-background)', + hover: 'var(--color-lineage-control-background-hover)', + }, + icon: { + background: 'var(--color-lineage-control-icon-background)', + foreground: 'var(--color-lineage-control-icon-foreground)', + }, + button: { + tooltip: { + border: 'var(--color-lineage-control-button-tooltip-border)', + background: + 'var(--color-lineage-control-button-tooltip-background)', + foreground: + 'var(--color-lineage-control-button-tooltip-foreground)', + }, + }, + }, + grid: { + dot: 'var(--color-lineage-grid-dot)', + }, + edge: { + DEFAULT: 'var(--color-lineage-edge)', + }, + node: { + background: 'var(--color-lineage-node-background)', + foreground: 'var(--color-lineage-node-foreground)', + selected: { + border: 'var(--color-lineage-node-selected-border)', + }, + border: { + DEFAULT: 'var(--color-lineage-node-border)', + hover: 'var(--color-lineage-node-border-hover)', + }, + badge: { + background: 'var(--color-lineage-node-badge-background)', + foreground: 'var(--color-lineage-node-badge-foreground)', + }, + appendix: { + background: 'var(--color-lineage-node-appendix-background)', + }, + handle: { + icon: { + background: + 'var(--color-lineage-node-type-handle-icon-background)', + }, + }, + port: { + background: 'var(--color-lineage-node-port-background)', + handle: { + target: 'var(--color-lineage-node-port-handle-target)', + source: 
'var(--color-lineage-node-port-handle-source)', + }, + edge: { + source: 'var(--color-lineage-node-port-edge-source)', + target: 'var(--color-lineage-node-port-edge-target)', + }, + }, + }, + model: { + column: { + active: { + background: + 'var(--color-lineage-model-column-active-background)', + foreground: + 'var(--color-lineage-model-column-active-foreground)', + }, + source: { + background: + 'var(--color-lineage-model-column-source-background)', + }, + expression: { + background: + 'var(--color-lineage-model-column-expression-background)', + }, + error: { + background: + 'var(--color-lineage-model-column-error-background)', + icon: 'var(--color-lineage-model-column-error-icon)', + }, + icon: { + DEFAULT: 'var(--color-lineage-model-column-icon)', + active: 'var(--color-lineage-model-column-icon-active)', + }, + }, + }, + }, + }, + }, + }, +} diff --git a/web/common/tsconfig.base.json b/web/common/tsconfig.base.json index 99a214fe47..68b9b27a73 100644 --- a/web/common/tsconfig.base.json +++ b/web/common/tsconfig.base.json @@ -3,7 +3,7 @@ "target": "ES2022", "jsx": "react-jsx", "module": "ESNext", - "lib": ["ES2022", "DOM", "DOM.Iterable"], + "lib": ["ESNext", "DOM", "DOM.Iterable"], "types": ["vite/client"], /* Bundler mode */ @@ -28,7 +28,7 @@ /* Paths */ "baseUrl": ".", "paths": { - "@/*": ["./src/*"] + "@sqlmesh-common/*": ["./src/*"] } } } diff --git a/web/common/tsconfig.build.json b/web/common/tsconfig.build.json index 7eba394efd..527242427c 100644 --- a/web/common/tsconfig.build.json +++ b/web/common/tsconfig.build.json @@ -15,6 +15,7 @@ "declarationMap": true, "declarationDir": "./dist", "emitDeclarationOnly": false, - "outDir": "./dist" + "outDir": "./dist", + "rootDir": "./src" } } diff --git a/web/common/vite.config.js b/web/common/vite.config.js index 237bed29bd..e9e2641fda 100644 --- a/web/common/vite.config.js +++ b/web/common/vite.config.js @@ -22,20 +22,34 @@ export default defineConfig({ src: 'tailwind.base.config.js', dest: 'configs', }, + { + 
src: 'tailwind.lineage.config.js', + dest: 'configs', + }, ], }), ], resolve: { alias: { - '@': path.resolve(__dirname, './src'), + '@sqlmesh-common': path.resolve(__dirname, './src'), }, }, build: { cssMinify: true, lib: { - entry: path.resolve(__dirname, 'src/index.ts'), + entry: { + 'sqlmesh-common': path.resolve(__dirname, 'src/index.ts'), + 'lineage/index': path.resolve( + __dirname, + 'src/components/Lineage/index.ts', + ), + }, name: 'sqlmesh-common', - fileName: format => `sqlmesh-common.${format}.js`, + fileName: (format, entryName) => + ({ + 'sqlmesh-common': `sqlmesh-common.${format}.js`, + 'lineage/index': `lineage/index.${format}.js`, + })[entryName], }, rollupOptions: { external: [ @@ -47,6 +61,7 @@ export default defineConfig({ '@radix-ui/react-slot', 'tailwindcss', '@tailwindcss/typography', + '@xyflow/react', ], output: { globals: { @@ -56,6 +71,7 @@ export default defineConfig({ 'tailwind-merge': 'tailwindMerge', 'class-variance-authority': 'classVarianceAuthority', '@radix-ui/react-slot': 'radixSlot', + '@xyflow/react': 'xyflowReact', }, assetFileNames: assetInfo => { if (assetInfo.name && assetInfo.name.endsWith('.css')) { diff --git a/web/server/api/endpoints/table_diff.py b/web/server/api/endpoints/table_diff.py index d441d49e5a..b0167ed032 100644 --- a/web/server/api/endpoints/table_diff.py +++ b/web/server/api/endpoints/table_diff.py @@ -126,7 +126,7 @@ def get_table_diff( table_diffs = context.table_diff( source=source, target=target, - on=exp.condition(on) if on else None, + on=t.cast(exp.Condition, exp.condition(on)) if on else None, select_models={model_or_snapshot} if model_or_snapshot else None, where=where, limit=limit,