From 2c91c504fc42b445760d4895c239768f17ca3547 Mon Sep 17 00:00:00 2001 From: Stephen Shen Date: Mon, 13 Oct 2025 23:00:44 -0400 Subject: [PATCH 1/4] Add workflows --- .../monitor-upstream-and-analyze.yml | 411 ++++++++++++++++++ .github/workflows/run-analysis.yml | 28 ++ .github/workflows/run-filter.yml | 33 ++ 3 files changed, 472 insertions(+) create mode 100644 .github/workflows/monitor-upstream-and-analyze.yml create mode 100644 .github/workflows/run-analysis.yml create mode 100644 .github/workflows/run-filter.yml diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml new file mode 100644 index 0000000..652e9bf --- /dev/null +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -0,0 +1,411 @@ +name: Monitor Upstream and Run Analysis + +on: + schedule: + - cron: "*/15 * * * *" + workflow_dispatch: + inputs: + start_commit: + description: "Start commit SHA" + required: false + type: string + +permissions: + actions: write + contents: write + issues: write + +jobs: + monitor-upstream: + runs-on: ubuntu-latest + env: + # NEED TO BE CONFIGURED EACH PROJECT + UPSTREAM_REPO: "CausalInferenceLab/Lang2SQL" + BRANCH: "master" + RUNNER_DISPATCH_TIMEOUT: 3600 # 1 hour + FILTER_DISPATCH_TIMEOUT: 600 # 10 minutes + MAX_CONCURRENT: 7 + + steps: + - name: Checkout the forked repo + uses: actions/checkout@v4 + with: + token: ${{ secrets.ORG_WIDE_TOKEN }} + path: forked-repo + fetch-depth: 0 + + - name: Sync fork with upstream + run: | + set -euo pipefail + cd forked-repo + echo "Syncing fork with upstream..." + + # Configure Git user name and email properly + git config --global --add safe.directory "$PWD" + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + # Ensure refs are current + git fetch --prune origin + git remote add upstream "https://github.com/${UPSTREAM_REPO}.git" || true + git fetch --prune upstream --tags + + # Check out the branch if it is not already checked out + git checkout "${BRANCH}" + + # Rebasing the branch onto the upstream branch + echo "Rebasing ${BRANCH} onto upstream/${BRANCH}..." + if ! git rebase -X theirs --rebase-merges "upstream/${BRANCH}"; then + echo "Rebase failed; aborting." + git rebase --abort || true + exit 1 + fi + + # Push only if diverged; use --force-with-lease for safety (we pushed the rebased branch to origin) + # This is to avoid conflicts when rebasing + if ! git diff --quiet "origin/${BRANCH}..HEAD"; then + git push --force-with-lease origin "${BRANCH}" + echo "Pushed rebased ${BRANCH} to origin." + else + echo "No changes to push after rebase." + fi + + # Change back to the root directory + cd .. 
+ + - name: Clean up the workspace + run: | + rm -rf "$GITHUB_WORKSPACE/forked-repo" + + - name: Prepare and restore cache folder + id: cache-folder + uses: actions/cache/restore@v4 + with: + path: .continuous-analysis-cache + key: continuous-analysis-cache-${{ github.repository }}- + + - name: Create cache folder if not exists + run: | + mkdir -p .continuous-analysis-cache + + - name: Load last seen SHA from cache folder + id: last-sha + run: | + # If start_commit is provided, set it as the last seen SHA + if [[ -n "${{ inputs.start_commit }}" ]]; then + echo "last_sha=${{ inputs.start_commit }}" >> $GITHUB_OUTPUT + echo "Start commit provided: ${{ inputs.start_commit }}" + exit 0 + fi + + # Declare the file path to the last seen SHA + FILE=".continuous-analysis-cache/last_sha.txt" + + # Check if the file exists and load the last seen SHA + if [[ -f "$FILE" ]]; then + LAST_SHA=$(cat "$FILE") + echo "Last seen SHA found in cache: $LAST_SHA" + else + LAST_SHA="" + echo "No last seen SHA found in cache" + fi + + # Output the last seen SHA to the GitHub Actions output for further use + echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT + + - name: Get upstream commits and find new ones + id: check-commits + run: | + # Print the upstream repo and branch to the console + echo "Finding the latest 100 commits from the upstream repo: $UPSTREAM_REPO@$BRANCH" + + # Get the latest 100 commits from the upstream repo and save them to a JSON file + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${UPSTREAM_REPO}/commits?sha=${BRANCH}&per_page=100" \ + > commits.json + + # Parse the first commit SHA from the JSON file for sanity check + if ! jq -e '.[0].sha' commits.json > /dev/null; then + echo "Failed to parse SHA from commits.json" + exit 1 + fi + + # Parse all commit SHAs (from newest to oldest) from the JSON file and save to all_commits.txt + jq -r '.[].sha' commits.json > all_commits.txt + + # Get the last seen SHA from the previous step + LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}" + echo "Last seen SHA: $LAST_SEEN" + + # If the last seen SHA is empty, select the latest commit SHA + if [[ -z "$LAST_SEEN" ]]; then + head -n 1 all_commits.txt > new_commits.txt + echo "has_new_commits=true" >> $GITHUB_OUTPUT + echo "First-time run — selecting the latest commit: $(head -n 1 all_commits.txt)" + else + # If the last seen SHA is not empty, filter out previously seen commits + # Print all new commit SHAs (above the last seen SHA) to new_commits.txt + awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > new_commits.txt + + if [ ! -s new_commits.txt ]; then + echo "No new commits to process." 
+              echo "has_new_commits=false" >> $GITHUB_OUTPUT
+            else
+              echo "New commits to process:"
+              echo "has_new_commits=true" >> $GITHUB_OUTPUT
+              cat new_commits.txt
+            fi
+          fi
+
+      - name: Generate dispatch ID
+        id: dispatch-id
+        run: |
+          # If start_commit is provided, generate a random dispatch ID for history analysis
+          if [[ -n "${{ inputs.start_commit }}" ]]; then
+            dispatch_id="$(date -u +%Y%m%dT%H%M%SZ)-$RANDOM"
+            echo "dispatch_id=$dispatch_id" >> $GITHUB_OUTPUT
+            echo "Generated dispatch ID: $dispatch_id"
+          else
+            echo "dispatch_id=" >> $GITHUB_OUTPUT
+            echo "No dispatch ID needed for regular runs"
+          fi
+
+      - name: Trigger analysis workflows for new commits in parallel
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Read the commits from file
+          mapfile -t commits < new_commits.txt
+
+          # If no new commits to process, exit
+          if [ ${#commits[@]} -eq 0 ]; then
+            echo "No new commits to process"
+            exit 0
+          fi
+
+          # Use the shared dispatch ID
+          dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
+
+          # Extract repository name for future usages
+          repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
+
+          # Configuration
+          MAX_CONCURRENT=${{ env.MAX_CONCURRENT }}
+          total_commits=${#commits[@]}
+          echo "Processing ${total_commits} commits in batches of ${MAX_CONCURRENT}..."
+
+          # Process commits in batches
+          for ((batch_start=0; batch_start<total_commits; batch_start+=MAX_CONCURRENT)); do
+            batch_end=$((batch_start + MAX_CONCURRENT))
+            ((batch_end > total_commits)) && batch_end=$total_commits
+            batch_num=$((batch_start/MAX_CONCURRENT + 1))
+            echo "Processing batch ${batch_num}..."
+
+            # Create arrays to track dispatched workflows and their artifacts for this batch
+            declare -a dispatched_commits=()
+            declare -a artifact_names=()
+
+            # Dispatch workflows for this batch
+            for ((i=batch_start; i<batch_end; i++)); do
+              commit="${commits[$i]}"
+
+              # Generate artifact name to detect completion
+              if [[ -n "${{ inputs.start_commit }}" ]]; then
+                artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${commit}"
+              else
+                artifact_name="continuous-analysis-results-${repo_name}-${commit}"
+              fi
+
+              # Trigger the analysis workflow
+              echo "Dispatching analysis workflow for commit: $commit"
+              gh workflow run run-analysis.yml \
+                --repo "${GITHUB_REPOSITORY}" \
+                --ref "${{ env.BRANCH }}" \
+                --field commit="$commit" \
+                --field dispatch_id="$dispatch_id"
+
+              dispatched_commits+=("$commit")
+              artifact_names+=("$artifact_name")
+            done
+
+            # Wait for all workflows in this batch to complete
+            echo "Waiting for batch ${batch_num} workflows to complete..."
+            declare -a completed_commits=()
+            end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT ))
+
+            while [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ] && [ "$(date +%s)" -lt "$end_time" ]; do
+              # Fetch the current list of artifact names
+              current_artifacts=$(curl -s -H "Authorization: token $GH_TOKEN" \
+                "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \
+                | jq -r '.artifacts[].name' 2>/dev/null || echo "")
+
+              # Check each dispatched workflow in this batch
+              for i in "${!dispatched_commits[@]}"; do
+                commit="${dispatched_commits[$i]}"
+                artifact_name="${artifact_names[$i]}"
+
+                # Skip if already completed
+                if [[ " ${completed_commits[@]} " =~ " ${commit} " ]]; then
+                  continue
+                fi
+
+                # Check if artifact exists
+                if echo "$current_artifacts" | grep -q "^${artifact_name}"; then
+                  echo "Artifact ${artifact_name} found for commit ${commit}."
+                  completed_commits+=("$commit")
+                fi
+              done
+
+              # Report progress for this batch
+              completed_count=${#completed_commits[@]}
+              total_count=${#dispatched_commits[@]}
+              echo "Batch ${batch_num} progress: ${completed_count}/${total_count} workflows completed"
+
+              # If not all complete, wait before next check
+              if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
+                echo "Waiting 60 seconds before next check..."
+                sleep 60
+              fi
+            done
+
+            # Check if all workflows in this batch completed successfully
+            if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
+              echo "ERROR: Timed out waiting for batch ${batch_num} workflows to complete" >&2
+              echo "Completed: ${#completed_commits[@]}/${#dispatched_commits[@]}" >&2
+              echo "Missing artifacts:" >&2
+              for i in "${!dispatched_commits[@]}"; do
+                commit="${dispatched_commits[$i]}"
+                if [[ ! " ${completed_commits[@]} " =~ " ${commit} " ]]; then
+                  echo "  - ${artifact_names[$i]} (commit: ${commit})" >&2
+                fi
+              done
+              exit 1
+            fi
+
+            echo "Batch ${batch_num} completed successfully! (${#dispatched_commits[@]} workflows)"
+
+            # Clear arrays for next batch
+            unset dispatched_commits
+            unset artifact_names
+            unset completed_commits
+          done
+
+          echo "All ${total_commits} workflows completed successfully across all batches!"
+ + - name: Trigger violation filter workflows for new commits in sequence + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # Read the commits from file + mapfile -t commits < new_commits.txt + + # If no new commits to process, exit + if [ ${#commits[@]} -eq 0 ]; then + echo "No new commits to process" + exit 0 + fi + + # Use the same dispatch_id as the analysis workflows + dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}" + + # Extract repository name for future usages + repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2) + + # Process commits in sequence for violation filter + echo "Processing ${#commits[@]} commits for violation filter in sequence..." + + # Reverse the commits array to process from oldest to newest + reversed_commits=() + for ((i=${#commits[@]}-1; i>=0; i--)); do + reversed_commits+=("${commits[$i]}") + done + + # Process each commit in sequence + for i in "${!reversed_commits[@]}"; do + current_commit="${reversed_commits[$i]}" + + # For the first commit (oldest), previous_commit is empty + # For subsequent commits, previous_commit is the previous one in the sequence + if [ $i -eq 0 ]; then + previous_commit="" + echo "Processing first commit: $current_commit (no previous commit)" + else + previous_commit="${reversed_commits[$i-1]}" + echo "Processing commit: $current_commit (previous: $previous_commit)" + fi + + # Generate artifact name to detect completion + if [[ -n "${{ inputs.start_commit }}" ]]; then + artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${current_commit}-filtered" + else + artifact_name="continuous-analysis-results-${repo_name}-${current_commit}-filtered" + fi + + # Trigger the violation filter workflow + gh workflow run run-filter.yml \ + --repo "${GITHUB_REPOSITORY}" \ + --ref "${{ env.BRANCH }}" \ + --field current_commit="$current_commit" \ + --field previous_commit="$previous_commit" \ + --field dispatch_id="$dispatch_id" + + # Wait for this workflow to complete + echo "Waiting for violation filter workflow to complete for commit: $current_commit" + artifact_created=false + + # Set the end time for the timeout + end_time=$(( $(date +%s) + FILTER_DISPATCH_TIMEOUT )) + + # Wait for the artifact to be created + while [ "$artifact_created" = false ] && [ "$(date +%s)" -lt "$end_time" ]; do + if curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \ + | jq -r '.artifacts[].name' 2>/dev/null | grep -q "^${artifact_name}"; then + artifact_created=true + echo "Violation filter artifact ${artifact_name} found for commit ${current_commit}." + break + fi + echo "Violation filter artifact not found yet, waiting 30 seconds..." + sleep 30 + done + + # Check if the workflow completed successfully + if [ "$artifact_created" = false ]; then + echo "ERROR: Timed out waiting for violation filter artifact ${artifact_name} for commit ${current_commit}" >&2 + exit 1 + fi + done + + echo "All ${#commits[@]} violation filter workflows completed successfully!" 
+ + - name: Update SHA cache + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + run: | + NEWEST=$(head -n 1 new_commits.txt) + echo "$NEWEST" > .continuous-analysis-cache/last_sha.txt + + - name: Generate timestamp + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + id: timestamp + run: | + ts=$(date +'%Y%m%d-%H%M') + echo "ts=$ts" >> "$GITHUB_OUTPUT" + echo "Generated timestamp: $ts" + + - name: Save updated SHA cache with timestamp + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + uses: actions/cache/save@v4 + with: + path: .continuous-analysis-cache + key: continuous-analysis-cache-${{ github.repository }}-${{ steps.timestamp.outputs.ts }} diff --git a/.github/workflows/run-analysis.yml b/.github/workflows/run-analysis.yml new file mode 100644 index 0000000..beba369 --- /dev/null +++ b/.github/workflows/run-analysis.yml @@ -0,0 +1,28 @@ +name: Trigger Continuous Analysis + +on: + workflow_dispatch: + inputs: + commit: + description: 'Single commit SHA to test' + required: true + type: string + dispatch_id: + description: "Unique id from dispatcher for history runnings" + required: false + type: string + +permissions: + actions: read + contents: write + issues: write + +jobs: + analyze-single-commit: + uses: ContinuousAnalysis/continuous-analysis/.github/workflows/auto-runner.yml@main + with: + project: ${{ github.repository }} + commit: ${{ inputs.commit }} + dispatch_id: ${{ inputs.dispatch_id }} + secrets: + ORG_WIDE_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} diff --git a/.github/workflows/run-filter.yml b/.github/workflows/run-filter.yml new file mode 100644 index 0000000..7519390 --- /dev/null +++ b/.github/workflows/run-filter.yml @@ -0,0 +1,33 @@ +name: Trigger Continuous Analysis Violation Filter + +on: + workflow_dispatch: + inputs: + current_commit: + description: 'Current commit SHA to test' + required: true + type: string + previous_commit: + description: 'Previous commit SHA to test' + required: false + type: string + dispatch_id: + description: "Unique id from dispatcher for history runnings" + required: false + type: string + +permissions: + actions: read + contents: write + issues: write + +jobs: + analyze-single-commit: + uses: ContinuousAnalysis/continuous-analysis/.github/workflows/auto-filter.yml@main + with: + project: ${{ github.repository }} + current_commit: ${{ inputs.current_commit }} + previous_commit: ${{ inputs.previous_commit }} + dispatch_id: ${{ inputs.dispatch_id }} + secrets: + ORG_WIDE_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} From f35a25592374ae071c94f55ecf9d7f0214f78472 Mon Sep 17 00:00:00 2001 From: Stephen Shen Date: Tue, 25 Nov 2025 11:08:31 -0500 Subject: [PATCH 2/4] Update workflows --- .github/workflows/create-release.yml | 110 +++++ .../monitor-upstream-and-analyze.yml | 432 ++++++++---------- .github/workflows/monitor-upstream.yml | 300 ++++++++++++ .github/workflows/run-filter.yml | 5 + .github/workflows/set-cache-sha.yml | 49 ++ 5 files changed, 658 insertions(+), 238 deletions(-) create mode 100644 .github/workflows/create-release.yml create mode 100644 .github/workflows/monitor-upstream.yml create mode 100644 .github/workflows/set-cache-sha.yml diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml new file mode 100644 index 0000000..40d9270 --- /dev/null +++ b/.github/workflows/create-release.yml @@ -0,0 +1,110 @@ +name: Create Release for Analysis Artifacts + +on: + workflow_dispatch: + inputs: + 
prefix: + description: "Artifact name prefix" + required: true + type: string + +permissions: + contents: write + +jobs: + release: + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} + + steps: + - name: Create timestamp + id: ts + run: | + ts="$(date -u +%Y%m%dT%H%M%SZ)" + echo "ts=$ts" >> $GITHUB_OUTPUT + + - name: Create GitHub Release + id: create-release + run: | + tag="analysis-${{ steps.ts.outputs.ts }}" + name="Continuous Analysis Release (${{ steps.ts.outputs.ts }})" + + echo "Creating release: $name" + + response=$(curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"tag_name\": \"$tag\", \"name\": \"$name\", \"draft\": false}" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/releases") + + upload_url=$(echo "$response" | jq -r '.upload_url' | sed 's/{?name,label}//') + echo "upload_url=$upload_url" >> $GITHUB_OUTPUT + + - name: Fetch all artifact metadata + id: fetch + run: | + echo "Fetching all artifacts..." + + page=1 + all="[]" + while true; do + response=$(curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100&page=$page") + + artifacts=$(echo "$response" | jq -c '.artifacts[]?') + if [[ -z "$artifacts" ]]; then break; fi + + while IFS= read -r art; do + all=$(echo "$all" | jq --argjson a "$art" '. + [$a]') + done <<< "$artifacts" + + count=$(echo "$response" | jq '.artifacts | length') + (( count < 100 )) && break + + ((page++)) + done + + echo "$all" > all_artifacts.json + echo "Saved metadata for all artifacts." + + - name: Upload matching artifacts to release + run: | + prefix="${{ inputs.prefix }}" + upload_url="${{ steps.create-release.outputs.upload_url }}" + + echo "Looking for artifacts starting with: $prefix" + echo "" + + matches=$(jq -c --arg p "$prefix" '.[] | select(.name | startswith($p))' all_artifacts.json) + + if [[ "$(echo "$matches" | wc -l)" -eq 0 ]]; then + echo "❌ No artifacts found starting with: $prefix" + exit 1 + fi + + echo "$matches" | while IFS= read -r art; do + name=$(echo "$art" | jq -r '.name') + id=$(echo "$art" | jq -r '.id') + zip="${name}.zip" + + echo "▶ Downloading artifact: $name (ID $id)" + + curl -L -s \ + -H "Authorization: token $GH_TOKEN" \ + -o "$zip" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts/${id}/zip" + + echo "⬆ Uploading $zip to release..." + + curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/zip" \ + --data-binary @"$zip" \ + "${upload_url}?name=${zip}" + + echo "✓ Uploaded: $zip" + echo "" + done + + echo "🎉 Release upload completed successfully!" 
diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml index 652e9bf..fd8a1f5 100644 --- a/.github/workflows/monitor-upstream-and-analyze.yml +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -2,13 +2,19 @@ name: Monitor Upstream and Run Analysis on: schedule: - - cron: "*/15 * * * *" + - cron: "0 */6 * * *" workflow_dispatch: inputs: - start_commit: - description: "Start commit SHA" + number_of_commits: + description: "Historical mode: analyze N previous commits (0 = continuous mode)" required: false - type: string + type: number + default: 0 + skip_commits: + description: "Skip commit pattern: process every (N+1)th commit (0 = process all)" + required: false + type: number + default: 0 permissions: actions: write @@ -18,16 +24,20 @@ permissions: jobs: monitor-upstream: runs-on: ubuntu-latest + env: # NEED TO BE CONFIGURED EACH PROJECT UPSTREAM_REPO: "CausalInferenceLab/Lang2SQL" BRANCH: "master" - RUNNER_DISPATCH_TIMEOUT: 3600 # 1 hour - FILTER_DISPATCH_TIMEOUT: 600 # 10 minutes - MAX_CONCURRENT: 7 + RUNNER_DISPATCH_TIMEOUT: 7200 # 2 hours + FILTER_DISPATCH_TIMEOUT: 1800 # 30 minutes + MAX_CONCURRENT: 8 steps: - - name: Checkout the forked repo + # -------------------------------------------------------------------- + # STEP 1 — SYNC FORK WITH UPSTREAM + # -------------------------------------------------------------------- + - name: Checkout fork uses: actions/checkout@v4 with: token: ${{ secrets.ORG_WIDE_TOKEN }} @@ -38,373 +48,319 @@ jobs: run: | set -euo pipefail cd forked-repo - echo "Syncing fork with upstream..." - - # Configure Git user name and email properly + + echo "Syncing ${BRANCH} with upstream ${UPSTREAM_REPO}..." + git config --global --add safe.directory "$PWD" git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - - # Ensure refs are current + git fetch --prune origin git remote add upstream "https://github.com/${UPSTREAM_REPO}.git" || true git fetch --prune upstream --tags - - # Check out the branch if it is not already checked out + git checkout "${BRANCH}" - - # Rebasing the branch onto the upstream branch - echo "Rebasing ${BRANCH} onto upstream/${BRANCH}..." + + echo "Rebasing local branch onto upstream/${BRANCH}..." if ! git rebase -X theirs --rebase-merges "upstream/${BRANCH}"; then - echo "Rebase failed; aborting." + echo "Rebase failed -- aborting." git rebase --abort || true exit 1 fi - - # Push only if diverged; use --force-with-lease for safety (we pushed the rebased branch to origin) - # This is to avoid conflicts when rebasing + if ! git diff --quiet "origin/${BRANCH}..HEAD"; then git push --force-with-lease origin "${BRANCH}" - echo "Pushed rebased ${BRANCH} to origin." + echo "Rebase changes pushed to origin." else - echo "No changes to push after rebase." + echo "No diverged changes; nothing to push." fi - # Change back to the root directory cd .. 
- - name: Clean up the workspace - run: | - rm -rf "$GITHUB_WORKSPACE/forked-repo" + - name: Cleanup forked repo + run: rm -rf forked-repo - - name: Prepare and restore cache folder + # -------------------------------------------------------------------- + # STEP 2 — LOAD LAST-SEEN SHA (or compute from history) + # -------------------------------------------------------------------- + - name: Restore analysis cache folder id: cache-folder uses: actions/cache/restore@v4 with: path: .continuous-analysis-cache key: continuous-analysis-cache-${{ github.repository }}- - - name: Create cache folder if not exists - run: | - mkdir -p .continuous-analysis-cache + - name: Ensure cache folder exists + run: mkdir -p .continuous-analysis-cache - - name: Load last seen SHA from cache folder + - name: Determine last-seen SHA id: last-sha run: | - # If start_commit is provided, set it as the last seen SHA - if [[ -n "${{ inputs.start_commit }}" ]]; then - echo "last_sha=${{ inputs.start_commit }}" >> $GITHUB_OUTPUT - echo "Start commit provided: ${{ inputs.start_commit }}" + NUMBER_OF_COMMITS="${{ inputs.number_of_commits }}" + + # --------------------------- + # Historical mode + # --------------------------- + if [[ "$NUMBER_OF_COMMITS" -gt 0 ]]; then + echo "Historical mode: computing boundary SHA for ${NUMBER_OF_COMMITS} commits." + + git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + cd upstream-repo + git checkout "${BRANCH}" + git rev-list --first-parent "${BRANCH}" > ../linear_commits.txt + cd .. + rm -rf upstream-repo + + TOTAL=$(wc -l < linear_commits.txt) + echo "Total commits in first-parent history: $TOTAL" + + if [[ "$NUMBER_OF_COMMITS" -ge "$TOTAL" ]]; then + echo "ERROR: number_of_commits $NUMBER_OF_COMMITS exceeds available history ($TOTAL)." >&2 + exit 1 + fi + + LAST_SHA=$(sed -n "$((NUMBER_OF_COMMITS + 1))p" linear_commits.txt) + echo "Historical boundary last_sha = $LAST_SHA" + echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT exit 0 fi - # Declare the file path to the last seen SHA - FILE=".continuous-analysis-cache/last_sha.txt" + # --------------------------- + # Continuous mode + # --------------------------- + CACHE_FILE=".continuous-analysis-cache/last_sha.txt" - # Check if the file exists and load the last seen SHA - if [[ -f "$FILE" ]]; then - LAST_SHA=$(cat "$FILE") - echo "Last seen SHA found in cache: $LAST_SHA" + if [[ -f "$CACHE_FILE" ]]; then + LAST_SHA=$(cat "$CACHE_FILE") + echo "Loaded last-seen SHA from cache: $LAST_SHA" else LAST_SHA="" - echo "No last seen SHA found in cache" + echo "No last-seen SHA found; this is the first run." fi - # Output the last seen SHA to the GitHub Actions output for further use echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT - - name: Get upstream commits and find new ones + # -------------------------------------------------------------------- + # STEP 3 — FETCH COMMIT HISTORY & COMPUTE NEW COMMITS + # -------------------------------------------------------------------- + - name: Fetch first-parent commit history id: check-commits run: | - # Print the upstream repo and branch to the console - echo "Finding the latest 100 commits from the upstream repo: $UPSTREAM_REPO@$BRANCH" - - # Get the latest 100 commits from the upstream repo and save them to a JSON file - curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ - "https://api.github.com/repos/${UPSTREAM_REPO}/commits?sha=${BRANCH}&per_page=100" \ - > commits.json + echo "Fetching upstream first-parent history..." 
- # Parse the first commit SHA from the JSON file for sanity check - if ! jq -e '.[0].sha' commits.json > /dev/null; then - echo "Failed to parse SHA from commits.json" - exit 1 - fi + git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + cd upstream-repo + git checkout "${BRANCH}" + git rev-list --first-parent "${BRANCH}" > ../all_commits.txt + cd .. + rm -rf upstream-repo - # Parse all commit SHAs (from newest to oldest) from the JSON file and save to all_commits.txt - jq -r '.[].sha' commits.json > all_commits.txt + echo "Total first-parent commits: $(wc -l < all_commits.txt)" - # Get the last seen SHA from the previous step LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}" - echo "Last seen SHA: $LAST_SEEN" + SKIP_COMMITS="${{ inputs.skip_commits }}" + + [[ -z "$SKIP_COMMITS" || "$SKIP_COMMITS" -lt 0 ]] && SKIP_COMMITS=0 - # If the last seen SHA is empty, select the latest commit SHA + # First-time run if [[ -z "$LAST_SEEN" ]]; then head -n 1 all_commits.txt > new_commits.txt + echo "Initial run: analyzing latest commit only: $(cat new_commits.txt)" echo "has_new_commits=true" >> $GITHUB_OUTPUT - echo "First-time run — selecting the latest commit: $(head -n 1 all_commits.txt)" - else - # If the last seen SHA is not empty, filter out previously seen commits - # Print all new commit SHAs (above the last seen SHA) to new_commits.txt - awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > new_commits.txt + exit 0 + fi - if [ ! -s new_commits.txt ]; then - echo "No new commits to process." - echo "has_new_commits=false" >> $GITHUB_OUTPUT - else - echo "New commits to process:" - echo "has_new_commits=true" >> $GITHUB_OUTPUT - cat new_commits.txt - fi + # Extract new commits above LAST_SEEN + awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > temp_new_commits.txt + + if [[ ! -s temp_new_commits.txt ]]; then + echo "No new commits since last analysis." 
+            touch new_commits.txt
+            echo "has_new_commits=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Apply skip pattern
+          if [[ "$SKIP_COMMITS" -eq 0 ]]; then
+            mv temp_new_commits.txt new_commits.txt
+          else
+            awk "NR % ($SKIP_COMMITS + 1) == 1" temp_new_commits.txt > new_commits.txt
+            rm temp_new_commits.txt
+          fi
+
+          echo "has_new_commits=true" >> $GITHUB_OUTPUT
+          echo "New commits to analyze (total: $(wc -l < new_commits.txt)):"
+          cat new_commits.txt
+
+      # --------------------------------------------------------------------
+      # STEP 4 — GENERATE DISPATCH ID (ONLY FOR HISTORICAL MODE)
+      # --------------------------------------------------------------------
       - name: Generate dispatch ID
         id: dispatch-id
         run: |
-          # If start_commit is provided, generate a random dispatch ID for history analysis
-          if [[ -n "${{ inputs.start_commit }}" ]]; then
+          if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then
             dispatch_id="$(date -u +%Y%m%dT%H%M%SZ)-$RANDOM"
+            echo "Historical run dispatch ID: $dispatch_id"
             echo "dispatch_id=$dispatch_id" >> $GITHUB_OUTPUT
-            echo "Generated dispatch ID: $dispatch_id"
           else
             echo "dispatch_id=" >> $GITHUB_OUTPUT
-            echo "No dispatch ID needed for regular runs"
           fi
 
-      - name: Trigger analysis workflows for new commits in parallel
+      # --------------------------------------------------------------------
+      # STEP 5 — TRIGGER ANALYSIS WORKFLOWS (PARALLEL)
+      # --------------------------------------------------------------------
+      - name: Run analysis workflows
+        if: steps.check-commits.outputs.has_new_commits == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          # Read the commits from file
           mapfile -t commits < new_commits.txt
-
-          # If no new commits to process, exit
-          if [ ${#commits[@]} -eq 0 ]; then
-            echo "No new commits to process"
-            exit 0
-          fi
-
-          # Use the shared dispatch ID
           dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
-
-          # Extract repository name for future usages
           repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
 
-          # Configuration
           MAX_CONCURRENT=${{ env.MAX_CONCURRENT }}
           total_commits=${#commits[@]}
-          echo "Processing ${total_commits} commits in batches of ${MAX_CONCURRENT}..."
 
-          # Process commits in batches
+          echo "Launching analysis for ${total_commits} commits..."
+
           for ((batch_start=0; batch_start<total_commits; batch_start+=MAX_CONCURRENT)); do
             batch_end=$((batch_start + MAX_CONCURRENT))
             ((batch_end > total_commits)) && batch_end=$total_commits
-            batch_num=$((batch_start/MAX_CONCURRENT + 1))
-            echo "Processing batch ${batch_num}..."
+
+            echo "Processing batch $((batch_start/MAX_CONCURRENT + 1))..."
 
-            # Create arrays to track dispatched workflows and their artifacts for this batch
             declare -a dispatched_commits=()
             declare -a artifact_names=()
 
-            # Dispatch workflows for this batch
             for ((i=batch_start; i<batch_end; i++)); do
               commit="${commits[$i]}"
 
-              # Generate artifact name to detect completion
-              if [[ -n "${{ inputs.start_commit }}" ]]; then
+              if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then
                 artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${commit}"
               else
                 artifact_name="continuous-analysis-results-${repo_name}-${commit}"
               fi
 
-              # Trigger the analysis workflow
-              echo "Dispatching analysis workflow for commit: $commit"
+              echo "Dispatching analysis for commit: $commit"
               gh workflow run run-analysis.yml \
                 --repo "${GITHUB_REPOSITORY}" \
-                --ref "${{ env.BRANCH }}" \
+                --ref "${BRANCH}" \
                 --field commit="$commit" \
                 --field dispatch_id="$dispatch_id"
 
               dispatched_commits+=("$commit")
               artifact_names+=("$artifact_name")
             done
 
-            # Wait for all workflows in this batch to complete
-            echo "Waiting for batch ${batch_num} workflows to complete..."
-            declare -a completed_commits=()
-            end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT ))
-
-            while [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ] && [ "$(date +%s)" -lt "$end_time" ]; do
-              # Fetch the current list of artifact names
-              current_artifacts=$(curl -s -H "Authorization: token $GH_TOKEN" \
-                "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \
-                | jq -r '.artifacts[].name' 2>/dev/null || echo "")
-
-              # Check each dispatched workflow in this batch
-              for i in "${!dispatched_commits[@]}"; do
-                commit="${dispatched_commits[$i]}"
-                artifact_name="${artifact_names[$i]}"
-
-                # Skip if already completed
-                if [[ " ${completed_commits[@]} " =~ " ${commit} " ]]; then
-                  continue
-                fi
-
-                # Check if artifact exists
-                if echo "$current_artifacts" | grep -q "^${artifact_name}"; then
-                  echo "Artifact ${artifact_name} found for commit ${commit}."
-                  completed_commits+=("$commit")
+            echo "Waiting for batch artifacts..."
+            end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT ))
+            declare -a done=()
+
+            while [[ ${#done[@]} -lt ${#dispatched_commits[@]} && $(date +%s) -lt $end_time ]]; do
+              names=$(curl -s -H "Authorization: token $GH_TOKEN" \
+                "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \
+                | jq -r '.artifacts[].name')
+
+              for idx in "${!dispatched_commits[@]}"; do
+                if [[ " ${done[@]} " =~ " ${dispatched_commits[$idx]} " ]]; then continue; fi
+                if echo "$names" | grep -q "^${artifact_names[$idx]}"; then
+                  echo "  ✓ Artifact found: ${artifact_names[$idx]}"
+                  done+=("${dispatched_commits[$idx]}")
                 fi
               done
 
-              # Report progress for this batch
-              completed_count=${#completed_commits[@]}
-              total_count=${#dispatched_commits[@]}
-              echo "Batch ${batch_num} progress: ${completed_count}/${total_count} workflows completed"
-
-              # If not all complete, wait before next check
-              if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
-                echo "Waiting 60 seconds before next check..."
-                sleep 60
-              fi
+
+              [[ ${#done[@]} -lt ${#dispatched_commits[@]} ]] && sleep 60
             done
 
-            # Check if all workflows in this batch completed successfully
-            if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
-              echo "ERROR: Timed out waiting for batch ${batch_num} workflows to complete" >&2
-              echo "Completed: ${#completed_commits[@]}/${#dispatched_commits[@]}" >&2
-              echo "Missing artifacts:" >&2
-              for i in "${!dispatched_commits[@]}"; do
-                commit="${dispatched_commits[$i]}"
-                if [[ ! " ${completed_commits[@]} " =~ " ${commit} " ]]; then
-                  echo "  - ${artifact_names[$i]} (commit: ${commit})" >&2
-                fi
-              done
+            if [[ ${#done[@]} -lt ${#dispatched_commits[@]} ]]; then
+              echo "ERROR: Timeout waiting for batch artifacts."
               exit 1
             fi
-
-            echo "Batch ${batch_num} completed successfully! (${#dispatched_commits[@]} workflows)"
-
-            # Clear arrays for next batch
-            unset dispatched_commits
-            unset artifact_names
-            unset completed_commits
+
+            echo "Batch completed."
           done
-
-          echo "All ${total_commits} workflows completed successfully across all batches!"
 
-      - name: Trigger violation filter workflows for new commits in sequence
+      # --------------------------------------------------------------------
+      # STEP 6 — TRIGGER FILTER WORKFLOWS (IN SEQUENCE)
+      # --------------------------------------------------------------------
+      - name: Run violation filter workflows
+        if: steps.check-commits.outputs.has_new_commits == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          # Read the commits from file
           mapfile -t commits < new_commits.txt
-
-          # If no new commits to process, exit
-          if [ ${#commits[@]} -eq 0 ]; then
-            echo "No new commits to process"
-            exit 0
-          fi
-
-          # Use the same dispatch_id as the analysis workflows
           dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
-
-          # Extract repository name for future usages
           repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
 
-          # Process commits in sequence for violation filter
-          echo "Processing ${#commits[@]} commits for violation filter in sequence..."
-
-          # Reverse the commits array to process from oldest to newest
-          reversed_commits=()
+          echo "Executing violation filtering sequentially..."
+ + reversed=() for ((i=${#commits[@]}-1; i>=0; i--)); do - reversed_commits+=("${commits[$i]}") + reversed+=("${commits[$i]}") done - # Process each commit in sequence - for i in "${!reversed_commits[@]}"; do - current_commit="${reversed_commits[$i]}" - - # For the first commit (oldest), previous_commit is empty - # For subsequent commits, previous_commit is the previous one in the sequence - if [ $i -eq 0 ]; then - previous_commit="" - echo "Processing first commit: $current_commit (no previous commit)" - else - previous_commit="${reversed_commits[$i-1]}" - echo "Processing commit: $current_commit (previous: $previous_commit)" - fi + for i in "${!reversed[@]}"; do + current="${reversed[$i]}" + previous="" + ((i > 0)) && previous="${reversed[$i-1]}" - # Generate artifact name to detect completion - if [[ -n "${{ inputs.start_commit }}" ]]; then - artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${current_commit}-filtered" + echo "Filtering: $current (prev: $previous)" + + if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then + artifact="continuous-analysis-history-filtered-results-${dispatch_id}-${repo_name}-${current}-${{ inputs.skip_commits }}" else - artifact_name="continuous-analysis-results-${repo_name}-${current_commit}-filtered" + artifact="continuous-analysis-filtered-results-${repo_name}-${current}" fi - # Trigger the violation filter workflow gh workflow run run-filter.yml \ --repo "${GITHUB_REPOSITORY}" \ - --ref "${{ env.BRANCH }}" \ - --field current_commit="$current_commit" \ - --field previous_commit="$previous_commit" \ - --field dispatch_id="$dispatch_id" - - # Wait for this workflow to complete - echo "Waiting for violation filter workflow to complete for commit: $current_commit" - artifact_created=false - - # Set the end time for the timeout + --ref "${BRANCH}" \ + --field current_commit="$current" \ + --field previous_commit="$previous" \ + --field dispatch_id="$dispatch_id" \ + --field skip_commits_pattern="${{ inputs.skip_commits }}" + end_time=$(( $(date +%s) + FILTER_DISPATCH_TIMEOUT )) - - # Wait for the artifact to be created - while [ "$artifact_created" = false ] && [ "$(date +%s)" -lt "$end_time" ]; do - if curl -s -H "Authorization: token $GH_TOKEN" \ + created=false + + while ! $created && [[ $(date +%s) -lt $end_time ]]; do + names=$(curl -s -H "Authorization: token $GH_TOKEN" \ "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \ - | jq -r '.artifacts[].name' 2>/dev/null | grep -q "^${artifact_name}"; then - artifact_created=true - echo "Violation filter artifact ${artifact_name} found for commit ${current_commit}." + | jq -r '.artifacts[].name') + if echo "$names" | grep -q "^${artifact}"; then + echo " ✓ Filter artifact: ${artifact}" + created=true break fi - echo "Violation filter artifact not found yet, waiting 30 seconds..." sleep 30 done - # Check if the workflow completed successfully - if [ "$artifact_created" = false ]; then - echo "ERROR: Timed out waiting for violation filter artifact ${artifact_name} for commit ${current_commit}" >&2 + if ! $created; then + echo "ERROR: Timeout waiting for filter artifact: ${artifact}" exit 1 fi done - - echo "All ${#commits[@]} violation filter workflows completed successfully!" 
- - name: Update SHA cache - if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + # -------------------------------------------------------------------- + # STEP 7 — UPDATE SHA CACHE (Continuous mode only) + # -------------------------------------------------------------------- + - name: Update last-seen SHA cache + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.number_of_commits == 0 run: | NEWEST=$(head -n 1 new_commits.txt) echo "$NEWEST" > .continuous-analysis-cache/last_sha.txt + echo "Updated cache: last_sha = $NEWEST" - name: Generate timestamp - if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.number_of_commits == 0 id: timestamp run: | ts=$(date +'%Y%m%d-%H%M') echo "ts=$ts" >> "$GITHUB_OUTPUT" - echo "Generated timestamp: $ts" - - name: Save updated SHA cache with timestamp - if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + - name: Save updated cache + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.number_of_commits == 0 uses: actions/cache/save@v4 with: path: .continuous-analysis-cache diff --git a/.github/workflows/monitor-upstream.yml b/.github/workflows/monitor-upstream.yml new file mode 100644 index 0000000..cc23aa6 --- /dev/null +++ b/.github/workflows/monitor-upstream.yml @@ -0,0 +1,300 @@ +name: Monitor Upstream Repository + +on: + workflow_dispatch: + inputs: + number_of_commits: + description: "Historical mode: analyze N previous commits (0 = continuous mode)" + required: true + type: number + skip_commits: + description: "Skip commit pattern: process every (N+1)th commit (0 = process all)" + required: false + type: number + default: 0 + +permissions: + actions: write + contents: write + issues: write + +jobs: + monitor-upstream: + runs-on: ubuntu-latest + + env: + # NEED TO BE CONFIGURED EACH PROJECT + UPSTREAM_REPO: "CausalInferenceLab/Lang2SQL" + BRANCH: "master" + RUNNER_DISPATCH_TIMEOUT: 7200 # 2 hours + MAX_CONCURRENT: 9 + + steps: + # -------------------------------------------------------------------- + # STEP 1 — COMPUTE LAST-SEEN SHA BASED ON NUMBER OF COMMITS + # -------------------------------------------------------------------- + - name: Determine last-seen SHA + id: last-sha + run: | + # Set error handling + set -euo pipefail + + # Get the number of commits to analyze + NUMBER_OF_COMMITS="${{ inputs.number_of_commits }}" + echo "Historical mode: computing boundary SHA for ${NUMBER_OF_COMMITS} commits." + + # Clone the upstream repository + git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + cd upstream-repo + git checkout "${BRANCH}" + git rev-list --first-parent "${BRANCH}" > ../linear_commits.txt + cd .. + rm -rf upstream-repo + + # Get the total number of commits in the first-parent history + TOTAL=$(wc -l < linear_commits.txt) + echo "Total commits in first-parent history: $TOTAL" + + # Check if the number of commits to analyze is greater than 0 + if [[ "$NUMBER_OF_COMMITS" -le 0 ]]; then + echo "ERROR: number_of_commits $NUMBER_OF_COMMITS is less than or equal to 0." >&2 + exit 1 + fi + + # Check if the number of commits to analyze exceeds the total number of commits in the first-parent history + if [[ "$NUMBER_OF_COMMITS" -ge "$TOTAL" ]]; then + echo "ERROR: number_of_commits $NUMBER_OF_COMMITS exceeds available history ($TOTAL)." 
>&2
+            exit 1
+          fi
+
+          # Extract the last seen SHA based on the number of commits to analyze
+          LAST_SHA=$(sed -n "$((NUMBER_OF_COMMITS + 1))p" linear_commits.txt)
+          echo "Historical boundary last_sha = $LAST_SHA"
+          echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT
+
+      # --------------------------------------------------------------------
+      # STEP 2 — FETCH COMMIT HISTORY & COMPUTE NEW COMMITS
+      # --------------------------------------------------------------------
+      - name: Fetch first-parent commit history
+        id: check-commits
+        run: |
+          echo "Fetching upstream first-parent history..."
+
+          git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo
+          cd upstream-repo
+          git checkout "${BRANCH}"
+          git rev-list --first-parent "${BRANCH}" > ../all_commits.txt
+          cd ..
+          rm -rf upstream-repo
+
+          echo "Total first-parent commits: $(wc -l < all_commits.txt)"
+
+          LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}"
+          SKIP_COMMITS="${{ inputs.skip_commits }}"
+
+          [[ -z "$SKIP_COMMITS" || "$SKIP_COMMITS" -lt 0 ]] && SKIP_COMMITS=0
+
+          # Extract new commits above LAST_SEEN
+          awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > temp_new_commits.txt
+
+          # Apply skip pattern if skip_commits is provided
+          if [[ "$SKIP_COMMITS" -eq 0 ]]; then
+            mv temp_new_commits.txt new_commits.txt
+          else
+            awk "NR % ($SKIP_COMMITS + 1) == 1" temp_new_commits.txt > new_commits.txt
+            rm temp_new_commits.txt
+          fi
+
+          # Print the new commits to be analyzed
+          echo "New commits to be analyzed (total: $(wc -l < new_commits.txt)):"
+          cat new_commits.txt
+
+      # --------------------------------------------------------------------
+      # STEP 3 — GENERATE DISPATCH ID
+      # --------------------------------------------------------------------
+      - name: Generate dispatch ID
+        id: dispatch-id
+        run: |
+          dispatch_id="$(date -u +%Y%m%dT%H%M%SZ)-$RANDOM"
+          echo "Dispatch ID: $dispatch_id"
+          echo "dispatch_id=$dispatch_id" >> $GITHUB_OUTPUT
+
+      # --------------------------------------------------------------------
+      # STEP 4 — RUN ANALYSIS (PARALLEL) AND COLLECT ARTIFACTS
+      # --------------------------------------------------------------------
+      - name: Run analysis workflows (parallel) and collect artifacts
+        id: run-analysis
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+
+          mapfile -t commits < new_commits.txt
+          dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
+          repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
+          MAX_CONCURRENT=${{ env.MAX_CONCURRENT }}
+          total_commits=${#commits[@]}
+
+          echo "Launching analysis for ${total_commits} commits..."
+          echo "" > all_expected_artifacts.txt  # all expected artifacts saved here
+
+          # ----------------------------------------
+          # DISPATCH ANALYSIS WORKFLOWS IN BATCHES
+          # ----------------------------------------
+          for ((batch_start=0; batch_start<total_commits; batch_start+=MAX_CONCURRENT)); do
+            batch_end=$((batch_start + MAX_CONCURRENT))
+            ((batch_end > total_commits)) && batch_end=$total_commits
+
+            echo "Processing batch $((batch_start/MAX_CONCURRENT + 1))..."
+
+            declare -a dispatched_commits=()
+            declare -a artifact_names=()
+
+            # Dispatch each workflow in this batch
+            for ((i=batch_start; i<batch_end; i++)); do
+              commit="${commits[$i]}"
+              echo "Dispatching analysis for commit: $commit"
+
+              # Expected artifact name used to detect completion
+              artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${commit}"
+              echo "$artifact_name" >> all_expected_artifacts.txt
+
+              gh workflow run run-analysis.yml \
+                --repo "${GITHUB_REPOSITORY}" \
+                --ref "${BRANCH}" \
+                --field commit="$commit" \
+                --field dispatch_id="$dispatch_id"
+
+              dispatched_commits+=("$commit")
+              artifact_names+=("$artifact_name")
+            done
+
+            # -----------------------------
+            # WAIT FOR THIS BATCH TO FINISH
+            # -----------------------------
+            echo "Waiting for batch artifacts..."
+ + end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT )) + declare -a done=() + + while [[ ${#done[@]} -lt ${#dispatched_commits[@]} && $(date +%s) -lt $end_time ]]; do + artifact_list=$(curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \ + | jq -r '.artifacts[].name') + + for idx in "${!artifact_names[@]}"; do + if [[ " ${done[@]} " =~ " ${artifact_names[$idx]} " ]]; then continue; fi + if echo "$artifact_list" | grep -q "^${artifact_names[$idx]}$"; then + echo " ✓ Artifact found: ${artifact_names[$idx]}" + done+=("${artifact_names[$idx]}") + fi + done + + [[ ${#done[@]} -lt ${#artifact_names[@]} ]] && sleep 60 + done + + if [[ ${#done[@]} -lt ${#artifact_names[@]} ]]; then + echo "ERROR: Timeout while waiting for batch artifacts." + exit 1 + fi + + echo "Batch completed." + done + + echo "All analysis workflows completed." + + # -------------------------------------------------------------------- + # STEP 5 — CREATE ONE RELEASE AND UPLOAD ALL ARTIFACTS + # -------------------------------------------------------------------- + - name: Create release and upload artifacts + id: make-release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + + # Helper function to fetch all artifacts across all pages + fetch_all_artifacts() { + local page=1 + local all_artifacts_array="[]" + while true; do + local response=$(curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100&page=${page}") + local page_artifacts=$(echo "$response" | jq -c '.artifacts[]') + if [[ -z "$page_artifacts" ]]; then + break + fi + # Merge this page's artifacts into the array + while IFS= read -r artifact; do + all_artifacts_array=$(echo "$all_artifacts_array" | jq --argjson art "$artifact" '. + [$art]') + done <<< "$page_artifacts" + local per_page=$(echo "$response" | jq -r '.artifacts | length') + if [[ $per_page -lt 100 ]]; then + break + fi + ((page++)) + done + echo "$all_artifacts_array" + } + + # Release naming (one release per workflow run) + run_timestamp="$(date -u +%Y%m%dT%H%M%SZ)" + release_tag="analysis-${run_timestamp}" + release_name="Continuous Analysis Run ${run_timestamp}" + + echo "Creating release: $release_name" + + # Create release + api_response=$(curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"tag_name\": \"${release_tag}\", \"name\": \"${release_name}\", \"draft\": false, \"prerelease\": false}" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/releases") + + upload_url=$(echo "$api_response" | jq -r '.upload_url' | sed 's/{?name,label}//') + + echo "Upload URL: $upload_url" + + echo "Fetching all artifacts (this may take a moment for large runs)..." + all_artifacts_data=$(fetch_all_artifacts) + + echo "Uploading artifacts..." 
+ + # For each expected artifact, download it & upload it + while IFS= read -r artifact_name; do + echo " Processing artifact: $artifact_name" + + # Query artifact metadata from the fetched data + artifact_info=$(echo "$all_artifacts_data" | jq -c --arg NAME "$artifact_name" '[.[] | select(.name == $NAME)][0]') + + if [[ -z "$artifact_info" || "$artifact_info" == "null" ]]; then + echo " WARNING: Artifact not found: $artifact_name" + continue + fi + + artifact_id=$(echo "$artifact_info" | jq -r '.id') + zip_name="${artifact_name}.zip" + + echo " Downloading artifact ID $artifact_id --> $zip_name" + + # Download ZIP + curl -L -s \ + -H "Authorization: token $GH_TOKEN" \ + -o "$zip_name" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts/${artifact_id}/zip" + + echo " Uploading $zip_name to release..." + + # Upload ZIP to release + curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/zip" \ + --data-binary @"$zip_name" \ + "${upload_url}?name=${zip_name}" + + echo " ✓ Uploaded: $zip_name" + + done < all_expected_artifacts.txt + + echo "Release completed successfully." diff --git a/.github/workflows/run-filter.yml b/.github/workflows/run-filter.yml index 7519390..ba2a56b 100644 --- a/.github/workflows/run-filter.yml +++ b/.github/workflows/run-filter.yml @@ -15,6 +15,10 @@ on: description: "Unique id from dispatcher for history runnings" required: false type: string + skip_commits_pattern: + description: "Number of commits to skip between processing (skip x commits pattern)" + required: false + type: number permissions: actions: read @@ -29,5 +33,6 @@ jobs: current_commit: ${{ inputs.current_commit }} previous_commit: ${{ inputs.previous_commit }} dispatch_id: ${{ inputs.dispatch_id }} + skip_commits_pattern: ${{ inputs.skip_commits_pattern }} secrets: ORG_WIDE_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} diff --git a/.github/workflows/set-cache-sha.yml b/.github/workflows/set-cache-sha.yml new file mode 100644 index 0000000..aee3ec8 --- /dev/null +++ b/.github/workflows/set-cache-sha.yml @@ -0,0 +1,49 @@ +name: Set Cache SHA + +on: + workflow_dispatch: + inputs: + commit_sha: + description: "Commit SHA to set in cache" + required: true + type: string + +permissions: + contents: write + +jobs: + set-cache-sha: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Validate and set commit SHA + run: | + COMMIT_SHA="${{ inputs.commit_sha }}" + + # Validate format + if [[ ! 
"$COMMIT_SHA" =~ ^[a-f0-9]{7,40}$ ]]; then + echo "Invalid commit SHA format" + exit 1 + fi + + # Create cache directory and set SHA + mkdir -p .continuous-analysis-cache + echo "$COMMIT_SHA" > .continuous-analysis-cache/last_sha.txt + + echo "Set cache SHA: $COMMIT_SHA" + + - name: Generate timestamp + id: timestamp + run: | + ts=$(date +'%Y%m%d-%H%M') + echo "ts=$ts" >> "$GITHUB_OUTPUT" + echo "Generated timestamp: $ts" + + - name: Save updated SHA cache with timestamp + uses: actions/cache/save@v4 + with: + path: .continuous-analysis-cache + key: continuous-analysis-cache-${{ github.repository }}-${{ steps.timestamp.outputs.ts }} From e7d4ea3ad127edddb6efdbab76dc87f0532963f5 Mon Sep 17 00:00:00 2001 From: Stephen Shen Date: Sun, 28 Dec 2025 13:54:09 -0500 Subject: [PATCH 3/4] Improve workflows --- .../monitor-upstream-and-analyze.yml | 104 +++++++++++++----- 1 file changed, 79 insertions(+), 25 deletions(-) diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml index fd8a1f5..be207ae 100644 --- a/.github/workflows/monitor-upstream-and-analyze.yml +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -104,10 +104,11 @@ jobs: if [[ "$NUMBER_OF_COMMITS" -gt 0 ]]; then echo "Historical mode: computing boundary SHA for ${NUMBER_OF_COMMITS} commits." - git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + # Clone the upstream repository + git clone "https://github.com/${UPSTREAM_REPO}.git" upstream-repo cd upstream-repo git checkout "${BRANCH}" - git rev-list --first-parent "${BRANCH}" > ../linear_commits.txt + git log --no-merges --name-status | grep 'py\|^commit' | grep -B1 'py$' | grep ^commit | cut -d ' ' -f 2 > ../linear_commits.txt cd .. rm -rf upstream-repo @@ -143,19 +144,19 @@ jobs: # -------------------------------------------------------------------- # STEP 3 — FETCH COMMIT HISTORY & COMPUTE NEW COMMITS # -------------------------------------------------------------------- - - name: Fetch first-parent commit history + - name: Fetch commit history with python files changed and no merge commits id: check-commits run: | - echo "Fetching upstream first-parent history..." + echo "Fetching upstream commit history with python files changed and no merge commits..." - git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + git clone "https://github.com/${UPSTREAM_REPO}.git" upstream-repo cd upstream-repo git checkout "${BRANCH}" - git rev-list --first-parent "${BRANCH}" > ../all_commits.txt + git log --no-merges --name-status | grep 'py\|^commit' | grep -B1 'py$' | grep ^commit | cut -d ' ' -f 2 > ../all_commits.txt cd .. rm -rf upstream-repo - echo "Total first-parent commits: $(wc -l < all_commits.txt)" + echo "Total commits with python files changed and no merge commits: $(wc -l < all_commits.txt)" LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}" SKIP_COMMITS="${{ inputs.skip_commits }}" @@ -221,27 +222,75 @@ jobs: MAX_CONCURRENT=${{ env.MAX_CONCURRENT }} total_commits=${#commits[@]} - echo "Launching analysis for ${total_commits} commits..." + echo "Checking ${total_commits} commits for existing artifacts..." - for ((batch_start=0; batch_start total_commits)) && batch_end=$total_commits + ((batch_end > total_to_process)) && batch_end=$total_to_process echo "Processing batch $((batch_start/MAX_CONCURRENT + 1))..." 
declare -a dispatched_commits=() - declare -a artifact_names=() + declare -a dispatched_artifacts=() for ((i=batch_start; i 0)) && previous="${reversed[$i-1]}" + if [[ $i -eq 0 ]]; then + previous="$LAST_SEEN" + else + previous="${reversed[$i-1]}" + fi echo "Filtering: $current (prev: $previous)" if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then artifact="continuous-analysis-history-filtered-results-${dispatch_id}-${repo_name}-${current}-${{ inputs.skip_commits }}" else - artifact="continuous-analysis-filtered-results-${repo_name}-${current}" + artifact="continuous-analysis-future-filtered-results-${repo_name}-${current}" fi gh workflow run run-filter.yml \ From 2499074d75eeb973a9f29fef6ca736e51dce9977 Mon Sep 17 00:00:00 2001 From: Zhuohang Shen <18962118885@163.com> Date: Sun, 28 Dec 2025 14:42:50 -0500 Subject: [PATCH 4/4] Reduce max concurrent number --- .github/workflows/monitor-upstream-and-analyze.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml index be207ae..d1893fe 100644 --- a/.github/workflows/monitor-upstream-and-analyze.yml +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -31,7 +31,7 @@ jobs: BRANCH: "master" RUNNER_DISPATCH_TIMEOUT: 7200 # 2 hours FILTER_DISPATCH_TIMEOUT: 1800 # 30 minutes - MAX_CONCURRENT: 8 + MAX_CONCURRENT: 1 steps: # --------------------------------------------------------------------
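
Usage note (a minimal sketch, not part of the patches above): with these workflows on the fork's default branch, the historical backfill mode of monitor-upstream-and-analyze.yml can be dispatched through the GitHub CLI using the number_of_commits and skip_commits inputs defined in patch 2. The flag syntax below is standard gh; the repository slug and input values are illustrative assumptions only.

    # Hypothetical dispatch: analyze the last 50 upstream commits,
    # processing every 5th commit (skip 4 between picks).
    gh workflow run monitor-upstream-and-analyze.yml \
      --repo OWNER/FORK \
      --field number_of_commits=50 \
      --field skip_commits=4

Leaving both inputs at their defaults (0), or waiting for the cron trigger, runs the continuous mode, which analyzes only commits newer than the cached last-seen SHA.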