From 2c91c504fc42b445760d4895c239768f17ca3547 Mon Sep 17 00:00:00 2001 From: Stephen Shen Date: Mon, 13 Oct 2025 23:00:44 -0400 Subject: [PATCH 1/4] Add workflows --- .../monitor-upstream-and-analyze.yml | 411 ++++++++++++++++++ .github/workflows/run-analysis.yml | 28 ++ .github/workflows/run-filter.yml | 33 ++ 3 files changed, 472 insertions(+) create mode 100644 .github/workflows/monitor-upstream-and-analyze.yml create mode 100644 .github/workflows/run-analysis.yml create mode 100644 .github/workflows/run-filter.yml diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml new file mode 100644 index 0000000..652e9bf --- /dev/null +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -0,0 +1,411 @@ +name: Monitor Upstream and Run Analysis + +on: + schedule: + - cron: "*/15 * * * *" + workflow_dispatch: + inputs: + start_commit: + description: "Start commit SHA" + required: false + type: string + +permissions: + actions: write + contents: write + issues: write + +jobs: + monitor-upstream: + runs-on: ubuntu-latest + env: + # NEED TO BE CONFIGURED EACH PROJECT + UPSTREAM_REPO: "CausalInferenceLab/Lang2SQL" + BRANCH: "master" + RUNNER_DISPATCH_TIMEOUT: 3600 # 1 hour + FILTER_DISPATCH_TIMEOUT: 600 # 10 minutes + MAX_CONCURRENT: 7 + + steps: + - name: Checkout the forked repo + uses: actions/checkout@v4 + with: + token: ${{ secrets.ORG_WIDE_TOKEN }} + path: forked-repo + fetch-depth: 0 + + - name: Sync fork with upstream + run: | + set -euo pipefail + cd forked-repo + echo "Syncing fork with upstream..." + + # Configure Git user name and email properly + git config --global --add safe.directory "$PWD" + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + # Ensure refs are current + git fetch --prune origin + git remote add upstream "https://github.com/${UPSTREAM_REPO}.git" || true + git fetch --prune upstream --tags + + # Check out the branch if it is not already checked out + git checkout "${BRANCH}" + + # Rebasing the branch onto the upstream branch + echo "Rebasing ${BRANCH} onto upstream/${BRANCH}..." + if ! git rebase -X theirs --rebase-merges "upstream/${BRANCH}"; then + echo "Rebase failed; aborting." + git rebase --abort || true + exit 1 + fi + + # Push only if diverged; use --force-with-lease for safety (we pushed the rebased branch to origin) + # This is to avoid conflicts when rebasing + if ! git diff --quiet "origin/${BRANCH}..HEAD"; then + git push --force-with-lease origin "${BRANCH}" + echo "Pushed rebased ${BRANCH} to origin." + else + echo "No changes to push after rebase." + fi + + # Change back to the root directory + cd .. 
+ + - name: Clean up the workspace + run: | + rm -rf "$GITHUB_WORKSPACE/forked-repo" + + - name: Prepare and restore cache folder + id: cache-folder + uses: actions/cache/restore@v4 + with: + path: .continuous-analysis-cache + key: continuous-analysis-cache-${{ github.repository }}- + + - name: Create cache folder if not exists + run: | + mkdir -p .continuous-analysis-cache + + - name: Load last seen SHA from cache folder + id: last-sha + run: | + # If start_commit is provided, set it as the last seen SHA + if [[ -n "${{ inputs.start_commit }}" ]]; then + echo "last_sha=${{ inputs.start_commit }}" >> $GITHUB_OUTPUT + echo "Start commit provided: ${{ inputs.start_commit }}" + exit 0 + fi + + # Declare the file path to the last seen SHA + FILE=".continuous-analysis-cache/last_sha.txt" + + # Check if the file exists and load the last seen SHA + if [[ -f "$FILE" ]]; then + LAST_SHA=$(cat "$FILE") + echo "Last seen SHA found in cache: $LAST_SHA" + else + LAST_SHA="" + echo "No last seen SHA found in cache" + fi + + # Output the last seen SHA to the GitHub Actions output for further use + echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT + + - name: Get upstream commits and find new ones + id: check-commits + run: | + # Print the upstream repo and branch to the console + echo "Finding the latest 100 commits from the upstream repo: $UPSTREAM_REPO@$BRANCH" + + # Get the latest 100 commits from the upstream repo and save them to a JSON file + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${UPSTREAM_REPO}/commits?sha=${BRANCH}&per_page=100" \ + > commits.json + + # Parse the first commit SHA from the JSON file for sanity check + if ! jq -e '.[0].sha' commits.json > /dev/null; then + echo "Failed to parse SHA from commits.json" + exit 1 + fi + + # Parse all commit SHAs (from newest to oldest) from the JSON file and save to all_commits.txt + jq -r '.[].sha' commits.json > all_commits.txt + + # Get the last seen SHA from the previous step + LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}" + echo "Last seen SHA: $LAST_SEEN" + + # If the last seen SHA is empty, select the latest commit SHA + if [[ -z "$LAST_SEEN" ]]; then + head -n 1 all_commits.txt > new_commits.txt + echo "has_new_commits=true" >> $GITHUB_OUTPUT + echo "First-time run — selecting the latest commit: $(head -n 1 all_commits.txt)" + else + # If the last seen SHA is not empty, filter out previously seen commits + # Print all new commit SHAs (above the last seen SHA) to new_commits.txt + awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > new_commits.txt + + if [ ! -s new_commits.txt ]; then + echo "No new commits to process." 
+              echo "has_new_commits=false" >> $GITHUB_OUTPUT
+            else
+              echo "New commits to process:"
+              echo "has_new_commits=true" >> $GITHUB_OUTPUT
+              cat new_commits.txt
+            fi
+          fi
+
+      - name: Generate dispatch ID
+        id: dispatch-id
+        run: |
+          # If start_commit is provided, generate a random dispatch ID for history analysis
+          if [[ -n "${{ inputs.start_commit }}" ]]; then
+            dispatch_id="$(date -u +%Y%m%dT%H%M%SZ)-$RANDOM"
+            echo "dispatch_id=$dispatch_id" >> $GITHUB_OUTPUT
+            echo "Generated dispatch ID: $dispatch_id"
+          else
+            echo "dispatch_id=" >> $GITHUB_OUTPUT
+            echo "No dispatch ID needed for regular runs"
+          fi
+
+      - name: Trigger analysis workflows for new commits in parallel
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Read the commits from file
+          mapfile -t commits < new_commits.txt
+
+          # If no new commits to process, exit
+          if [ ${#commits[@]} -eq 0 ]; then
+            echo "No new commits to process"
+            exit 0
+          fi
+
+          # Use the shared dispatch ID
+          dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
+
+          # Extract repository name for future usages
+          repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
+
+          # Configuration
+          MAX_CONCURRENT=${{ env.MAX_CONCURRENT }}
+          total_commits=${#commits[@]}
+          echo "Processing ${total_commits} commits in batches of ${MAX_CONCURRENT}..."
+
+          # Process commits in batches
+          for ((batch_start=0; batch_start<total_commits; batch_start+=MAX_CONCURRENT)); do
+            batch_end=$((batch_start + MAX_CONCURRENT))
+            ((batch_end > total_commits)) && batch_end=$total_commits
+            batch_num=$((batch_start/MAX_CONCURRENT + 1))
+            echo "Processing batch ${batch_num}..."
+
+            # Create arrays to track dispatched workflows and their artifacts for this batch
+            declare -a dispatched_commits=()
+            declare -a artifact_names=()
+
+            # Dispatch workflows for this batch
+            for ((i=batch_start; i<batch_end; i++)); do
+              commit="${commits[$i]}"
+
+              # Generate artifact name to detect completion
+              if [[ -n "${{ inputs.start_commit }}" ]]; then
+                artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${commit}"
+              else
+                artifact_name="continuous-analysis-results-${repo_name}-${commit}"
+              fi
+
+              # Trigger the analysis workflow
+              echo "Dispatching analysis workflow for commit: $commit"
+              gh workflow run run-analysis.yml \
+                --repo "${GITHUB_REPOSITORY}" \
+                --ref "${{ env.BRANCH }}" \
+                --field commit="$commit" \
+                --field dispatch_id="$dispatch_id"
+
+              dispatched_commits+=("$commit")
+              artifact_names+=("$artifact_name")
+            done
+
+            # Wait for all workflows in this batch to complete
+            echo "Waiting for batch ${batch_num} workflows to complete..."
+            declare -a completed_commits=()
+            end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT ))
+
+            while [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ] && [ "$(date +%s)" -lt "$end_time" ]; do
+              # Fetch the current list of artifact names
+              current_artifacts=$(curl -s -H "Authorization: token $GH_TOKEN" \
+                "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \
+                | jq -r '.artifacts[].name' 2>/dev/null || echo "")
+
+              # Check each dispatched workflow in this batch
+              for i in "${!dispatched_commits[@]}"; do
+                commit="${dispatched_commits[$i]}"
+                artifact_name="${artifact_names[$i]}"
+
+                # Skip if already completed
+                if [[ " ${completed_commits[@]} " =~ " ${commit} " ]]; then
+                  continue
+                fi
+
+                # Check if artifact exists
+                if echo "$current_artifacts" | grep -q "^${artifact_name}"; then
+                  echo "Artifact ${artifact_name} found for commit ${commit}."
+                  completed_commits+=("$commit")
+                fi
+              done
+
+              # Report progress for this batch
+              completed_count=${#completed_commits[@]}
+              total_count=${#dispatched_commits[@]}
+              echo "Batch ${batch_num} progress: ${completed_count}/${total_count} workflows completed"
+
+              # If not all complete, wait before next check
+              if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
+                echo "Waiting 60 seconds before next check..."
+                sleep 60
+              fi
+            done
+
+            # Check if all workflows in this batch completed successfully
+            if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
+              echo "ERROR: Timed out waiting for batch ${batch_num} workflows to complete" >&2
+              echo "Completed: ${#completed_commits[@]}/${#dispatched_commits[@]}" >&2
+              echo "Missing artifacts:" >&2
+              for i in "${!dispatched_commits[@]}"; do
+                commit="${dispatched_commits[$i]}"
+                if [[ ! " ${completed_commits[@]} " =~ " ${commit} " ]]; then
+                  echo "  - ${artifact_names[$i]} (commit: ${commit})" >&2
+                fi
+              done
+              exit 1
+            fi
+
+            echo "Batch ${batch_num} completed successfully! (${#dispatched_commits[@]} workflows)"
+
+            # Clear arrays for next batch
+            unset dispatched_commits
+            unset artifact_names
+            unset completed_commits
+          done
+
+          echo "All ${total_commits} workflows completed successfully across all batches!"
+ + - name: Trigger violation filter workflows for new commits in sequence + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # Read the commits from file + mapfile -t commits < new_commits.txt + + # If no new commits to process, exit + if [ ${#commits[@]} -eq 0 ]; then + echo "No new commits to process" + exit 0 + fi + + # Use the same dispatch_id as the analysis workflows + dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}" + + # Extract repository name for future usages + repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2) + + # Process commits in sequence for violation filter + echo "Processing ${#commits[@]} commits for violation filter in sequence..." + + # Reverse the commits array to process from oldest to newest + reversed_commits=() + for ((i=${#commits[@]}-1; i>=0; i--)); do + reversed_commits+=("${commits[$i]}") + done + + # Process each commit in sequence + for i in "${!reversed_commits[@]}"; do + current_commit="${reversed_commits[$i]}" + + # For the first commit (oldest), previous_commit is empty + # For subsequent commits, previous_commit is the previous one in the sequence + if [ $i -eq 0 ]; then + previous_commit="" + echo "Processing first commit: $current_commit (no previous commit)" + else + previous_commit="${reversed_commits[$i-1]}" + echo "Processing commit: $current_commit (previous: $previous_commit)" + fi + + # Generate artifact name to detect completion + if [[ -n "${{ inputs.start_commit }}" ]]; then + artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${current_commit}-filtered" + else + artifact_name="continuous-analysis-results-${repo_name}-${current_commit}-filtered" + fi + + # Trigger the violation filter workflow + gh workflow run run-filter.yml \ + --repo "${GITHUB_REPOSITORY}" \ + --ref "${{ env.BRANCH }}" \ + --field current_commit="$current_commit" \ + --field previous_commit="$previous_commit" \ + --field dispatch_id="$dispatch_id" + + # Wait for this workflow to complete + echo "Waiting for violation filter workflow to complete for commit: $current_commit" + artifact_created=false + + # Set the end time for the timeout + end_time=$(( $(date +%s) + FILTER_DISPATCH_TIMEOUT )) + + # Wait for the artifact to be created + while [ "$artifact_created" = false ] && [ "$(date +%s)" -lt "$end_time" ]; do + if curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \ + | jq -r '.artifacts[].name' 2>/dev/null | grep -q "^${artifact_name}"; then + artifact_created=true + echo "Violation filter artifact ${artifact_name} found for commit ${current_commit}." + break + fi + echo "Violation filter artifact not found yet, waiting 30 seconds..." + sleep 30 + done + + # Check if the workflow completed successfully + if [ "$artifact_created" = false ]; then + echo "ERROR: Timed out waiting for violation filter artifact ${artifact_name} for commit ${current_commit}" >&2 + exit 1 + fi + done + + echo "All ${#commits[@]} violation filter workflows completed successfully!" 
+ + - name: Update SHA cache + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + run: | + NEWEST=$(head -n 1 new_commits.txt) + echo "$NEWEST" > .continuous-analysis-cache/last_sha.txt + + - name: Generate timestamp + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + id: timestamp + run: | + ts=$(date +'%Y%m%d-%H%M') + echo "ts=$ts" >> "$GITHUB_OUTPUT" + echo "Generated timestamp: $ts" + + - name: Save updated SHA cache with timestamp + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + uses: actions/cache/save@v4 + with: + path: .continuous-analysis-cache + key: continuous-analysis-cache-${{ github.repository }}-${{ steps.timestamp.outputs.ts }} diff --git a/.github/workflows/run-analysis.yml b/.github/workflows/run-analysis.yml new file mode 100644 index 0000000..beba369 --- /dev/null +++ b/.github/workflows/run-analysis.yml @@ -0,0 +1,28 @@ +name: Trigger Continuous Analysis + +on: + workflow_dispatch: + inputs: + commit: + description: 'Single commit SHA to test' + required: true + type: string + dispatch_id: + description: "Unique id from dispatcher for history runnings" + required: false + type: string + +permissions: + actions: read + contents: write + issues: write + +jobs: + analyze-single-commit: + uses: ContinuousAnalysis/continuous-analysis/.github/workflows/auto-runner.yml@main + with: + project: ${{ github.repository }} + commit: ${{ inputs.commit }} + dispatch_id: ${{ inputs.dispatch_id }} + secrets: + ORG_WIDE_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} diff --git a/.github/workflows/run-filter.yml b/.github/workflows/run-filter.yml new file mode 100644 index 0000000..7519390 --- /dev/null +++ b/.github/workflows/run-filter.yml @@ -0,0 +1,33 @@ +name: Trigger Continuous Analysis Violation Filter + +on: + workflow_dispatch: + inputs: + current_commit: + description: 'Current commit SHA to test' + required: true + type: string + previous_commit: + description: 'Previous commit SHA to test' + required: false + type: string + dispatch_id: + description: "Unique id from dispatcher for history runnings" + required: false + type: string + +permissions: + actions: read + contents: write + issues: write + +jobs: + analyze-single-commit: + uses: ContinuousAnalysis/continuous-analysis/.github/workflows/auto-filter.yml@main + with: + project: ${{ github.repository }} + current_commit: ${{ inputs.current_commit }} + previous_commit: ${{ inputs.previous_commit }} + dispatch_id: ${{ inputs.dispatch_id }} + secrets: + ORG_WIDE_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} From f35a25592374ae071c94f55ecf9d7f0214f78472 Mon Sep 17 00:00:00 2001 From: Stephen Shen Date: Tue, 25 Nov 2025 11:08:31 -0500 Subject: [PATCH 2/4] Update workflows --- .github/workflows/create-release.yml | 110 +++++ .../monitor-upstream-and-analyze.yml | 432 ++++++++---------- .github/workflows/monitor-upstream.yml | 300 ++++++++++++ .github/workflows/run-filter.yml | 5 + .github/workflows/set-cache-sha.yml | 49 ++ 5 files changed, 658 insertions(+), 238 deletions(-) create mode 100644 .github/workflows/create-release.yml create mode 100644 .github/workflows/monitor-upstream.yml create mode 100644 .github/workflows/set-cache-sha.yml diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml new file mode 100644 index 0000000..40d9270 --- /dev/null +++ b/.github/workflows/create-release.yml @@ -0,0 +1,110 @@ +name: Create Release for Analysis Artifacts + +on: + workflow_dispatch: + inputs: + 
prefix: + description: "Artifact name prefix" + required: true + type: string + +permissions: + contents: write + +jobs: + release: + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} + + steps: + - name: Create timestamp + id: ts + run: | + ts="$(date -u +%Y%m%dT%H%M%SZ)" + echo "ts=$ts" >> $GITHUB_OUTPUT + + - name: Create GitHub Release + id: create-release + run: | + tag="analysis-${{ steps.ts.outputs.ts }}" + name="Continuous Analysis Release (${{ steps.ts.outputs.ts }})" + + echo "Creating release: $name" + + response=$(curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"tag_name\": \"$tag\", \"name\": \"$name\", \"draft\": false}" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/releases") + + upload_url=$(echo "$response" | jq -r '.upload_url' | sed 's/{?name,label}//') + echo "upload_url=$upload_url" >> $GITHUB_OUTPUT + + - name: Fetch all artifact metadata + id: fetch + run: | + echo "Fetching all artifacts..." + + page=1 + all="[]" + while true; do + response=$(curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100&page=$page") + + artifacts=$(echo "$response" | jq -c '.artifacts[]?') + if [[ -z "$artifacts" ]]; then break; fi + + while IFS= read -r art; do + all=$(echo "$all" | jq --argjson a "$art" '. + [$a]') + done <<< "$artifacts" + + count=$(echo "$response" | jq '.artifacts | length') + (( count < 100 )) && break + + ((page++)) + done + + echo "$all" > all_artifacts.json + echo "Saved metadata for all artifacts." + + - name: Upload matching artifacts to release + run: | + prefix="${{ inputs.prefix }}" + upload_url="${{ steps.create-release.outputs.upload_url }}" + + echo "Looking for artifacts starting with: $prefix" + echo "" + + matches=$(jq -c --arg p "$prefix" '.[] | select(.name | startswith($p))' all_artifacts.json) + + if [[ "$(echo "$matches" | wc -l)" -eq 0 ]]; then + echo "❌ No artifacts found starting with: $prefix" + exit 1 + fi + + echo "$matches" | while IFS= read -r art; do + name=$(echo "$art" | jq -r '.name') + id=$(echo "$art" | jq -r '.id') + zip="${name}.zip" + + echo "▶ Downloading artifact: $name (ID $id)" + + curl -L -s \ + -H "Authorization: token $GH_TOKEN" \ + -o "$zip" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts/${id}/zip" + + echo "⬆ Uploading $zip to release..." + + curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/zip" \ + --data-binary @"$zip" \ + "${upload_url}?name=${zip}" + + echo "✓ Uploaded: $zip" + echo "" + done + + echo "🎉 Release upload completed successfully!" 
diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml index 652e9bf..fd8a1f5 100644 --- a/.github/workflows/monitor-upstream-and-analyze.yml +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -2,13 +2,19 @@ name: Monitor Upstream and Run Analysis on: schedule: - - cron: "*/15 * * * *" + - cron: "0 */6 * * *" workflow_dispatch: inputs: - start_commit: - description: "Start commit SHA" + number_of_commits: + description: "Historical mode: analyze N previous commits (0 = continuous mode)" required: false - type: string + type: number + default: 0 + skip_commits: + description: "Skip commit pattern: process every (N+1)th commit (0 = process all)" + required: false + type: number + default: 0 permissions: actions: write @@ -18,16 +24,20 @@ permissions: jobs: monitor-upstream: runs-on: ubuntu-latest + env: # NEED TO BE CONFIGURED EACH PROJECT UPSTREAM_REPO: "CausalInferenceLab/Lang2SQL" BRANCH: "master" - RUNNER_DISPATCH_TIMEOUT: 3600 # 1 hour - FILTER_DISPATCH_TIMEOUT: 600 # 10 minutes - MAX_CONCURRENT: 7 + RUNNER_DISPATCH_TIMEOUT: 7200 # 2 hours + FILTER_DISPATCH_TIMEOUT: 1800 # 30 minutes + MAX_CONCURRENT: 8 steps: - - name: Checkout the forked repo + # -------------------------------------------------------------------- + # STEP 1 — SYNC FORK WITH UPSTREAM + # -------------------------------------------------------------------- + - name: Checkout fork uses: actions/checkout@v4 with: token: ${{ secrets.ORG_WIDE_TOKEN }} @@ -38,373 +48,319 @@ jobs: run: | set -euo pipefail cd forked-repo - echo "Syncing fork with upstream..." - - # Configure Git user name and email properly + + echo "Syncing ${BRANCH} with upstream ${UPSTREAM_REPO}..." + git config --global --add safe.directory "$PWD" git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - - # Ensure refs are current + git fetch --prune origin git remote add upstream "https://github.com/${UPSTREAM_REPO}.git" || true git fetch --prune upstream --tags - - # Check out the branch if it is not already checked out + git checkout "${BRANCH}" - - # Rebasing the branch onto the upstream branch - echo "Rebasing ${BRANCH} onto upstream/${BRANCH}..." + + echo "Rebasing local branch onto upstream/${BRANCH}..." if ! git rebase -X theirs --rebase-merges "upstream/${BRANCH}"; then - echo "Rebase failed; aborting." + echo "Rebase failed -- aborting." git rebase --abort || true exit 1 fi - - # Push only if diverged; use --force-with-lease for safety (we pushed the rebased branch to origin) - # This is to avoid conflicts when rebasing + if ! git diff --quiet "origin/${BRANCH}..HEAD"; then git push --force-with-lease origin "${BRANCH}" - echo "Pushed rebased ${BRANCH} to origin." + echo "Rebase changes pushed to origin." else - echo "No changes to push after rebase." + echo "No diverged changes; nothing to push." fi - # Change back to the root directory cd .. 
- - name: Clean up the workspace - run: | - rm -rf "$GITHUB_WORKSPACE/forked-repo" + - name: Cleanup forked repo + run: rm -rf forked-repo - - name: Prepare and restore cache folder + # -------------------------------------------------------------------- + # STEP 2 — LOAD LAST-SEEN SHA (or compute from history) + # -------------------------------------------------------------------- + - name: Restore analysis cache folder id: cache-folder uses: actions/cache/restore@v4 with: path: .continuous-analysis-cache key: continuous-analysis-cache-${{ github.repository }}- - - name: Create cache folder if not exists - run: | - mkdir -p .continuous-analysis-cache + - name: Ensure cache folder exists + run: mkdir -p .continuous-analysis-cache - - name: Load last seen SHA from cache folder + - name: Determine last-seen SHA id: last-sha run: | - # If start_commit is provided, set it as the last seen SHA - if [[ -n "${{ inputs.start_commit }}" ]]; then - echo "last_sha=${{ inputs.start_commit }}" >> $GITHUB_OUTPUT - echo "Start commit provided: ${{ inputs.start_commit }}" + NUMBER_OF_COMMITS="${{ inputs.number_of_commits }}" + + # --------------------------- + # Historical mode + # --------------------------- + if [[ "$NUMBER_OF_COMMITS" -gt 0 ]]; then + echo "Historical mode: computing boundary SHA for ${NUMBER_OF_COMMITS} commits." + + git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + cd upstream-repo + git checkout "${BRANCH}" + git rev-list --first-parent "${BRANCH}" > ../linear_commits.txt + cd .. + rm -rf upstream-repo + + TOTAL=$(wc -l < linear_commits.txt) + echo "Total commits in first-parent history: $TOTAL" + + if [[ "$NUMBER_OF_COMMITS" -ge "$TOTAL" ]]; then + echo "ERROR: number_of_commits $NUMBER_OF_COMMITS exceeds available history ($TOTAL)." >&2 + exit 1 + fi + + LAST_SHA=$(sed -n "$((NUMBER_OF_COMMITS + 1))p" linear_commits.txt) + echo "Historical boundary last_sha = $LAST_SHA" + echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT exit 0 fi - # Declare the file path to the last seen SHA - FILE=".continuous-analysis-cache/last_sha.txt" + # --------------------------- + # Continuous mode + # --------------------------- + CACHE_FILE=".continuous-analysis-cache/last_sha.txt" - # Check if the file exists and load the last seen SHA - if [[ -f "$FILE" ]]; then - LAST_SHA=$(cat "$FILE") - echo "Last seen SHA found in cache: $LAST_SHA" + if [[ -f "$CACHE_FILE" ]]; then + LAST_SHA=$(cat "$CACHE_FILE") + echo "Loaded last-seen SHA from cache: $LAST_SHA" else LAST_SHA="" - echo "No last seen SHA found in cache" + echo "No last-seen SHA found; this is the first run." fi - # Output the last seen SHA to the GitHub Actions output for further use echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT - - name: Get upstream commits and find new ones + # -------------------------------------------------------------------- + # STEP 3 — FETCH COMMIT HISTORY & COMPUTE NEW COMMITS + # -------------------------------------------------------------------- + - name: Fetch first-parent commit history id: check-commits run: | - # Print the upstream repo and branch to the console - echo "Finding the latest 100 commits from the upstream repo: $UPSTREAM_REPO@$BRANCH" - - # Get the latest 100 commits from the upstream repo and save them to a JSON file - curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ - "https://api.github.com/repos/${UPSTREAM_REPO}/commits?sha=${BRANCH}&per_page=100" \ - > commits.json + echo "Fetching upstream first-parent history..." 
- # Parse the first commit SHA from the JSON file for sanity check - if ! jq -e '.[0].sha' commits.json > /dev/null; then - echo "Failed to parse SHA from commits.json" - exit 1 - fi + git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + cd upstream-repo + git checkout "${BRANCH}" + git rev-list --first-parent "${BRANCH}" > ../all_commits.txt + cd .. + rm -rf upstream-repo - # Parse all commit SHAs (from newest to oldest) from the JSON file and save to all_commits.txt - jq -r '.[].sha' commits.json > all_commits.txt + echo "Total first-parent commits: $(wc -l < all_commits.txt)" - # Get the last seen SHA from the previous step LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}" - echo "Last seen SHA: $LAST_SEEN" + SKIP_COMMITS="${{ inputs.skip_commits }}" + + [[ -z "$SKIP_COMMITS" || "$SKIP_COMMITS" -lt 0 ]] && SKIP_COMMITS=0 - # If the last seen SHA is empty, select the latest commit SHA + # First-time run if [[ -z "$LAST_SEEN" ]]; then head -n 1 all_commits.txt > new_commits.txt + echo "Initial run: analyzing latest commit only: $(cat new_commits.txt)" echo "has_new_commits=true" >> $GITHUB_OUTPUT - echo "First-time run — selecting the latest commit: $(head -n 1 all_commits.txt)" - else - # If the last seen SHA is not empty, filter out previously seen commits - # Print all new commit SHAs (above the last seen SHA) to new_commits.txt - awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > new_commits.txt + exit 0 + fi - if [ ! -s new_commits.txt ]; then - echo "No new commits to process." - echo "has_new_commits=false" >> $GITHUB_OUTPUT - else - echo "New commits to process:" - echo "has_new_commits=true" >> $GITHUB_OUTPUT - cat new_commits.txt - fi + # Extract new commits above LAST_SEEN + awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > temp_new_commits.txt + + if [[ ! -s temp_new_commits.txt ]]; then + echo "No new commits since last analysis." 
+            touch new_commits.txt
+            echo "has_new_commits=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Apply skip pattern
+          if [[ "$SKIP_COMMITS" -eq 0 ]]; then
+            mv temp_new_commits.txt new_commits.txt
+          else
+            awk "NR % ($SKIP_COMMITS + 1) == 1" temp_new_commits.txt > new_commits.txt
+            rm temp_new_commits.txt
+          fi
+
+          echo "has_new_commits=true" >> $GITHUB_OUTPUT
+          echo "New commits to analyze (total: $(wc -l < new_commits.txt)):"
+          cat new_commits.txt
+
+      # --------------------------------------------------------------------
+      # STEP 4 — GENERATE DISPATCH ID (ONLY FOR HISTORICAL MODE)
+      # --------------------------------------------------------------------
       - name: Generate dispatch ID
         id: dispatch-id
         run: |
-          # If start_commit is provided, generate a random dispatch ID for history analysis
-          if [[ -n "${{ inputs.start_commit }}" ]]; then
+          if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then
             dispatch_id="$(date -u +%Y%m%dT%H%M%SZ)-$RANDOM"
+            echo "Historical run dispatch ID: $dispatch_id"
             echo "dispatch_id=$dispatch_id" >> $GITHUB_OUTPUT
-            echo "Generated dispatch ID: $dispatch_id"
           else
             echo "dispatch_id=" >> $GITHUB_OUTPUT
-            echo "No dispatch ID needed for regular runs"
           fi
 
-      - name: Trigger analysis workflows for new commits in parallel
+      # --------------------------------------------------------------------
+      # STEP 5 — TRIGGER ANALYSIS WORKFLOWS (PARALLEL)
+      # --------------------------------------------------------------------
+      - name: Run analysis workflows
+        if: steps.check-commits.outputs.has_new_commits == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          # Read the commits from file
           mapfile -t commits < new_commits.txt
-
-          # If no new commits to process, exit
-          if [ ${#commits[@]} -eq 0 ]; then
-            echo "No new commits to process"
-            exit 0
-          fi
-
-          # Use the shared dispatch ID
           dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
-
-          # Extract repository name for future usages
           repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
 
-          # Configuration
           MAX_CONCURRENT=${{ env.MAX_CONCURRENT }}
           total_commits=${#commits[@]}
-          echo "Processing ${total_commits} commits in batches of ${MAX_CONCURRENT}..."
 
-          # Process commits in batches
+          echo "Launching analysis for ${total_commits} commits..."
+
           for ((batch_start=0; batch_start<total_commits; batch_start+=MAX_CONCURRENT)); do
             batch_end=$((batch_start + MAX_CONCURRENT))
             ((batch_end > total_commits)) && batch_end=$total_commits
-            batch_num=$((batch_start/MAX_CONCURRENT + 1))
-            echo "Processing batch ${batch_num}..."
+
+            echo "Processing batch $((batch_start/MAX_CONCURRENT + 1))..."
 
-            # Create arrays to track dispatched workflows and their artifacts for this batch
             declare -a dispatched_commits=()
             declare -a artifact_names=()
 
-            # Dispatch workflows for this batch
             for ((i=batch_start; i<batch_end; i++)); do
               commit="${commits[$i]}"
 
-              # Generate artifact name to detect completion
-              if [[ -n "${{ inputs.start_commit }}" ]]; then
+              if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then
                 artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${commit}"
               else
                 artifact_name="continuous-analysis-results-${repo_name}-${commit}"
               fi
 
-              # Trigger the analysis workflow
-              echo "Dispatching analysis workflow for commit: $commit"
+              echo "Dispatching analysis for commit: $commit"
               gh workflow run run-analysis.yml \
                 --repo "${GITHUB_REPOSITORY}" \
-                --ref "${{ env.BRANCH }}" \
+                --ref "${BRANCH}" \
                 --field commit="$commit" \
                 --field dispatch_id="$dispatch_id"
 
               dispatched_commits+=("$commit")
               artifact_names+=("$artifact_name")
             done
 
-            # Wait for all workflows in this batch to complete
-            echo "Waiting for batch ${batch_num} workflows to complete..."
-            declare -a completed_commits=()
-            end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT ))
-
-            while [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ] && [ "$(date +%s)" -lt "$end_time" ]; do
-              # Fetch the current list of artifact names
-              current_artifacts=$(curl -s -H "Authorization: token $GH_TOKEN" \
-                "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \
-                | jq -r '.artifacts[].name' 2>/dev/null || echo "")
-
-              # Check each dispatched workflow in this batch
-              for i in "${!dispatched_commits[@]}"; do
-                commit="${dispatched_commits[$i]}"
-                artifact_name="${artifact_names[$i]}"
-
-                # Skip if already completed
-                if [[ " ${completed_commits[@]} " =~ " ${commit} " ]]; then
-                  continue
-                fi
-
-                # Check if artifact exists
-                if echo "$current_artifacts" | grep -q "^${artifact_name}"; then
-                  echo "Artifact ${artifact_name} found for commit ${commit}."
-                  completed_commits+=("$commit")
+            echo "Waiting for batch artifacts..."
+            end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT ))
+            declare -a done=()
+
+            while [[ ${#done[@]} -lt ${#dispatched_commits[@]} && $(date +%s) -lt $end_time ]]; do
+              names=$(curl -s -H "Authorization: token $GH_TOKEN" \
+                "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \
+                | jq -r '.artifacts[].name')
+
+              for idx in "${!dispatched_commits[@]}"; do
+                if [[ " ${done[@]} " =~ " ${dispatched_commits[$idx]} " ]]; then continue; fi
+                if echo "$names" | grep -q "^${artifact_names[$idx]}"; then
+                  echo "  ✓ Artifact found: ${artifact_names[$idx]}"
+                  done+=("${dispatched_commits[$idx]}")
                 fi
               done
 
-              # Report progress for this batch
-              completed_count=${#completed_commits[@]}
-              total_count=${#dispatched_commits[@]}
-              echo "Batch ${batch_num} progress: ${completed_count}/${total_count} workflows completed"
-
-              # If not all complete, wait before next check
-              if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
-                echo "Waiting 60 seconds before next check..."
-                sleep 60
-              fi
+
+              [[ ${#done[@]} -lt ${#dispatched_commits[@]} ]] && sleep 60
             done
 
-            # Check if all workflows in this batch completed successfully
-            if [ ${#completed_commits[@]} -lt ${#dispatched_commits[@]} ]; then
-              echo "ERROR: Timed out waiting for batch ${batch_num} workflows to complete" >&2
-              echo "Completed: ${#completed_commits[@]}/${#dispatched_commits[@]}" >&2
-              echo "Missing artifacts:" >&2
-              for i in "${!dispatched_commits[@]}"; do
-                commit="${dispatched_commits[$i]}"
-                if [[ ! " ${completed_commits[@]} " =~ " ${commit} " ]]; then
-                  echo "  - ${artifact_names[$i]} (commit: ${commit})" >&2
-                fi
-              done
+            if [[ ${#done[@]} -lt ${#dispatched_commits[@]} ]]; then
+              echo "ERROR: Timeout waiting for batch artifacts."
               exit 1
             fi
-
-            echo "Batch ${batch_num} completed successfully! (${#dispatched_commits[@]} workflows)"
-
-            # Clear arrays for next batch
-            unset dispatched_commits
-            unset artifact_names
-            unset completed_commits
+
+            echo "Batch completed."
           done
-
-          echo "All ${total_commits} workflows completed successfully across all batches!"
 
-      - name: Trigger violation filter workflows for new commits in sequence
+      # --------------------------------------------------------------------
+      # STEP 6 — TRIGGER FILTER WORKFLOWS (IN SEQUENCE)
+      # --------------------------------------------------------------------
+      - name: Run violation filter workflows
+        if: steps.check-commits.outputs.has_new_commits == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          # Read the commits from file
           mapfile -t commits < new_commits.txt
-
-          # If no new commits to process, exit
-          if [ ${#commits[@]} -eq 0 ]; then
-            echo "No new commits to process"
-            exit 0
-          fi
-
-          # Use the same dispatch_id as the analysis workflows
           dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
-
-          # Extract repository name for future usages
           repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
 
-          # Process commits in sequence for violation filter
-          echo "Processing ${#commits[@]} commits for violation filter in sequence..."
-
-          # Reverse the commits array to process from oldest to newest
-          reversed_commits=()
+          echo "Executing violation filtering sequentially..."
+ + reversed=() for ((i=${#commits[@]}-1; i>=0; i--)); do - reversed_commits+=("${commits[$i]}") + reversed+=("${commits[$i]}") done - # Process each commit in sequence - for i in "${!reversed_commits[@]}"; do - current_commit="${reversed_commits[$i]}" - - # For the first commit (oldest), previous_commit is empty - # For subsequent commits, previous_commit is the previous one in the sequence - if [ $i -eq 0 ]; then - previous_commit="" - echo "Processing first commit: $current_commit (no previous commit)" - else - previous_commit="${reversed_commits[$i-1]}" - echo "Processing commit: $current_commit (previous: $previous_commit)" - fi + for i in "${!reversed[@]}"; do + current="${reversed[$i]}" + previous="" + ((i > 0)) && previous="${reversed[$i-1]}" - # Generate artifact name to detect completion - if [[ -n "${{ inputs.start_commit }}" ]]; then - artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${current_commit}-filtered" + echo "Filtering: $current (prev: $previous)" + + if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then + artifact="continuous-analysis-history-filtered-results-${dispatch_id}-${repo_name}-${current}-${{ inputs.skip_commits }}" else - artifact_name="continuous-analysis-results-${repo_name}-${current_commit}-filtered" + artifact="continuous-analysis-filtered-results-${repo_name}-${current}" fi - # Trigger the violation filter workflow gh workflow run run-filter.yml \ --repo "${GITHUB_REPOSITORY}" \ - --ref "${{ env.BRANCH }}" \ - --field current_commit="$current_commit" \ - --field previous_commit="$previous_commit" \ - --field dispatch_id="$dispatch_id" - - # Wait for this workflow to complete - echo "Waiting for violation filter workflow to complete for commit: $current_commit" - artifact_created=false - - # Set the end time for the timeout + --ref "${BRANCH}" \ + --field current_commit="$current" \ + --field previous_commit="$previous" \ + --field dispatch_id="$dispatch_id" \ + --field skip_commits_pattern="${{ inputs.skip_commits }}" + end_time=$(( $(date +%s) + FILTER_DISPATCH_TIMEOUT )) - - # Wait for the artifact to be created - while [ "$artifact_created" = false ] && [ "$(date +%s)" -lt "$end_time" ]; do - if curl -s -H "Authorization: token $GH_TOKEN" \ + created=false + + while ! $created && [[ $(date +%s) -lt $end_time ]]; do + names=$(curl -s -H "Authorization: token $GH_TOKEN" \ "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \ - | jq -r '.artifacts[].name' 2>/dev/null | grep -q "^${artifact_name}"; then - artifact_created=true - echo "Violation filter artifact ${artifact_name} found for commit ${current_commit}." + | jq -r '.artifacts[].name') + if echo "$names" | grep -q "^${artifact}"; then + echo " ✓ Filter artifact: ${artifact}" + created=true break fi - echo "Violation filter artifact not found yet, waiting 30 seconds..." sleep 30 done - # Check if the workflow completed successfully - if [ "$artifact_created" = false ]; then - echo "ERROR: Timed out waiting for violation filter artifact ${artifact_name} for commit ${current_commit}" >&2 + if ! $created; then + echo "ERROR: Timeout waiting for filter artifact: ${artifact}" exit 1 fi done - - echo "All ${#commits[@]} violation filter workflows completed successfully!" 
- - name: Update SHA cache - if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + # -------------------------------------------------------------------- + # STEP 7 — UPDATE SHA CACHE (Continuous mode only) + # -------------------------------------------------------------------- + - name: Update last-seen SHA cache + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.number_of_commits == 0 run: | NEWEST=$(head -n 1 new_commits.txt) echo "$NEWEST" > .continuous-analysis-cache/last_sha.txt + echo "Updated cache: last_sha = $NEWEST" - name: Generate timestamp - if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.number_of_commits == 0 id: timestamp run: | ts=$(date +'%Y%m%d-%H%M') echo "ts=$ts" >> "$GITHUB_OUTPUT" - echo "Generated timestamp: $ts" - - name: Save updated SHA cache with timestamp - if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.start_commit == '' + - name: Save updated cache + if: steps.check-commits.outputs.has_new_commits == 'true' && inputs.number_of_commits == 0 uses: actions/cache/save@v4 with: path: .continuous-analysis-cache diff --git a/.github/workflows/monitor-upstream.yml b/.github/workflows/monitor-upstream.yml new file mode 100644 index 0000000..cc23aa6 --- /dev/null +++ b/.github/workflows/monitor-upstream.yml @@ -0,0 +1,300 @@ +name: Monitor Upstream Repository + +on: + workflow_dispatch: + inputs: + number_of_commits: + description: "Historical mode: analyze N previous commits (0 = continuous mode)" + required: true + type: number + skip_commits: + description: "Skip commit pattern: process every (N+1)th commit (0 = process all)" + required: false + type: number + default: 0 + +permissions: + actions: write + contents: write + issues: write + +jobs: + monitor-upstream: + runs-on: ubuntu-latest + + env: + # NEED TO BE CONFIGURED EACH PROJECT + UPSTREAM_REPO: "CausalInferenceLab/Lang2SQL" + BRANCH: "master" + RUNNER_DISPATCH_TIMEOUT: 7200 # 2 hours + MAX_CONCURRENT: 9 + + steps: + # -------------------------------------------------------------------- + # STEP 1 — COMPUTE LAST-SEEN SHA BASED ON NUMBER OF COMMITS + # -------------------------------------------------------------------- + - name: Determine last-seen SHA + id: last-sha + run: | + # Set error handling + set -euo pipefail + + # Get the number of commits to analyze + NUMBER_OF_COMMITS="${{ inputs.number_of_commits }}" + echo "Historical mode: computing boundary SHA for ${NUMBER_OF_COMMITS} commits." + + # Clone the upstream repository + git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + cd upstream-repo + git checkout "${BRANCH}" + git rev-list --first-parent "${BRANCH}" > ../linear_commits.txt + cd .. + rm -rf upstream-repo + + # Get the total number of commits in the first-parent history + TOTAL=$(wc -l < linear_commits.txt) + echo "Total commits in first-parent history: $TOTAL" + + # Check if the number of commits to analyze is greater than 0 + if [[ "$NUMBER_OF_COMMITS" -le 0 ]]; then + echo "ERROR: number_of_commits $NUMBER_OF_COMMITS is less than or equal to 0." >&2 + exit 1 + fi + + # Check if the number of commits to analyze exceeds the total number of commits in the first-parent history + if [[ "$NUMBER_OF_COMMITS" -ge "$TOTAL" ]]; then + echo "ERROR: number_of_commits $NUMBER_OF_COMMITS exceeds available history ($TOTAL)." 
>&2
+            exit 1
+          fi
+
+          # Extract the last seen SHA based on the number of commits to analyze
+          LAST_SHA=$(sed -n "$((NUMBER_OF_COMMITS + 1))p" linear_commits.txt)
+          echo "Historical boundary last_sha = $LAST_SHA"
+          echo "last_sha=$LAST_SHA" >> $GITHUB_OUTPUT
+
+      # --------------------------------------------------------------------
+      # STEP 2 — FETCH COMMIT HISTORY & COMPUTE NEW COMMITS
+      # --------------------------------------------------------------------
+      - name: Fetch first-parent commit history
+        id: check-commits
+        run: |
+          echo "Fetching upstream first-parent history..."
+
+          git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo
+          cd upstream-repo
+          git checkout "${BRANCH}"
+          git rev-list --first-parent "${BRANCH}" > ../all_commits.txt
+          cd ..
+          rm -rf upstream-repo
+
+          echo "Total first-parent commits: $(wc -l < all_commits.txt)"
+
+          LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}"
+          SKIP_COMMITS="${{ inputs.skip_commits }}"
+
+          [[ -z "$SKIP_COMMITS" || "$SKIP_COMMITS" -lt 0 ]] && SKIP_COMMITS=0
+
+          # Extract new commits above LAST_SEEN
+          awk -v sha="$LAST_SEEN" '$0 ~ sha {exit} {print}' all_commits.txt > temp_new_commits.txt
+
+          # Apply skip pattern if skip_commits is provided
+          if [[ "$SKIP_COMMITS" -eq 0 ]]; then
+            mv temp_new_commits.txt new_commits.txt
+          else
+            awk "NR % ($SKIP_COMMITS + 1) == 1" temp_new_commits.txt > new_commits.txt
+            rm temp_new_commits.txt
+          fi
+
+          # Print the new commits to be analyzed
+          echo "New commits to be analyzed (total: $(wc -l < new_commits.txt)):"
+          cat new_commits.txt
+
+      # --------------------------------------------------------------------
+      # STEP 3 — GENERATE DISPATCH ID
+      # --------------------------------------------------------------------
+      - name: Generate dispatch ID
+        id: dispatch-id
+        run: |
+          dispatch_id="$(date -u +%Y%m%dT%H%M%SZ)-$RANDOM"
+          echo "Dispatch ID: $dispatch_id"
+          echo "dispatch_id=$dispatch_id" >> $GITHUB_OUTPUT
+
+      # --------------------------------------------------------------------
+      # STEP 4 — RUN ANALYSIS (PARALLEL) AND COLLECT ARTIFACTS
+      # --------------------------------------------------------------------
+      - name: Run analysis workflows (parallel) and collect artifacts
+        id: run-analysis
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+
+          mapfile -t commits < new_commits.txt
+          dispatch_id="${{ steps.dispatch-id.outputs.dispatch_id }}"
+          repo_name=$(echo "${GITHUB_REPOSITORY}" | cut -d'/' -f2)
+          MAX_CONCURRENT=${{ env.MAX_CONCURRENT }}
+          total_commits=${#commits[@]}
+
+          echo "Launching analysis for ${total_commits} commits..."
+          echo "" > all_expected_artifacts.txt  # all expected artifacts saved here
+
+          # ----------------------------------------
+          # DISPATCH ANALYSIS WORKFLOWS IN BATCHES
+          # ----------------------------------------
+          for ((batch_start=0; batch_start<total_commits; batch_start+=MAX_CONCURRENT)); do
+            batch_end=$((batch_start + MAX_CONCURRENT))
+            ((batch_end > total_commits)) && batch_end=$total_commits
+
+            echo "Processing batch $((batch_start/MAX_CONCURRENT + 1))..."
+
+            declare -a dispatched_commits=()
+            declare -a artifact_names=()
+
+            # Dispatch each workflow in this batch
+            for ((i=batch_start; i<batch_end; i++)); do
+              commit="${commits[$i]}"
+              echo "Dispatching analysis for commit: $commit"
+
+              # Expected artifact name used to detect completion
+              artifact_name="continuous-analysis-history-results-${dispatch_id}-${repo_name}-${commit}"
+              echo "$artifact_name" >> all_expected_artifacts.txt
+
+              gh workflow run run-analysis.yml \
+                --repo "${GITHUB_REPOSITORY}" \
+                --ref "${BRANCH}" \
+                --field commit="$commit" \
+                --field dispatch_id="$dispatch_id"
+
+              dispatched_commits+=("$commit")
+              artifact_names+=("$artifact_name")
+            done
+
+            # -----------------------------
+            # WAIT FOR THIS BATCH TO FINISH
+            # -----------------------------
+            echo "Waiting for batch artifacts..."
+ + end_time=$(( $(date +%s) + RUNNER_DISPATCH_TIMEOUT )) + declare -a done=() + + while [[ ${#done[@]} -lt ${#dispatched_commits[@]} && $(date +%s) -lt $end_time ]]; do + artifact_list=$(curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100" \ + | jq -r '.artifacts[].name') + + for idx in "${!artifact_names[@]}"; do + if [[ " ${done[@]} " =~ " ${artifact_names[$idx]} " ]]; then continue; fi + if echo "$artifact_list" | grep -q "^${artifact_names[$idx]}$"; then + echo " ✓ Artifact found: ${artifact_names[$idx]}" + done+=("${artifact_names[$idx]}") + fi + done + + [[ ${#done[@]} -lt ${#artifact_names[@]} ]] && sleep 60 + done + + if [[ ${#done[@]} -lt ${#artifact_names[@]} ]]; then + echo "ERROR: Timeout while waiting for batch artifacts." + exit 1 + fi + + echo "Batch completed." + done + + echo "All analysis workflows completed." + + # -------------------------------------------------------------------- + # STEP 5 — CREATE ONE RELEASE AND UPLOAD ALL ARTIFACTS + # -------------------------------------------------------------------- + - name: Create release and upload artifacts + id: make-release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + + # Helper function to fetch all artifacts across all pages + fetch_all_artifacts() { + local page=1 + local all_artifacts_array="[]" + while true; do + local response=$(curl -s -H "Authorization: token $GH_TOKEN" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts?per_page=100&page=${page}") + local page_artifacts=$(echo "$response" | jq -c '.artifacts[]') + if [[ -z "$page_artifacts" ]]; then + break + fi + # Merge this page's artifacts into the array + while IFS= read -r artifact; do + all_artifacts_array=$(echo "$all_artifacts_array" | jq --argjson art "$artifact" '. + [$art]') + done <<< "$page_artifacts" + local per_page=$(echo "$response" | jq -r '.artifacts | length') + if [[ $per_page -lt 100 ]]; then + break + fi + ((page++)) + done + echo "$all_artifacts_array" + } + + # Release naming (one release per workflow run) + run_timestamp="$(date -u +%Y%m%dT%H%M%SZ)" + release_tag="analysis-${run_timestamp}" + release_name="Continuous Analysis Run ${run_timestamp}" + + echo "Creating release: $release_name" + + # Create release + api_response=$(curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"tag_name\": \"${release_tag}\", \"name\": \"${release_name}\", \"draft\": false, \"prerelease\": false}" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/releases") + + upload_url=$(echo "$api_response" | jq -r '.upload_url' | sed 's/{?name,label}//') + + echo "Upload URL: $upload_url" + + echo "Fetching all artifacts (this may take a moment for large runs)..." + all_artifacts_data=$(fetch_all_artifacts) + + echo "Uploading artifacts..." 
+ + # For each expected artifact, download it & upload it + while IFS= read -r artifact_name; do + echo " Processing artifact: $artifact_name" + + # Query artifact metadata from the fetched data + artifact_info=$(echo "$all_artifacts_data" | jq -c --arg NAME "$artifact_name" '[.[] | select(.name == $NAME)][0]') + + if [[ -z "$artifact_info" || "$artifact_info" == "null" ]]; then + echo " WARNING: Artifact not found: $artifact_name" + continue + fi + + artifact_id=$(echo "$artifact_info" | jq -r '.id') + zip_name="${artifact_name}.zip" + + echo " Downloading artifact ID $artifact_id --> $zip_name" + + # Download ZIP + curl -L -s \ + -H "Authorization: token $GH_TOKEN" \ + -o "$zip_name" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/artifacts/${artifact_id}/zip" + + echo " Uploading $zip_name to release..." + + # Upload ZIP to release + curl -s -X POST \ + -H "Authorization: token $GH_TOKEN" \ + -H "Content-Type: application/zip" \ + --data-binary @"$zip_name" \ + "${upload_url}?name=${zip_name}" + + echo " ✓ Uploaded: $zip_name" + + done < all_expected_artifacts.txt + + echo "Release completed successfully." diff --git a/.github/workflows/run-filter.yml b/.github/workflows/run-filter.yml index 7519390..ba2a56b 100644 --- a/.github/workflows/run-filter.yml +++ b/.github/workflows/run-filter.yml @@ -15,6 +15,10 @@ on: description: "Unique id from dispatcher for history runnings" required: false type: string + skip_commits_pattern: + description: "Number of commits to skip between processing (skip x commits pattern)" + required: false + type: number permissions: actions: read @@ -29,5 +33,6 @@ jobs: current_commit: ${{ inputs.current_commit }} previous_commit: ${{ inputs.previous_commit }} dispatch_id: ${{ inputs.dispatch_id }} + skip_commits_pattern: ${{ inputs.skip_commits_pattern }} secrets: ORG_WIDE_TOKEN: ${{ secrets.ORG_WIDE_TOKEN }} diff --git a/.github/workflows/set-cache-sha.yml b/.github/workflows/set-cache-sha.yml new file mode 100644 index 0000000..aee3ec8 --- /dev/null +++ b/.github/workflows/set-cache-sha.yml @@ -0,0 +1,49 @@ +name: Set Cache SHA + +on: + workflow_dispatch: + inputs: + commit_sha: + description: "Commit SHA to set in cache" + required: true + type: string + +permissions: + contents: write + +jobs: + set-cache-sha: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Validate and set commit SHA + run: | + COMMIT_SHA="${{ inputs.commit_sha }}" + + # Validate format + if [[ ! 
"$COMMIT_SHA" =~ ^[a-f0-9]{7,40}$ ]]; then + echo "Invalid commit SHA format" + exit 1 + fi + + # Create cache directory and set SHA + mkdir -p .continuous-analysis-cache + echo "$COMMIT_SHA" > .continuous-analysis-cache/last_sha.txt + + echo "Set cache SHA: $COMMIT_SHA" + + - name: Generate timestamp + id: timestamp + run: | + ts=$(date +'%Y%m%d-%H%M') + echo "ts=$ts" >> "$GITHUB_OUTPUT" + echo "Generated timestamp: $ts" + + - name: Save updated SHA cache with timestamp + uses: actions/cache/save@v4 + with: + path: .continuous-analysis-cache + key: continuous-analysis-cache-${{ github.repository }}-${{ steps.timestamp.outputs.ts }} From e7d4ea3ad127edddb6efdbab76dc87f0532963f5 Mon Sep 17 00:00:00 2001 From: Stephen Shen Date: Sun, 28 Dec 2025 13:54:09 -0500 Subject: [PATCH 3/4] Improve workflows --- .../monitor-upstream-and-analyze.yml | 104 +++++++++++++----- 1 file changed, 79 insertions(+), 25 deletions(-) diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml index fd8a1f5..be207ae 100644 --- a/.github/workflows/monitor-upstream-and-analyze.yml +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -104,10 +104,11 @@ jobs: if [[ "$NUMBER_OF_COMMITS" -gt 0 ]]; then echo "Historical mode: computing boundary SHA for ${NUMBER_OF_COMMITS} commits." - git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + # Clone the upstream repository + git clone "https://github.com/${UPSTREAM_REPO}.git" upstream-repo cd upstream-repo git checkout "${BRANCH}" - git rev-list --first-parent "${BRANCH}" > ../linear_commits.txt + git log --no-merges --name-status | grep 'py\|^commit' | grep -B1 'py$' | grep ^commit | cut -d ' ' -f 2 > ../linear_commits.txt cd .. rm -rf upstream-repo @@ -143,19 +144,19 @@ jobs: # -------------------------------------------------------------------- # STEP 3 — FETCH COMMIT HISTORY & COMPUTE NEW COMMITS # -------------------------------------------------------------------- - - name: Fetch first-parent commit history + - name: Fetch commit history with python files changed and no merge commits id: check-commits run: | - echo "Fetching upstream first-parent history..." + echo "Fetching upstream commit history with python files changed and no merge commits..." - git clone --depth=100000 "https://github.com/${UPSTREAM_REPO}.git" upstream-repo + git clone "https://github.com/${UPSTREAM_REPO}.git" upstream-repo cd upstream-repo git checkout "${BRANCH}" - git rev-list --first-parent "${BRANCH}" > ../all_commits.txt + git log --no-merges --name-status | grep 'py\|^commit' | grep -B1 'py$' | grep ^commit | cut -d ' ' -f 2 > ../all_commits.txt cd .. rm -rf upstream-repo - echo "Total first-parent commits: $(wc -l < all_commits.txt)" + echo "Total commits with python files changed and no merge commits: $(wc -l < all_commits.txt)" LAST_SEEN="${{ steps.last-sha.outputs.last_sha }}" SKIP_COMMITS="${{ inputs.skip_commits }}" @@ -221,27 +222,75 @@ jobs: MAX_CONCURRENT=${{ env.MAX_CONCURRENT }} total_commits=${#commits[@]} - echo "Launching analysis for ${total_commits} commits..." + echo "Checking ${total_commits} commits for existing artifacts..." - for ((batch_start=0; batch_start total_commits)) && batch_end=$total_commits + ((batch_end > total_to_process)) && batch_end=$total_to_process echo "Processing batch $((batch_start/MAX_CONCURRENT + 1))..." 
declare -a dispatched_commits=() - declare -a artifact_names=() + declare -a dispatched_artifacts=() for ((i=batch_start; i 0)) && previous="${reversed[$i-1]}" + if [[ $i -eq 0 ]]; then + previous="$LAST_SEEN" + else + previous="${reversed[$i-1]}" + fi echo "Filtering: $current (prev: $previous)" if [[ "${{ inputs.number_of_commits }}" -gt 0 ]]; then artifact="continuous-analysis-history-filtered-results-${dispatch_id}-${repo_name}-${current}-${{ inputs.skip_commits }}" else - artifact="continuous-analysis-filtered-results-${repo_name}-${current}" + artifact="continuous-analysis-future-filtered-results-${repo_name}-${current}" fi gh workflow run run-filter.yml \ From 2499074d75eeb973a9f29fef6ca736e51dce9977 Mon Sep 17 00:00:00 2001 From: Zhuohang Shen <18962118885@163.com> Date: Sun, 28 Dec 2025 14:42:50 -0500 Subject: [PATCH 4/4] Reduce max concurrent number --- .github/workflows/monitor-upstream-and-analyze.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/monitor-upstream-and-analyze.yml b/.github/workflows/monitor-upstream-and-analyze.yml index be207ae..d1893fe 100644 --- a/.github/workflows/monitor-upstream-and-analyze.yml +++ b/.github/workflows/monitor-upstream-and-analyze.yml @@ -31,7 +31,7 @@ jobs: BRANCH: "master" RUNNER_DISPATCH_TIMEOUT: 7200 # 2 hours FILTER_DISPATCH_TIMEOUT: 1800 # 30 minutes - MAX_CONCURRENT: 8 + MAX_CONCURRENT: 1 steps: # --------------------------------------------------------------------
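
Usage note (a minimal sketch, not part of the patches above): with these workflows on the fork's default branch, the historical backfill mode of monitor-upstream-and-analyze.yml can be dispatched through the GitHub CLI using the number_of_commits and skip_commits inputs defined in patch 2. The flag syntax below is standard gh; the repository slug and input values are illustrative assumptions only.

    # Hypothetical dispatch: analyze the last 50 upstream commits,
    # processing every 5th commit (skip 4 between picks).
    gh workflow run monitor-upstream-and-analyze.yml \
      --repo OWNER/FORK \
      --field number_of_commits=50 \
      --field skip_commits=4

Leaving both inputs at their defaults (0), or waiting for the cron trigger, runs the continuous mode, which analyzes only commits newer than the cached last-seen SHA.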