diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8b83cb41c79a..2bb8e1d69217 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -9,27 +9,31 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - uses: astral-sh/setup-uv@v6
+ - run: sudo apt-get update && sudo apt-get install -y libhdf5-dev
+ - uses: actions/checkout@v6
+ - uses: astral-sh/setup-uv@v7
with:
enable-cache: true
cache-dependency-glob: uv.lock
- - uses: actions/setup-python@v5
+ - uses: actions/setup-python@v6
with:
- python-version: 3.13
+ python-version: 3.14
allow-prereleases: true
- run: uv sync --group=test
- name: Run tests
# TODO: #8818 Re-enable quantum tests
- run: uv run pytest
+ run: uv run --with=pytest-run-parallel pytest
+ --iterations=8 --parallel-threads=auto
--ignore=computer_vision/cnn_classification.py
--ignore=docs/conf.py
--ignore=dynamic_programming/k_means_clustering_tensorflow.py
+ --ignore=machine_learning/local_weighted_learning/local_weighted_learning.py
--ignore=machine_learning/lstm/lstm_prediction.py
--ignore=neural_network/input_data.py
--ignore=project_euler/
--ignore=quantum/q_fourier_transform.py
--ignore=scripts/validate_solutions.py
+ --ignore=web_programming/current_stock_price.py
--ignore=web_programming/fetch_anime_and_play.py
--cov-report=term-missing:skip-covered
--cov=. .
diff --git a/.github/workflows/devcontainer_ci.yml b/.github/workflows/devcontainer_ci.yml
index c0b26bb77da6..d1b81593866f 100644
--- a/.github/workflows/devcontainer_ci.yml
+++ b/.github/workflows/devcontainer_ci.yml
@@ -12,7 +12,7 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- uses: devcontainers/ci@v0.3
with:
push: never
diff --git a/.github/workflows/directory_writer.yml b/.github/workflows/directory_writer.yml
index 3edb5c91a951..deffbe9e364f 100644
--- a/.github/workflows/directory_writer.yml
+++ b/.github/workflows/directory_writer.yml
@@ -6,12 +6,13 @@ jobs:
directory_writer:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
fetch-depth: 0
- - uses: actions/setup-python@v5
+ - uses: actions/setup-python@v6
with:
- python-version: 3.x
+ python-version: 3.14
+ allow-prereleases: true
- name: Write DIRECTORY.md
run: |
scripts/build_directory_md.py 2>&1 | tee DIRECTORY.md
diff --git a/.github/workflows/project_euler.yml b/.github/workflows/project_euler.yml
index eaf4150e4eaa..591b2163cc1a 100644
--- a/.github/workflows/project_euler.yml
+++ b/.github/workflows/project_euler.yml
@@ -14,21 +14,37 @@ jobs:
project-euler:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - uses: astral-sh/setup-uv@v6
- - uses: actions/setup-python@v5
+ - run:
+ sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
+ zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
+ libharfbuzz-dev libfribidi-dev libxcb1-dev
+ libxml2-dev libxslt-dev
+ libhdf5-dev
+ libopenblas-dev
+ - uses: actions/checkout@v6
+ - uses: astral-sh/setup-uv@v7
+ - uses: actions/setup-python@v6
with:
- python-version: 3.x
+ python-version: 3.14
+ allow-prereleases: true
- run: uv sync --group=euler-validate --group=test
- run: uv run pytest --doctest-modules --cov-report=term-missing:skip-covered --cov=project_euler/ project_euler/
validate-solutions:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - uses: astral-sh/setup-uv@v6
- - uses: actions/setup-python@v5
+ - run:
+ sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
+ zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
+ libharfbuzz-dev libfribidi-dev libxcb1-dev
+ libxml2-dev libxslt-dev
+ libhdf5-dev
+ libopenblas-dev
+ - uses: actions/checkout@v6
+ - uses: astral-sh/setup-uv@v7
+ - uses: actions/setup-python@v6
with:
- python-version: 3.x
+ python-version: 3.14
+ allow-prereleases: true
- run: uv sync --group=euler-validate --group=test
- run: uv run pytest scripts/validate_solutions.py
env:
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
index ec9f0202bd7e..13df19c8d743 100644
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -11,6 +11,6 @@ jobs:
ruff:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - uses: astral-sh/setup-uv@v6
+ - uses: actions/checkout@v6
+ - uses: astral-sh/setup-uv@v7
- run: uvx ruff check --output-format=github .
diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml
index 2010041d80c5..bf0a74a239c8 100644
--- a/.github/workflows/sphinx.yml
+++ b/.github/workflows/sphinx.yml
@@ -25,16 +25,23 @@ jobs:
build_docs:
runs-on: ubuntu-24.04-arm
steps:
- - uses: actions/checkout@v4
- - uses: astral-sh/setup-uv@v6
- - uses: actions/setup-python@v5
+ - run:
+ sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
+ zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
+ libharfbuzz-dev libfribidi-dev libxcb1-dev
+ libxml2-dev libxslt-dev
+ libhdf5-dev
+ libopenblas-dev
+ - uses: actions/checkout@v6
+ - uses: astral-sh/setup-uv@v7
+ - uses: actions/setup-python@v6
with:
- python-version: 3.13
+ python-version: 3.14
allow-prereleases: true
- run: uv sync --group=docs
- uses: actions/configure-pages@v5
- run: uv run sphinx-build -c docs . docs/_build/html
- - uses: actions/upload-pages-artifact@v3
+ - uses: actions/upload-pages-artifact@v4
with:
path: docs/_build/html
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f733908c2987..57f92ce941d9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,9 @@
+ci:
+ autoupdate_schedule: monthly
+
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v5.0.0
+ rev: v6.0.0
hooks:
- id: check-executables-have-shebangs
- id: check-toml
@@ -16,7 +19,7 @@ repos:
- id: auto-walrus
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.12.4
+ rev: v0.14.7
hooks:
- id: ruff-check
- id: ruff-format
@@ -29,7 +32,7 @@ repos:
- tomli
- repo: https://github.com/tox-dev/pyproject-fmt
- rev: v2.6.0
+ rev: v2.11.1
hooks:
- id: pyproject-fmt
@@ -47,7 +50,7 @@ repos:
- id: validate-pyproject
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.15.0
+ rev: v1.19.0
hooks:
- id: mypy
args:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3df39f95b784..35de0bf75ed5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -99,7 +99,7 @@ We want your work to be readable by others; therefore, we encourage you to note
ruff check
```
-- Original code submission require docstrings or comments to describe your work.
+- Original code submissions require docstrings or comments to describe your work.
- More on docstrings and comments:
diff --git a/DIRECTORY.md b/DIRECTORY.md
index 81d6f4c70864..0f9859577493 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -12,6 +12,7 @@
* [Combination Sum](backtracking/combination_sum.py)
* [Crossword Puzzle Solver](backtracking/crossword_puzzle_solver.py)
* [Generate Parentheses](backtracking/generate_parentheses.py)
+ * [Generate Parentheses Iterative](backtracking/generate_parentheses_iterative.py)
* [Hamiltonian Cycle](backtracking/hamiltonian_cycle.py)
* [Knight Tour](backtracking/knight_tour.py)
* [Match Word Pattern](backtracking/match_word_pattern.py)
@@ -174,6 +175,7 @@
## Data Compression
* [Burrows Wheeler](data_compression/burrows_wheeler.py)
+ * [Coordinate Compression](data_compression/coordinate_compression.py)
* [Huffman](data_compression/huffman.py)
* [Lempel Ziv](data_compression/lempel_ziv.py)
* [Lempel Ziv Decompress](data_compression/lempel_ziv_decompress.py)
@@ -193,6 +195,7 @@
* [Permutations](data_structures/arrays/permutations.py)
* [Prefix Sum](data_structures/arrays/prefix_sum.py)
* [Product Sum](data_structures/arrays/product_sum.py)
+ * [Rotate Array](data_structures/arrays/rotate_array.py)
* [Sparse Table](data_structures/arrays/sparse_table.py)
* [Sudoku Solver](data_structures/arrays/sudoku_solver.py)
* Binary Tree
@@ -621,6 +624,7 @@
* [Sequential Minimum Optimization](machine_learning/sequential_minimum_optimization.py)
* [Similarity Search](machine_learning/similarity_search.py)
* [Support Vector Machines](machine_learning/support_vector_machines.py)
+ * [T Stochastic Neighbour Embedding](machine_learning/t_stochastic_neighbour_embedding.py)
* [Word Frequency Functions](machine_learning/word_frequency_functions.py)
* [Xgboost Classifier](machine_learning/xgboost_classifier.py)
* [Xgboost Regressor](machine_learning/xgboost_regressor.py)
@@ -723,6 +727,7 @@
* [Secant Method](maths/numerical_analysis/secant_method.py)
* [Simpson Rule](maths/numerical_analysis/simpson_rule.py)
* [Square Root](maths/numerical_analysis/square_root.py)
+ * [Weierstrass Method](maths/numerical_analysis/weierstrass_method.py)
* [Odd Sieve](maths/odd_sieve.py)
* [Perfect Cube](maths/perfect_cube.py)
* [Perfect Number](maths/perfect_number.py)
@@ -956,6 +961,7 @@
* [Sol1](project_euler/problem_009/sol1.py)
* [Sol2](project_euler/problem_009/sol2.py)
* [Sol3](project_euler/problem_009/sol3.py)
+ * [Sol4](project_euler/problem_009/sol4.py)
* Problem 010
* [Sol1](project_euler/problem_010/sol1.py)
* [Sol2](project_euler/problem_010/sol2.py)
@@ -1266,6 +1272,7 @@
* [Comb Sort](sorts/comb_sort.py)
* [Counting Sort](sorts/counting_sort.py)
* [Cycle Sort](sorts/cycle_sort.py)
+ * [Cyclic Sort](sorts/cyclic_sort.py)
* [Double Sort](sorts/double_sort.py)
* [Dutch National Flag Sort](sorts/dutch_national_flag_sort.py)
* [Exchange Sort](sorts/exchange_sort.py)
@@ -1296,6 +1303,7 @@
* [Shell Sort](sorts/shell_sort.py)
* [Shrink Shell Sort](sorts/shrink_shell_sort.py)
* [Slowsort](sorts/slowsort.py)
+ * [Stalin Sort](sorts/stalin_sort.py)
* [Stooge Sort](sorts/stooge_sort.py)
* [Strand Sort](sorts/strand_sort.py)
* [Tim Sort](sorts/tim_sort.py)
diff --git a/README.md b/README.md
index d8eba4e016fa..182d36a8d905 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
+
@@ -19,6 +20,7 @@
+
@@ -27,23 +29,24 @@
-
-
+
+
+
- All algorithms implemented in Python - for education
+ All algorithms implemented in Python - for education 📚
Implementations are for learning purposes only. They may be less efficient than the implementations in the Python standard library. Use them at your discretion.
-## Getting Started
+## 🚀 Getting Started
-Read through our [Contribution Guidelines](CONTRIBUTING.md) before you contribute.
+📋 Read through our [Contribution Guidelines](CONTRIBUTING.md) before you contribute.
-## Community Channels
+## 🌐 Community Channels
We are on [Discord](https://the-algorithms.com/discord) and [Gitter](https://gitter.im/TheAlgorithms/community)! Community channels are a great way for you to ask questions and get help. Please join us!
-## List of Algorithms
+## 📜 List of Algorithms
See our [directory](DIRECTORY.md) for easier navigation and a better overview of the project.
diff --git a/backtracking/combination_sum.py b/backtracking/combination_sum.py
index 3c6ed81f44f0..3d954f11d2c5 100644
--- a/backtracking/combination_sum.py
+++ b/backtracking/combination_sum.py
@@ -47,8 +47,18 @@ def combination_sum(candidates: list, target: int) -> list:
>>> combination_sum([-8, 2.3, 0], 1)
Traceback (most recent call last):
...
- RecursionError: maximum recursion depth exceeded
+ ValueError: All elements in candidates must be non-negative
+ >>> combination_sum([], 1)
+ Traceback (most recent call last):
+ ...
+ ValueError: Candidates list should not be empty
"""
+ if not candidates:
+ raise ValueError("Candidates list should not be empty")
+
+ if any(x < 0 for x in candidates):
+ raise ValueError("All elements in candidates must be non-negative")
+
path = [] # type: list[int]
answer = [] # type: list[int]
backtrack(candidates, path, answer, target, 0)
diff --git a/backtracking/generate_parentheses_iterative.py b/backtracking/generate_parentheses_iterative.py
new file mode 100644
index 000000000000..175941c7ae95
--- /dev/null
+++ b/backtracking/generate_parentheses_iterative.py
@@ -0,0 +1,62 @@
+def generate_parentheses_iterative(length: int) -> list:
+ """
+ Generate all valid combinations of parentheses (Iterative Approach).
+
+ The algorithm works as follows:
+ 1. Initialize an empty list to store the combinations.
+ 2. Initialize a stack to keep track of partial combinations.
+ 3. Start with empty string and push it onstack along with the counts of '(' and ')'.
+ 4. While the stack is not empty:
+ a. Pop a partial combination and its open and close counts from the stack.
+ b. If the combination length is equal to 2*length, add it to the result.
+ c. If open count < length, push new combination with added '(' on stack.
+ d. If close count < open count, push new combination with added ')' on stack.
+ 5. Return the result containing all valid combinations.
+
+ Args:
+ length: The desired length of the parentheses combinations
+
+ Returns:
+ A list of strings representing valid combinations of parentheses
+
+ Time Complexity:
+ O(2^(2*length))
+
+ Space Complexity:
+ O(2^(2*length))
+
+ >>> generate_parentheses_iterative(3)
+ ['()()()', '()(())', '(())()', '(()())', '((()))']
+ >>> generate_parentheses_iterative(2)
+ ['()()', '(())']
+ >>> generate_parentheses_iterative(1)
+ ['()']
+ >>> generate_parentheses_iterative(0)
+ ['']
+ """
+ result = []
+ stack = []
+
+ # Each element in stack is a tuple (current_combination, open_count, close_count)
+ stack.append(("", 0, 0))
+
+ while stack:
+ current_combination, open_count, close_count = stack.pop()
+
+ if len(current_combination) == 2 * length:
+ result.append(current_combination)
+
+ if open_count < length:
+ stack.append((current_combination + "(", open_count + 1, close_count))
+
+ if close_count < open_count:
+ stack.append((current_combination + ")", open_count, close_count + 1))
+
+ return result
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
+ print(generate_parentheses_iterative(3))
diff --git a/bit_manipulation/reverse_bits.py b/bit_manipulation/reverse_bits.py
index 74b4f2563234..4a0b2ff7047a 100644
--- a/bit_manipulation/reverse_bits.py
+++ b/bit_manipulation/reverse_bits.py
@@ -1,6 +1,6 @@
def get_reverse_bit_string(number: int) -> str:
"""
- return the bit string of an integer
+ Return the reverse bit string of a 32 bit integer
>>> get_reverse_bit_string(9)
'10010000000000000000000000000000'
@@ -8,76 +8,76 @@ def get_reverse_bit_string(number: int) -> str:
'11010100000000000000000000000000'
>>> get_reverse_bit_string(2873)
'10011100110100000000000000000000'
+ >>> get_reverse_bit_string(2550136832)
+ '00000000000000000000000000011001'
>>> get_reverse_bit_string("this is not a number")
Traceback (most recent call last):
...
- TypeError: operation can not be conducted on a object of type str
+ TypeError: operation can not be conducted on an object of type str
"""
if not isinstance(number, int):
msg = (
- "operation can not be conducted on a object of type "
+ "operation can not be conducted on an object of type "
f"{type(number).__name__}"
)
raise TypeError(msg)
bit_string = ""
for _ in range(32):
bit_string += str(number % 2)
- number = number >> 1
+ number >>= 1
return bit_string
-def reverse_bit(number: int) -> str:
+def reverse_bit(number: int) -> int:
"""
- Take in an 32 bit integer, reverse its bits,
- return a string of reverse bits
-
- result of a reverse_bit and operation on the integer provided.
+ Take in a 32 bit integer, reverse its bits, return a 32 bit integer result
>>> reverse_bit(25)
- '00000000000000000000000000011001'
+ 2550136832
>>> reverse_bit(37)
- '00000000000000000000000000100101'
+ 2751463424
>>> reverse_bit(21)
- '00000000000000000000000000010101'
+ 2818572288
>>> reverse_bit(58)
- '00000000000000000000000000111010'
+ 1543503872
>>> reverse_bit(0)
- '00000000000000000000000000000000'
+ 0
>>> reverse_bit(256)
- '00000000000000000000000100000000'
+ 8388608
+ >>> reverse_bit(2550136832)
+ 25
>>> reverse_bit(-1)
Traceback (most recent call last):
...
- ValueError: the value of input must be positive
+ ValueError: The value of input must be non-negative
>>> reverse_bit(1.1)
Traceback (most recent call last):
...
- TypeError: Input value must be a 'int' type
+ TypeError: Input value must be an 'int' type
>>> reverse_bit("0")
Traceback (most recent call last):
...
- TypeError: '<' not supported between instances of 'str' and 'int'
+ TypeError: Input value must be an 'int' type
"""
+ if not isinstance(number, int):
+ raise TypeError("Input value must be an 'int' type")
if number < 0:
- raise ValueError("the value of input must be positive")
- elif isinstance(number, float):
- raise TypeError("Input value must be a 'int' type")
- elif isinstance(number, str):
- raise TypeError("'<' not supported between instances of 'str' and 'int'")
+ raise ValueError("The value of input must be non-negative")
+
result = 0
- # iterator over [1 to 32],since we are dealing with 32 bit integer
- for _ in range(1, 33):
+ # iterator over [0 to 31], since we are dealing with a 32 bit integer
+ for _ in range(32):
# left shift the bits by unity
- result = result << 1
+ result <<= 1
# get the end bit
- end_bit = number % 2
+ end_bit = number & 1
# right shift the bits by unity
- number = number >> 1
- # add that bit to our ans
- result = result | end_bit
- return get_reverse_bit_string(result)
+ number >>= 1
+ # add that bit to our answer
+ result |= end_bit
+ return result
if __name__ == "__main__":
diff --git a/blockchain/README.md b/blockchain/README.md
index b5fab7b36eaa..ecd784fc2c7d 100644
--- a/blockchain/README.md
+++ b/blockchain/README.md
@@ -1,8 +1,8 @@
# Blockchain
-A Blockchain is a type of **distributed ledger** technology (DLT) that consists of growing list of records, called **blocks**, that are securely linked together using **cryptography**.
+A Blockchain is a type of **distributed ledger** technology (DLT) that consists of a growing list of records, called **blocks**, that are securely linked together using **cryptography**.
-Let's breakdown the terminologies in the above definition. We find below terminologies,
+Let's break down the terminologies in the above definition. We find below terminologies,
- Digital Ledger Technology (DLT)
- Blocks
@@ -10,35 +10,35 @@ Let's breakdown the terminologies in the above definition. We find below termino
## Digital Ledger Technology
- It is otherwise called as distributed ledger technology. It is simply the opposite of centralized database. Firstly, what is a **ledger**? A ledger is a book or collection of accounts that records account transactions.
+Blockchain is also called distributed ledger technology. It is simply the opposite of a centralized database. Firstly, what is a **ledger**? A ledger is a book or collection of accounts that records account transactions.
- *Why is Blockchain addressed as digital ledger if it can record more than account transactions? What other transaction details and information can it hold?*
+*Why is Blockchain addressed as a digital ledger if it can record more than account transactions? What other transaction details and information can it hold?*
-Digital Ledger Technology is just a ledger which is shared among multiple nodes. This way there exist no need for central authority to hold the info. Okay, how is it differentiated from central database and what are their benefits?
+Digital Ledger Technology is just a ledger that is shared among multiple nodes. This way there exists no need for a central authority to hold the info. Okay, how is it differentiated from a central database and what are their benefits?
-There is an organization which has 4 branches whose data are stored in a centralized database. So even if one branch needs any data from ledger they need an approval from database in charge. And if one hacks the central database he gets to tamper and control all the data.
+Suppose that there is an organization that has 4 branches whose data are stored in a centralized database. So even if one branch needs any data from the ledger it needs approval from the database in charge. And if one hacks the central database he gets to tamper and control all the data.
-Now lets assume every branch has a copy of the ledger and then once anything is added to the ledger by anyone branch it is gonna automatically reflect in all other ledgers available in other branch. This is done using Peer-to-peer network.
+Now let's assume every branch has a copy of the ledger and then once anything is added to the ledger by any branch it is gonna automatically reflect in all other ledgers available in other branches. This is done using a peer-to-peer network.
-So this means even if information is tampered in one branch we can find out. If one branch is hacked we can be alerted ,so we can safeguard other branches. Now, assume these branches as computers or nodes and the ledger is a transaction record or digital receipt. If one ledger is hacked in a node we can detect since there will be a mismatch in comparison with other node information. So this is the concept of Digital Ledger Technology.
+This means that even if information is tampered with in one branch we can find out. If one branch is hacked we can be alerted, so we can safeguard other branches. Now, assume these branches as computers or nodes and the ledger is a transaction record or digital receipt. If one ledger is hacked in a node we can detect since there will be a mismatch in comparison with other node information. So this is the concept of Digital Ledger Technology.
*Is it required for all nodes to have access to all information in other nodes? Wouldn't this require enormous storage space in each node?*
## Blocks
-In short a block is nothing but collections of records with a labelled header. These are connected cryptographically. Once a new block is added to a chain, the previous block is connected, more precisely said as locked and hence, will remain unaltered. We can understand this concept once we get a clear understanding of working mechanism of blockchain.
+In short, a block is nothing but a collection of records with a labelled header. These are connected cryptographically. Once a new block is added to a chain, the previous block is connected, more precisely said as locked, and hence will remain unaltered. We can understand this concept once we get a clear understanding of the working mechanism of blockchain.
## Cryptography
-It is the practice and study of secure communication techniques in the midst of adversarial behavior. More broadly, cryptography is the creation and analysis of protocols that prevent third parties or the general public from accessing private messages.
+Cryptography is the practice and study of secure communication techniques amid adversarial behavior. More broadly, cryptography is the creation and analysis of protocols that prevent third parties or the general public from accessing private messages.
*Which cryptography technology is most widely used in blockchain and why?*
-So, in general, blockchain technology is a distributed record holder which records the information about ownership of an asset. To define precisely,
+So, in general, blockchain technology is a distributed record holder that records the information about ownership of an asset. To define precisely,
> Blockchain is a distributed, immutable ledger that makes it easier to record transactions and track assets in a corporate network.
An asset could be tangible (such as a house, car, cash, or land) or intangible (such as a business) (intellectual property, patents, copyrights, branding). A blockchain network can track and sell almost anything of value, lowering risk and costs for everyone involved.
-So this is all about introduction to blockchain technology. To learn more about the topic refer below links....
+So this is all about the introduction to blockchain technology. To learn more about the topic refer below links....
*
*
*
diff --git a/boolean_algebra/imply_gate.py b/boolean_algebra/imply_gate.py
index b64ebaceb306..3d71ff12f8d9 100644
--- a/boolean_algebra/imply_gate.py
+++ b/boolean_algebra/imply_gate.py
@@ -33,6 +33,58 @@ def imply_gate(input_1: int, input_2: int) -> int:
return int(input_1 == 0 or input_2 == 1)
+def recursive_imply_list(input_list: list[int]) -> int:
+ """
+ Recursively calculates the implication of a list.
+ Strictly the implication is applied consecutively left to right:
+ ( (a -> b) -> c ) -> d ...
+
+ >>> recursive_imply_list([])
+ Traceback (most recent call last):
+ ...
+ ValueError: Input list must contain at least two elements
+ >>> recursive_imply_list([0])
+ Traceback (most recent call last):
+ ...
+ ValueError: Input list must contain at least two elements
+ >>> recursive_imply_list([1])
+ Traceback (most recent call last):
+ ...
+ ValueError: Input list must contain at least two elements
+ >>> recursive_imply_list([0, 0])
+ 1
+ >>> recursive_imply_list([0, 1])
+ 1
+ >>> recursive_imply_list([1, 0])
+ 0
+ >>> recursive_imply_list([1, 1])
+ 1
+ >>> recursive_imply_list([0, 0, 0])
+ 0
+ >>> recursive_imply_list([0, 0, 1])
+ 1
+ >>> recursive_imply_list([0, 1, 0])
+ 0
+ >>> recursive_imply_list([0, 1, 1])
+ 1
+ >>> recursive_imply_list([1, 0, 0])
+ 1
+ >>> recursive_imply_list([1, 0, 1])
+ 1
+ >>> recursive_imply_list([1, 1, 0])
+ 0
+ >>> recursive_imply_list([1, 1, 1])
+ 1
+ """
+ if len(input_list) < 2:
+ raise ValueError("Input list must contain at least two elements")
+ first_implication = imply_gate(input_list[0], input_list[1])
+ if len(input_list) == 2:
+ return first_implication
+ new_list = [first_implication, *input_list[2:]]
+ return recursive_imply_list(new_list)
+
+
if __name__ == "__main__":
import doctest
diff --git a/ciphers/gronsfeld_cipher.py b/ciphers/gronsfeld_cipher.py
index 8fbeab4307fc..a72b141bd502 100644
--- a/ciphers/gronsfeld_cipher.py
+++ b/ciphers/gronsfeld_cipher.py
@@ -20,7 +20,7 @@ def gronsfeld(text: str, key: str) -> str:
>>> gronsfeld('yes, ¥€$ - _!@#%?', '')
Traceback (most recent call last):
...
- ZeroDivisionError: integer modulo by zero
+ ZeroDivisionError: division by zero
"""
ascii_len = len(ascii_uppercase)
key_len = len(key)
diff --git a/data_compression/coordinate_compression.py b/data_compression/coordinate_compression.py
new file mode 100644
index 000000000000..9c4ad9a99ac3
--- /dev/null
+++ b/data_compression/coordinate_compression.py
@@ -0,0 +1,132 @@
+"""
+Assumption:
+ - The values to compress are assumed to be comparable,
+ values can be sorted and compared with '<' and '>' operators.
+"""
+
+
+class CoordinateCompressor:
+ """
+ A class for coordinate compression.
+
+ This class allows you to compress and decompress a list of values.
+
+ Mapping:
+ In addition to compression and decompression, this class maintains a mapping
+ between original values and their compressed counterparts using two data
+ structures: a dictionary `coordinate_map` and a list `reverse_map`:
+ - `coordinate_map`: A dictionary that maps original values to their compressed
+ coordinates. Keys are original values, and values are compressed coordinates.
+ - `reverse_map`: A list used for reverse mapping, where each index corresponds
+ to a compressed coordinate, and the value at that index is the original value.
+
+ Example of mapping:
+ Original: 10, Compressed: 0
+ Original: 52, Compressed: 1
+ Original: 83, Compressed: 2
+ Original: 100, Compressed: 3
+
+ This mapping allows for efficient compression and decompression of values within
+ the list.
+ """
+
+ def __init__(self, arr: list[int | float | str]) -> None:
+ """
+ Initialize the CoordinateCompressor with a list.
+
+ Args:
+ arr: The list of values to be compressed.
+
+ >>> arr = [100, 10, 52, 83]
+ >>> cc = CoordinateCompressor(arr)
+ >>> cc.compress(100)
+ 3
+ >>> cc.compress(52)
+ 1
+ >>> cc.decompress(1)
+ 52
+ """
+
+ # A dictionary to store compressed coordinates
+ self.coordinate_map: dict[int | float | str, int] = {}
+
+ # A list to store reverse mapping
+ self.reverse_map: list[int | float | str] = [-1] * len(arr)
+
+ self.arr = sorted(arr) # The input list
+ self.n = len(arr) # The length of the input list
+ self.compress_coordinates()
+
+ def compress_coordinates(self) -> None:
+ """
+ Compress the coordinates in the input list.
+
+ >>> arr = [100, 10, 52, 83]
+ >>> cc = CoordinateCompressor(arr)
+ >>> cc.coordinate_map[83]
+ 2
+ >>> cc.coordinate_map[80] # Value not in the original list
+ Traceback (most recent call last):
+ ...
+ KeyError: 80
+ >>> cc.reverse_map[2]
+ 83
+ """
+ key = 0
+ for val in self.arr:
+ if val not in self.coordinate_map:
+ self.coordinate_map[val] = key
+ self.reverse_map[key] = val
+ key += 1
+
+ def compress(self, original: float | str) -> int:
+ """
+ Compress a single value.
+
+ Args:
+ original: The value to compress.
+
+ Returns:
+ The compressed integer, or -1 if not found in the original list.
+
+ >>> arr = [100, 10, 52, 83]
+ >>> cc = CoordinateCompressor(arr)
+ >>> cc.compress(100)
+ 3
+ >>> cc.compress(7) # Value not in the original list
+ -1
+ """
+ return self.coordinate_map.get(original, -1)
+
+ def decompress(self, num: int) -> int | float | str:
+ """
+ Decompress a single integer.
+
+ Args:
+ num: The compressed integer to decompress.
+
+ Returns:
+ The original value.
+
+ >>> arr = [100, 10, 52, 83]
+ >>> cc = CoordinateCompressor(arr)
+ >>> cc.decompress(0)
+ 10
+ >>> cc.decompress(5) # Compressed coordinate out of range
+ -1
+ """
+ return self.reverse_map[num] if 0 <= num < len(self.reverse_map) else -1
+
+
+if __name__ == "__main__":
+ from doctest import testmod
+
+ testmod()
+
+ arr: list[int | float | str] = [100, 10, 52, 83]
+ cc = CoordinateCompressor(arr)
+
+ for original in arr:
+ compressed = cc.compress(original)
+ decompressed = cc.decompress(compressed)
+ print(f"Original: {decompressed}, Compressed: {compressed}")
diff --git a/data_structures/arrays/rotate_array.py b/data_structures/arrays/rotate_array.py
new file mode 100644
index 000000000000..d5ce4b4078b3
--- /dev/null
+++ b/data_structures/arrays/rotate_array.py
@@ -0,0 +1,80 @@
+def rotate_array(arr: list[int], steps: int) -> list[int]:
+ """
+ Rotates a list to the right by steps positions.
+
+ Parameters:
+ arr (List[int]): The list of integers to rotate.
+ steps (int): Number of positions to rotate. Can be negative for left rotation.
+
+ Returns:
+ List[int]: Rotated list.
+
+ Examples:
+ >>> rotate_array([1, 2, 3, 4, 5], 2)
+ [4, 5, 1, 2, 3]
+ >>> rotate_array([1, 2, 3, 4, 5], -2)
+ [3, 4, 5, 1, 2]
+ >>> rotate_array([1, 2, 3, 4, 5], 7)
+ [4, 5, 1, 2, 3]
+ >>> rotate_array([], 3)
+ []
+ """
+
+ n = len(arr)
+ if n == 0:
+ return arr
+
+ steps = steps % n
+
+ if steps < 0:
+ steps += n
+
+ def reverse(start: int, end: int) -> None:
+ """
+ Reverses a portion of the list in place from index start to end.
+
+ Parameters:
+ start (int): Starting index of the portion to reverse.
+ end (int): Ending index of the portion to reverse.
+
+ Returns:
+ None
+
+ Examples:
+ >>> example = [1, 2, 3, 4, 5]
+ >>> def reverse_test(arr, start, end):
+ ... while start < end:
+ ... arr[start], arr[end] = arr[end], arr[start]
+ ... start += 1
+ ... end -= 1
+ >>> reverse_test(example, 0, 2)
+ >>> example
+ [3, 2, 1, 4, 5]
+ >>> reverse_test(example, 2, 4)
+ >>> example
+ [3, 2, 5, 4, 1]
+ """
+
+ while start < end:
+ arr[start], arr[end] = arr[end], arr[start]
+ start += 1
+ end -= 1
+
+ reverse(0, n - 1)
+ reverse(0, steps - 1)
+ reverse(steps, n - 1)
+
+ return arr
+
+
+if __name__ == "__main__":
+ examples = [
+ ([1, 2, 3, 4, 5], 2),
+ ([1, 2, 3, 4, 5], -2),
+ ([1, 2, 3, 4, 5], 7),
+ ([], 3),
+ ]
+
+ for arr, steps in examples:
+ rotated = rotate_array(arr.copy(), steps)
+ print(f"Rotate {arr} by {steps}: {rotated}")
diff --git a/data_structures/arrays/sudoku_solver.py b/data_structures/arrays/sudoku_solver.py
index 4c722f12fd6e..d2fa43bbf298 100644
--- a/data_structures/arrays/sudoku_solver.py
+++ b/data_structures/arrays/sudoku_solver.py
@@ -11,6 +11,19 @@
def cross(items_a, items_b):
"""
Cross product of elements in A and elements in B.
+
+ >>> cross('AB', '12')
+ ['A1', 'A2', 'B1', 'B2']
+ >>> cross('ABC', '123')
+ ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2', 'C3']
+ >>> cross('ABC', '1234')
+ ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4', 'C1', 'C2', 'C3', 'C4']
+ >>> cross('', '12')
+ []
+ >>> cross('A', '')
+ []
+ >>> cross('', '')
+ []
"""
return [a + b for a in items_a for b in items_b]
@@ -149,7 +162,7 @@ def search(values):
if all(len(values[s]) == 1 for s in squares):
return values ## Solved!
## Chose the unfilled square s with the fewest possibilities
- n, s = min((len(values[s]), s) for s in squares if len(values[s]) > 1)
+ _n, s = min((len(values[s]), s) for s in squares if len(values[s]) > 1)
return some(search(assign(values.copy(), s, d)) for d in values[s])
diff --git a/data_structures/binary_tree/binary_tree_path_sum.py b/data_structures/binary_tree/binary_tree_path_sum.py
index a3fe9ca7a7e2..8477690c777a 100644
--- a/data_structures/binary_tree/binary_tree_path_sum.py
+++ b/data_structures/binary_tree/binary_tree_path_sum.py
@@ -50,6 +50,26 @@ class BinaryTreePathSum:
>>> tree.right.right = Node(10)
>>> BinaryTreePathSum().path_sum(tree, 8)
2
+ >>> BinaryTreePathSum().path_sum(None, 0)
+ 0
+ >>> BinaryTreePathSum().path_sum(tree, 0)
+ 0
+
+ The second tree looks like this
+ 0
+ / \
+ 5 5
+
+ >>> tree2 = Node(0)
+ >>> tree2.left = Node(5)
+ >>> tree2.right = Node(5)
+
+ >>> BinaryTreePathSum().path_sum(tree2, 5)
+ 4
+ >>> BinaryTreePathSum().path_sum(tree2, -1)
+ 0
+ >>> BinaryTreePathSum().path_sum(tree2, 0)
+ 1
"""
target: int
diff --git a/data_structures/linked_list/from_sequence.py b/data_structures/linked_list/from_sequence.py
index fa43f4d10e08..b16b2258c1f1 100644
--- a/data_structures/linked_list/from_sequence.py
+++ b/data_structures/linked_list/from_sequence.py
@@ -1,5 +1,7 @@
-# Recursive Program to create a Linked List from a sequence and
-# print a string representation of it.
+"""
+Recursive Program to create a Linked List from a sequence and
+print a string representation of it.
+"""
class Node:
@@ -18,13 +20,32 @@ def __repr__(self):
return string_rep
-def make_linked_list(elements_list):
- """Creates a Linked List from the elements of the given sequence
- (list/tuple) and returns the head of the Linked List."""
+def make_linked_list(elements_list: list | tuple) -> Node:
+ """
+ Creates a Linked List from the elements of the given sequence
+ (list/tuple) and returns the head of the Linked List.
+
+ >>> make_linked_list([])
+ Traceback (most recent call last):
+ ...
+ ValueError: The Elements List is empty
+ >>> make_linked_list(())
+ Traceback (most recent call last):
+ ...
+ ValueError: The Elements List is empty
+ >>> make_linked_list([1])
+ <1> --->
+ >>> make_linked_list((1,))
+ <1> --->
+ >>> make_linked_list([1, 3, 5, 32, 44, 12, 43])
+ <1> ---> <3> ---> <5> ---> <32> ---> <44> ---> <12> ---> <43> --->
+ >>> make_linked_list((1, 3, 5, 32, 44, 12, 43))
+ <1> ---> <3> ---> <5> ---> <32> ---> <44> ---> <12> ---> <43> --->
+ """
# if elements_list is empty
if not elements_list:
- raise Exception("The Elements List is empty")
+ raise ValueError("The Elements List is empty")
# Set first element as Head
head = Node(elements_list[0])
@@ -34,11 +55,3 @@ def make_linked_list(elements_list):
current.next = Node(data)
current = current.next
return head
-
-
-list_data = [1, 3, 5, 32, 44, 12, 43]
-print(f"List: {list_data}")
-print("Creating Linked List from List.")
-linked_list = make_linked_list(list_data)
-print("Linked List:")
-print(linked_list)
diff --git a/data_structures/queues/circular_queue.py b/data_structures/queues/circular_queue.py
index efbf1efdc42d..e9cb2cac4fd8 100644
--- a/data_structures/queues/circular_queue.py
+++ b/data_structures/queues/circular_queue.py
@@ -17,7 +17,7 @@ def __len__(self) -> int:
>>> len(cq)
0
>>> cq.enqueue("A") # doctest: +ELLIPSIS
-
>>> cq.array
['A', None, None, None, None]
>>> len(cq)
@@ -51,17 +51,24 @@ def enqueue(self, data):
"""
This function inserts an element at the end of the queue using self.rear value
as an index.
+
>>> cq = CircularQueue(5)
>>> cq.enqueue("A") # doctest: +ELLIPSIS
-
>>> (cq.size, cq.first())
(1, 'A')
>>> cq.enqueue("B") # doctest: +ELLIPSIS
-
>>> cq.array
['A', 'B', None, None, None]
>>> (cq.size, cq.first())
(2, 'A')
+ >>> cq.enqueue("C").enqueue("D").enqueue("E") # doctest: +ELLIPSIS
+
+ >>> cq.enqueue("F")
+ Traceback (most recent call last):
+ ...
+ Exception: QUEUE IS FULL
"""
if self.size >= self.n:
raise Exception("QUEUE IS FULL")
@@ -75,6 +82,7 @@ def dequeue(self):
"""
This function removes an element from the queue using on self.front value as an
index and returns it
+
>>> cq = CircularQueue(5)
>>> cq.dequeue()
Traceback (most recent call last):
diff --git a/data_structures/stacks/stock_span_problem.py b/data_structures/stacks/stock_span_problem.py
index 5efe58d25798..74c2636784e2 100644
--- a/data_structures/stacks/stock_span_problem.py
+++ b/data_structures/stacks/stock_span_problem.py
@@ -8,8 +8,29 @@
"""
-def calculation_span(price, s):
+def calculate_span(price: list[int]) -> list[int]:
+ """
+ Calculate the span values for a given list of stock prices.
+ Args:
+ price: List of stock prices.
+ Returns:
+ List of span values.
+
+ >>> calculate_span([10, 4, 5, 90, 120, 80])
+ [1, 1, 2, 4, 5, 1]
+ >>> calculate_span([100, 50, 60, 70, 80, 90])
+ [1, 1, 2, 3, 4, 5]
+ >>> calculate_span([5, 4, 3, 2, 1])
+ [1, 1, 1, 1, 1]
+ >>> calculate_span([1, 2, 3, 4, 5])
+ [1, 2, 3, 4, 5]
+ >>> calculate_span([10, 20, 30, 40, 50])
+ [1, 2, 3, 4, 5]
+ >>> calculate_span([100, 80, 60, 70, 60, 75, 85])
+ [1, 1, 1, 2, 1, 4, 6]
+ """
n = len(price)
+ s = [0] * n
# Create a stack and push index of fist element to it
st = []
st.append(0)
@@ -21,18 +42,20 @@ def calculation_span(price, s):
for i in range(1, n):
# Pop elements from stack while stack is not
# empty and top of stack is smaller than price[i]
- while len(st) > 0 and price[st[0]] <= price[i]:
+ while len(st) > 0 and price[st[-1]] <= price[i]:
st.pop()
# If stack becomes empty, then price[i] is greater
# than all elements on left of it, i.e. price[0],
# price[1], ..price[i-1]. Else the price[i] is
# greater than elements after top of stack
- s[i] = i + 1 if len(st) <= 0 else (i - st[0])
+ s[i] = i + 1 if len(st) <= 0 else (i - st[-1])
# Push this element to stack
st.append(i)
+ return s
+
# A utility function to print elements of array
def print_array(arr, n):
@@ -42,10 +65,9 @@ def print_array(arr, n):
# Driver program to test above function
price = [10, 4, 5, 90, 120, 80]
-S = [0 for i in range(len(price) + 1)]
-# Fill the span values in array S[]
-calculation_span(price, S)
+# Calculate the span values
+S = calculate_span(price)
# Print the calculated span values
print_array(S, len(price))
diff --git a/data_structures/trie/radix_tree.py b/data_structures/trie/radix_tree.py
index caf566a6ce30..bd2306befa79 100644
--- a/data_structures/trie/radix_tree.py
+++ b/data_structures/trie/radix_tree.py
@@ -115,7 +115,7 @@ def find(self, word: str) -> bool:
if not incoming_node:
return False
else:
- matching_string, remaining_prefix, remaining_word = incoming_node.match(
+ _matching_string, remaining_prefix, remaining_word = incoming_node.match(
word
)
# If there is remaining prefix, the word can't be on the tree
@@ -144,7 +144,7 @@ def delete(self, word: str) -> bool:
if not incoming_node:
return False
else:
- matching_string, remaining_prefix, remaining_word = incoming_node.match(
+ _matching_string, remaining_prefix, remaining_word = incoming_node.match(
word
)
# If there is remaining prefix, the word can't be on the tree
diff --git a/dynamic_programming/word_break.py b/dynamic_programming/word_break.py
index 4d7ac869080c..c4ba2d7aa976 100644
--- a/dynamic_programming/word_break.py
+++ b/dynamic_programming/word_break.py
@@ -90,7 +90,7 @@ def is_breakable(index: int) -> bool:
if index == len_string:
return True
- trie_node = trie
+ trie_node: Any = trie
for i in range(index, len_string):
trie_node = trie_node.get(string[i], None)
diff --git a/graphs/breadth_first_search_shortest_path_2.py b/graphs/breadth_first_search_shortest_path_2.py
index 4f9b6e65bdf3..efba9b7b6ae6 100644
--- a/graphs/breadth_first_search_shortest_path_2.py
+++ b/graphs/breadth_first_search_shortest_path_2.py
@@ -1,10 +1,12 @@
-"""Breadth-first search shortest path implementations.
+"""Breadth-first search the shortest path implementations.
doctest:
-python -m doctest -v bfs_shortest_path.py
+python -m doctest -v breadth_first_search_shortest_path_2.py
Manual test:
-python bfs_shortest_path.py
+python breadth_first_search_shortest_path_2.py
"""
+from collections import deque
+
demo_graph = {
"A": ["B", "C", "E"],
"B": ["A", "D", "E"],
@@ -17,7 +19,7 @@
def bfs_shortest_path(graph: dict, start, goal) -> list[str]:
- """Find shortest path between `start` and `goal` nodes.
+ """Find the shortest path between `start` and `goal` nodes.
Args:
graph (dict): node/list of neighboring nodes key/value pairs.
start: start node.
@@ -36,7 +38,7 @@ def bfs_shortest_path(graph: dict, start, goal) -> list[str]:
# keep track of explored nodes
explored = set()
# keep track of all the paths to be checked
- queue = [[start]]
+ queue = deque([[start]])
# return path if start is goal
if start == goal:
@@ -45,7 +47,7 @@ def bfs_shortest_path(graph: dict, start, goal) -> list[str]:
# keeps looping until all possible paths have been checked
while queue:
# pop the first path from the queue
- path = queue.pop(0)
+ path = queue.popleft()
# get the last node from the path
node = path[-1]
if node not in explored:
@@ -68,13 +70,13 @@ def bfs_shortest_path(graph: dict, start, goal) -> list[str]:
def bfs_shortest_path_distance(graph: dict, start, target) -> int:
- """Find shortest path distance between `start` and `target` nodes.
+ """Find the shortest path distance between `start` and `target` nodes.
Args:
graph: node/list of neighboring nodes key/value pairs.
start: node to start search from.
target: node to search for.
Returns:
- Number of edges in shortest path between `start` and `target` nodes.
+ Number of edges in the shortest path between `start` and `target` nodes.
-1 if no path exists.
Example:
>>> bfs_shortest_path_distance(demo_graph, "G", "D")
@@ -88,12 +90,12 @@ def bfs_shortest_path_distance(graph: dict, start, target) -> int:
return -1
if start == target:
return 0
- queue = [start]
+ queue = deque([start])
visited = set(start)
# Keep tab on distances from `start` node.
dist = {start: 0, target: -1}
while queue:
- node = queue.pop(0)
+ node = queue.popleft()
if node == target:
dist[target] = (
dist[node] if dist[target] == -1 else min(dist[target], dist[node])
diff --git a/graphs/check_bipatrite.py b/graphs/check_bipatrite.py
index 213f3f9480b5..897c78850d58 100644
--- a/graphs/check_bipatrite.py
+++ b/graphs/check_bipatrite.py
@@ -1,7 +1,7 @@
from collections import defaultdict, deque
-def is_bipartite_dfs(graph: defaultdict[int, list[int]]) -> bool:
+def is_bipartite_dfs(graph: dict[int, list[int]]) -> bool:
"""
Check if a graph is bipartite using depth-first search (DFS).
@@ -16,12 +16,9 @@ def is_bipartite_dfs(graph: defaultdict[int, list[int]]) -> bool:
Examples:
- >>> # FIXME: This test should pass.
- >>> is_bipartite_dfs(defaultdict(list, {0: [1, 2], 1: [0, 3], 2: [0, 4]}))
- Traceback (most recent call last):
- ...
- RuntimeError: dictionary changed size during iteration
- >>> is_bipartite_dfs(defaultdict(list, {0: [1, 2], 1: [0, 3], 2: [0, 1]}))
+ >>> is_bipartite_dfs({0: [1, 2], 1: [0, 3], 2: [0, 4]})
+ True
+ >>> is_bipartite_dfs({0: [1, 2], 1: [0, 3], 2: [0, 1]})
False
>>> is_bipartite_dfs({})
True
@@ -34,36 +31,26 @@ def is_bipartite_dfs(graph: defaultdict[int, list[int]]) -> bool:
>>> is_bipartite_dfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
False
>>> is_bipartite_dfs({7: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
- Traceback (most recent call last):
- ...
- KeyError: 0
+ False
>>> # FIXME: This test should fails with KeyError: 4.
>>> is_bipartite_dfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 9: [0]})
False
>>> is_bipartite_dfs({0: [-1, 3], 1: [0, -2]})
- Traceback (most recent call last):
- ...
- KeyError: -1
+ False
>>> is_bipartite_dfs({-1: [0, 2], 0: [-1, 1], 1: [0, 2], 2: [-1, 1]})
True
>>> is_bipartite_dfs({0.9: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2]})
- Traceback (most recent call last):
- ...
- KeyError: 0
+ True
>>> # FIXME: This test should fails with
>>> # TypeError: list indices must be integers or...
>>> is_bipartite_dfs({0: [1.0, 3.0], 1.0: [0, 2.0], 2.0: [1.0, 3.0], 3.0: [0, 2.0]})
True
>>> is_bipartite_dfs({"a": [1, 3], "b": [0, 2], "c": [1, 3], "d": [0, 2]})
- Traceback (most recent call last):
- ...
- KeyError: 1
+ True
>>> is_bipartite_dfs({0: ["b", "d"], 1: ["a", "c"], 2: ["b", "d"], 3: ["a", "c"]})
- Traceback (most recent call last):
- ...
- KeyError: 'b'
+ True
"""
def depth_first_search(node: int, color: int) -> bool:
@@ -80,6 +67,8 @@ def depth_first_search(node: int, color: int) -> bool:
"""
if visited[node] == -1:
visited[node] = color
+ if node not in graph:
+ return True
for neighbor in graph[node]:
if not depth_first_search(neighbor, 1 - color):
return False
@@ -92,7 +81,7 @@ def depth_first_search(node: int, color: int) -> bool:
return True
-def is_bipartite_bfs(graph: defaultdict[int, list[int]]) -> bool:
+def is_bipartite_bfs(graph: dict[int, list[int]]) -> bool:
"""
Check if a graph is bipartite using a breadth-first search (BFS).
@@ -107,12 +96,9 @@ def is_bipartite_bfs(graph: defaultdict[int, list[int]]) -> bool:
Examples:
- >>> # FIXME: This test should pass.
- >>> is_bipartite_bfs(defaultdict(list, {0: [1, 2], 1: [0, 3], 2: [0, 4]}))
- Traceback (most recent call last):
- ...
- RuntimeError: dictionary changed size during iteration
- >>> is_bipartite_bfs(defaultdict(list, {0: [1, 2], 1: [0, 2], 2: [0, 1]}))
+ >>> is_bipartite_bfs({0: [1, 2], 1: [0, 3], 2: [0, 4]})
+ True
+ >>> is_bipartite_bfs({0: [1, 2], 1: [0, 2], 2: [0, 1]})
False
>>> is_bipartite_bfs({})
True
@@ -125,36 +111,26 @@ def is_bipartite_bfs(graph: defaultdict[int, list[int]]) -> bool:
>>> is_bipartite_bfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
False
>>> is_bipartite_bfs({7: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
- Traceback (most recent call last):
- ...
- KeyError: 0
+ False
>>> # FIXME: This test should fails with KeyError: 4.
>>> is_bipartite_bfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 9: [0]})
False
>>> is_bipartite_bfs({0: [-1, 3], 1: [0, -2]})
- Traceback (most recent call last):
- ...
- KeyError: -1
+ False
>>> is_bipartite_bfs({-1: [0, 2], 0: [-1, 1], 1: [0, 2], 2: [-1, 1]})
True
>>> is_bipartite_bfs({0.9: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2]})
- Traceback (most recent call last):
- ...
- KeyError: 0
+ True
>>> # FIXME: This test should fails with
>>> # TypeError: list indices must be integers or...
>>> is_bipartite_bfs({0: [1.0, 3.0], 1.0: [0, 2.0], 2.0: [1.0, 3.0], 3.0: [0, 2.0]})
True
>>> is_bipartite_bfs({"a": [1, 3], "b": [0, 2], "c": [1, 3], "d": [0, 2]})
- Traceback (most recent call last):
- ...
- KeyError: 1
+ True
>>> is_bipartite_bfs({0: ["b", "d"], 1: ["a", "c"], 2: ["b", "d"], 3: ["a", "c"]})
- Traceback (most recent call last):
- ...
- KeyError: 'b'
+ True
"""
visited: defaultdict[int, int] = defaultdict(lambda: -1)
for node in graph:
@@ -164,6 +140,8 @@ def is_bipartite_bfs(graph: defaultdict[int, list[int]]) -> bool:
visited[node] = 0
while queue:
curr_node = queue.popleft()
+ if curr_node not in graph:
+ continue
for neighbor in graph[curr_node]:
if visited[neighbor] == -1:
visited[neighbor] = 1 - visited[curr_node]
@@ -173,7 +151,7 @@ def is_bipartite_bfs(graph: defaultdict[int, list[int]]) -> bool:
return True
-if __name__ == "__main":
+if __name__ == "__main__":
import doctest
result = doctest.testmod()
diff --git a/graphs/dijkstra_algorithm.py b/graphs/dijkstra_algorithm.py
index 51412b790bac..60646862fca8 100644
--- a/graphs/dijkstra_algorithm.py
+++ b/graphs/dijkstra_algorithm.py
@@ -52,45 +52,33 @@ def min_heapify(self, idx):
>>> priority_queue_test.array = [(5, 'A'), (10, 'B'), (15, 'C')]
>>> priority_queue_test.min_heapify(0)
- Traceback (most recent call last):
- ...
- TypeError: 'list' object is not callable
>>> priority_queue_test.array
[(5, 'A'), (10, 'B'), (15, 'C')]
>>> priority_queue_test.array = [(10, 'A'), (5, 'B'), (15, 'C')]
>>> priority_queue_test.min_heapify(0)
- Traceback (most recent call last):
- ...
- TypeError: 'list' object is not callable
>>> priority_queue_test.array
- [(10, 'A'), (5, 'B'), (15, 'C')]
+ [(5, 'B'), (10, 'A'), (15, 'C')]
>>> priority_queue_test.array = [(10, 'A'), (15, 'B'), (5, 'C')]
>>> priority_queue_test.min_heapify(0)
- Traceback (most recent call last):
- ...
- TypeError: 'list' object is not callable
>>> priority_queue_test.array
- [(10, 'A'), (15, 'B'), (5, 'C')]
+ [(5, 'C'), (15, 'B'), (10, 'A')]
>>> priority_queue_test.array = [(10, 'A'), (5, 'B')]
>>> priority_queue_test.cur_size = len(priority_queue_test.array)
>>> priority_queue_test.pos = {'A': 0, 'B': 1}
>>> priority_queue_test.min_heapify(0)
- Traceback (most recent call last):
- ...
- TypeError: 'list' object is not callable
>>> priority_queue_test.array
- [(10, 'A'), (5, 'B')]
+ [(5, 'B'), (10, 'A')]
"""
lc = self.left(idx)
rc = self.right(idx)
- if lc < self.cur_size and self.array(lc)[0] < self.array[idx][0]:
+ if lc < self.cur_size and self.array[lc][0] < self.array[idx][0]:
smallest = lc
else:
smallest = idx
- if rc < self.cur_size and self.array(rc)[0] < self.array[smallest][0]:
+ if rc < self.cur_size and self.array[rc][0] < self.array[smallest][0]:
smallest = rc
if smallest != idx:
self.swap(idx, smallest)
@@ -130,12 +118,12 @@ def extract_min(self):
>>> priority_queue_test.extract_min()
'C'
>>> priority_queue_test.array[0]
- (15, 'B')
+ (10, 'A')
"""
min_node = self.array[0][1]
self.array[0] = self.array[self.cur_size - 1]
self.cur_size -= 1
- self.min_heapify(1)
+ self.min_heapify(0)
del self.pos[min_node]
return min_node
diff --git a/graphs/graph_adjacency_list.py b/graphs/graph_adjacency_list.py
index 244e59e0e1bf..34014d69dfb8 100644
--- a/graphs/graph_adjacency_list.py
+++ b/graphs/graph_adjacency_list.py
@@ -61,6 +61,15 @@ def add_vertex(self, vertex: T) -> None:
"""
Adds a vertex to the graph. If the given vertex already exists,
a ValueError will be thrown.
+
+ >>> g = GraphAdjacencyList(vertices=[], edges=[], directed=False)
+ >>> g.add_vertex("A")
+ >>> g.adj_list
+ {'A': []}
+ >>> g.add_vertex("A")
+ Traceback (most recent call last):
+ ...
+ ValueError: Incorrect input: A is already in the graph.
"""
if self.contains_vertex(vertex):
msg = f"Incorrect input: {vertex} is already in the graph."
@@ -448,7 +457,7 @@ def test_remove_edge(self) -> None:
(
undirected_graph,
directed_graph,
- random_vertices,
+ _random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
@@ -502,7 +511,7 @@ def test_add_vertex_exception_check(self) -> None:
undirected_graph,
directed_graph,
random_vertices,
- random_edges,
+ _random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
@@ -516,7 +525,7 @@ def test_remove_vertex_exception_check(self) -> None:
undirected_graph,
directed_graph,
random_vertices,
- random_edges,
+ _random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for i in range(101):
@@ -530,7 +539,7 @@ def test_add_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
- random_vertices,
+ _random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
@@ -569,7 +578,7 @@ def test_contains_edge_exception_check(self) -> None:
undirected_graph,
directed_graph,
random_vertices,
- random_edges,
+ _random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
diff --git a/graphs/graph_adjacency_matrix.py b/graphs/graph_adjacency_matrix.py
index 8eeeae786513..6dca0fbbcf05 100644
--- a/graphs/graph_adjacency_matrix.py
+++ b/graphs/graph_adjacency_matrix.py
@@ -469,7 +469,7 @@ def test_remove_edge(self) -> None:
(
undirected_graph,
directed_graph,
- random_vertices,
+ _random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
@@ -523,7 +523,7 @@ def test_add_vertex_exception_check(self) -> None:
undirected_graph,
directed_graph,
random_vertices,
- random_edges,
+ _random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
@@ -537,7 +537,7 @@ def test_remove_vertex_exception_check(self) -> None:
undirected_graph,
directed_graph,
random_vertices,
- random_edges,
+ _random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for i in range(101):
@@ -551,7 +551,7 @@ def test_add_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
- random_vertices,
+ _random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
@@ -590,7 +590,7 @@ def test_contains_edge_exception_check(self) -> None:
undirected_graph,
directed_graph,
random_vertices,
- random_edges,
+ _random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
diff --git a/knapsack/README.md b/knapsack/README.md
index f31e5f591412..686ea929255a 100644
--- a/knapsack/README.md
+++ b/knapsack/README.md
@@ -1,4 +1,4 @@
-# A naive recursive implementation of 0-1 Knapsack Problem
+# A recursive implementation of 0-N Knapsack Problem
This overview is taken from:
diff --git a/knapsack/knapsack.py b/knapsack/knapsack.py
index bb507be1ba3c..0648773c919f 100644
--- a/knapsack/knapsack.py
+++ b/knapsack/knapsack.py
@@ -1,14 +1,23 @@
-"""A naive recursive implementation of 0-1 Knapsack Problem
+"""A recursive implementation of 0-N Knapsack Problem
https://en.wikipedia.org/wiki/Knapsack_problem
"""
from __future__ import annotations
+from functools import lru_cache
-def knapsack(capacity: int, weights: list[int], values: list[int], counter: int) -> int:
+
+def knapsack(
+ capacity: int,
+ weights: list[int],
+ values: list[int],
+ counter: int,
+ allow_repetition=False,
+) -> int:
"""
Returns the maximum value that can be put in a knapsack of a capacity cap,
- whereby each weight w has a specific value val.
+ whereby each weight w has a specific value val
+ with option to allow repetitive selection of items
>>> cap = 50
>>> val = [60, 100, 120]
@@ -17,28 +26,40 @@ def knapsack(capacity: int, weights: list[int], values: list[int], counter: int)
>>> knapsack(cap, w, val, c)
220
- The result is 220 cause the values of 100 and 120 got the weight of 50
+ Given the repetition is NOT allowed,
+ the result is 220 cause the values of 100 and 120 got the weight of 50
which is the limit of the capacity.
+ >>> knapsack(cap, w, val, c, True)
+ 300
+
+ Given the repetition is allowed,
+ the result is 300 cause the values of 60*5 (pick 5 times)
+ got the weight of 10*5 which is the limit of the capacity.
"""
- # Base Case
- if counter == 0 or capacity == 0:
- return 0
-
- # If weight of the nth item is more than Knapsack of capacity,
- # then this item cannot be included in the optimal solution,
- # else return the maximum of two cases:
- # (1) nth item included
- # (2) not included
- if weights[counter - 1] > capacity:
- return knapsack(capacity, weights, values, counter - 1)
- else:
- left_capacity = capacity - weights[counter - 1]
- new_value_included = values[counter - 1] + knapsack(
- left_capacity, weights, values, counter - 1
- )
- without_new_value = knapsack(capacity, weights, values, counter - 1)
- return max(new_value_included, without_new_value)
+ @lru_cache
+ def knapsack_recur(capacity: int, counter: int) -> int:
+ # Base Case
+ if counter == 0 or capacity == 0:
+ return 0
+
+ # If weight of the nth item is more than Knapsack of capacity,
+ # then this item cannot be included in the optimal solution,
+ # else return the maximum of two cases:
+ # (1) nth item included only once (0-1), if allow_repetition is False
+ # nth item included one or more times (0-N), if allow_repetition is True
+ # (2) not included
+ if weights[counter - 1] > capacity:
+ return knapsack_recur(capacity, counter - 1)
+ else:
+ left_capacity = capacity - weights[counter - 1]
+ new_value_included = values[counter - 1] + knapsack_recur(
+ left_capacity, counter - 1 if not allow_repetition else counter
+ )
+ without_new_value = knapsack_recur(capacity, counter - 1)
+ return max(new_value_included, without_new_value)
+
+ return knapsack_recur(capacity, counter)
if __name__ == "__main__":
diff --git a/knapsack/tests/test_greedy_knapsack.py b/knapsack/tests/test_greedy_knapsack.py
index e6a40084109e..7ebaddd3c99e 100644
--- a/knapsack/tests/test_greedy_knapsack.py
+++ b/knapsack/tests/test_greedy_knapsack.py
@@ -28,7 +28,7 @@ def test_negative_max_weight(self):
# profit = [10, 20, 30, 40, 50, 60]
# weight = [2, 4, 6, 8, 10, 12]
# max_weight = -15
- pytest.raises(ValueError, match="max_weight must greater than zero.")
+ pytest.raises(ValueError, match=r"max_weight must greater than zero.")
def test_negative_profit_value(self):
"""
@@ -38,7 +38,7 @@ def test_negative_profit_value(self):
# profit = [10, -20, 30, 40, 50, 60]
# weight = [2, 4, 6, 8, 10, 12]
# max_weight = 15
- pytest.raises(ValueError, match="Weight can not be negative.")
+ pytest.raises(ValueError, match=r"Weight can not be negative.")
def test_negative_weight_value(self):
"""
@@ -48,7 +48,7 @@ def test_negative_weight_value(self):
# profit = [10, 20, 30, 40, 50, 60]
# weight = [2, -4, 6, -8, 10, 12]
# max_weight = 15
- pytest.raises(ValueError, match="Profit can not be negative.")
+ pytest.raises(ValueError, match=r"Profit can not be negative.")
def test_null_max_weight(self):
"""
@@ -58,7 +58,7 @@ def test_null_max_weight(self):
# profit = [10, 20, 30, 40, 50, 60]
# weight = [2, 4, 6, 8, 10, 12]
# max_weight = null
- pytest.raises(ValueError, match="max_weight must greater than zero.")
+ pytest.raises(ValueError, match=r"max_weight must greater than zero.")
def test_unequal_list_length(self):
"""
@@ -68,7 +68,9 @@ def test_unequal_list_length(self):
# profit = [10, 20, 30, 40, 50]
# weight = [2, 4, 6, 8, 10, 12]
# max_weight = 100
- pytest.raises(IndexError, match="The length of profit and weight must be same.")
+ pytest.raises(
+ IndexError, match=r"The length of profit and weight must be same."
+ )
if __name__ == "__main__":
diff --git a/knapsack/tests/test_knapsack.py b/knapsack/tests/test_knapsack.py
index 7bfb8780627b..80378aae4579 100644
--- a/knapsack/tests/test_knapsack.py
+++ b/knapsack/tests/test_knapsack.py
@@ -30,7 +30,7 @@ def test_base_case(self):
def test_easy_case(self):
"""
- test for the base case
+ test for the easy case
"""
cap = 3
val = [1, 2, 3]
@@ -48,6 +48,16 @@ def test_knapsack(self):
c = len(val)
assert k.knapsack(cap, w, val, c) == 220
+ def test_knapsack_repetition(self):
+ """
+ test for the knapsack repetition
+ """
+ cap = 50
+ val = [60, 100, 120]
+ w = [10, 20, 30]
+ c = len(val)
+ assert k.knapsack(cap, w, val, c, True) == 300
+
if __name__ == "__main__":
unittest.main()
diff --git a/linear_algebra/gaussian_elimination.py b/linear_algebra/gaussian_elimination.py
index 6f4075b710fd..cf816940b0d1 100644
--- a/linear_algebra/gaussian_elimination.py
+++ b/linear_algebra/gaussian_elimination.py
@@ -33,7 +33,7 @@ def retroactive_resolution(
[ 0.5]])
"""
- rows, columns = np.shape(coefficients)
+ rows, _columns = np.shape(coefficients)
x: NDArray[float64] = np.zeros((rows, 1), dtype=float)
for row in reversed(range(rows)):
diff --git a/linear_algebra/jacobi_iteration_method.py b/linear_algebra/jacobi_iteration_method.py
index 2cc9c103018b..0f9fcde7af6c 100644
--- a/linear_algebra/jacobi_iteration_method.py
+++ b/linear_algebra/jacobi_iteration_method.py
@@ -112,7 +112,7 @@ def jacobi_iteration_method(
(coefficient_matrix, constant_matrix), axis=1
)
- rows, cols = table.shape
+ rows, _cols = table.shape
strictly_diagonally_dominant(table)
@@ -149,7 +149,7 @@ def jacobi_iteration_method(
# Here we get 'i_col' - these are the column numbers, for each row
# without diagonal elements, except for the last column.
- i_row, i_col = np.where(masks)
+ _i_row, i_col = np.where(masks)
ind = i_col.reshape(-1, rows - 1)
#'i_col' is converted to a two-dimensional list 'ind', which will be
diff --git a/machine_learning/apriori_algorithm.py b/machine_learning/apriori_algorithm.py
index 09a89ac236bd..5c3e2baba2c2 100644
--- a/machine_learning/apriori_algorithm.py
+++ b/machine_learning/apriori_algorithm.py
@@ -11,6 +11,7 @@
Examples: https://www.kaggle.com/code/earthian/apriori-association-rules-mining
"""
+from collections import Counter
from itertools import combinations
@@ -44,11 +45,16 @@ def prune(itemset: list, candidates: list, length: int) -> list:
>>> prune(itemset, candidates, 3)
[]
"""
+ itemset_counter = Counter(tuple(item) for item in itemset)
pruned = []
for candidate in candidates:
is_subsequence = True
for item in candidate:
- if item not in itemset or itemset.count(item) < length - 1:
+ item_tuple = tuple(item)
+ if (
+ item_tuple not in itemset_counter
+ or itemset_counter[item_tuple] < length - 1
+ ):
is_subsequence = False
break
if is_subsequence:
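The Counter rewrite above is a performance fix as much as a correctness one: each list.count(item) call rescans the whole itemset (O(n) per candidate item), while a Counter is built in one pass and queried in O(1). A hedged micro-example of the same idea:

    from collections import Counter

    itemset = [("bread",), ("milk",), ("bread",)]
    counts = Counter(tuple(item) for item in itemset)  # one O(n) pass
    assert counts[("bread",)] == 2                     # O(1) lookup
    assert itemset.count(("milk",)) == 1               # O(n) per query
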
diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py
index 72970431c3fc..b4df64796bb1 100644
--- a/machine_learning/decision_tree.py
+++ b/machine_learning/decision_tree.py
@@ -146,14 +146,13 @@ def predict(self, x):
"""
if self.prediction is not None:
return self.prediction
- elif self.left or self.right is not None:
+ elif self.left is not None and self.right is not None:
if x >= self.decision_boundary:
return self.right.predict(x)
else:
return self.left.predict(x)
else:
- print("Error: Decision tree not yet trained")
- return None
+ raise ValueError("Decision tree not yet trained")
class TestDecisionTree:
@@ -201,4 +200,4 @@ def main():
main()
import doctest
- doctest.testmod(name="mean_squarred_error", verbose=True)
+ doctest.testmod(name="mean_squared_error", verbose=True)
diff --git a/machine_learning/k_means_clust.py b/machine_learning/k_means_clust.py
index a926362fc18b..a55153628f9c 100644
--- a/machine_learning/k_means_clust.py
+++ b/machine_learning/k_means_clust.py
@@ -37,7 +37,13 @@
heterogeneity,
k
)
- 5. Transfers Dataframe into excel format it must have feature called
+ 5. Plot the labeled 3D data points with centroids.
+ plot_kmeans(
+ X,
+ centroids,
+ cluster_assignment
+ )
+ 6. Transfers the DataFrame into Excel format; it must have a feature called
'Clust' with k means clustering numbers in it.
"""
@@ -126,6 +132,19 @@ def plot_heterogeneity(heterogeneity, k):
plt.show()
+def plot_kmeans(data, centroids, cluster_assignment):
+ ax = plt.axes(projection="3d")
+ ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=cluster_assignment, cmap="viridis")
+ ax.scatter(
+ centroids[:, 0], centroids[:, 1], centroids[:, 2], c="red", s=100, marker="x"
+ )
+ ax.set_xlabel("X")
+ ax.set_ylabel("Y")
+ ax.set_zlabel("Z")
+ ax.set_title("3D K-Means Clustering Visualization")
+ plt.show()
+
+
def kmeans(
data, k, initial_centroids, maxiter=500, record_heterogeneity=None, verbose=False
):
@@ -193,6 +212,7 @@ def kmeans(
verbose=True,
)
plot_heterogeneity(heterogeneity, k)
+ plot_kmeans(dataset["data"], centroids, cluster_assignment)
def report_generator(
diff --git a/machine_learning/polynomial_regression.py b/machine_learning/polynomial_regression.py
index 212f40bea197..f52177df1292 100644
--- a/machine_learning/polynomial_regression.py
+++ b/machine_learning/polynomial_regression.py
@@ -93,7 +93,7 @@ def _design_matrix(data: np.ndarray, degree: int) -> np.ndarray:
...
ValueError: Data must have dimensions N x 1
"""
- rows, *remaining = data.shape
+ _rows, *remaining = data.shape
if remaining:
raise ValueError("Data must have dimensions N x 1")
diff --git a/machine_learning/principle_component_analysis.py b/machine_learning/principle_component_analysis.py
index 46ccdb968494..174500d89620 100644
--- a/machine_learning/principle_component_analysis.py
+++ b/machine_learning/principle_component_analysis.py
@@ -65,7 +65,7 @@ def main() -> None:
"""
Driver function to execute PCA and display results.
"""
- data_x, data_y = collect_dataset()
+ data_x, _data_y = collect_dataset()
# Number of principal components to retain
n_components = 2
diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
new file mode 100644
index 000000000000..d6f630149087
--- /dev/null
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -0,0 +1,178 @@
+"""
+t-distributed stochastic neighbor embedding (t-SNE)
+
+For more details, see:
+https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
+"""
+
+import doctest
+
+import numpy as np
+from numpy import ndarray
+from sklearn.datasets import load_iris
+
+
+def collect_dataset() -> tuple[ndarray, ndarray]:
+ """
+ Load the Iris dataset and return features and labels.
+
+ Returns:
+ tuple[ndarray, ndarray]: Feature matrix and target labels.
+
+ >>> features, targets = collect_dataset()
+ >>> features.shape
+ (150, 4)
+ >>> targets.shape
+ (150,)
+ """
+ iris_dataset = load_iris()
+ return np.array(iris_dataset.data), np.array(iris_dataset.target)
+
+
+def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray:
+ """
+ Compute high-dimensional affinities (P matrix) using a Gaussian kernel.
+
+ Args:
+ data_matrix: Input data of shape (n_samples, n_features).
+ sigma: Gaussian kernel bandwidth.
+
+ Returns:
+ ndarray: Symmetrized probability matrix.
+
+ >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
+ >>> probabilities = compute_pairwise_affinities(x)
+ >>> float(round(probabilities[0, 1], 3))
+ 0.25
+ """
+ n_samples = data_matrix.shape[0]
+ squared_sum = np.sum(np.square(data_matrix), axis=1)
+ squared_distance = np.add(
+ np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum
+ )
+
+ affinity_matrix = np.exp(-squared_distance / (2 * sigma**2))
+ np.fill_diagonal(affinity_matrix, 0)
+
+ affinity_matrix /= np.sum(affinity_matrix)
+ return (affinity_matrix + affinity_matrix.T) / (2 * n_samples)
+
+
+def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]:
+ """
+ Compute low-dimensional affinities (Q matrix) using a Student-t distribution.
+
+ Args:
+ embedding_matrix: Low-dimensional embedding of shape (n_samples, n_components).
+
+ Returns:
+ tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix).
+
+ >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
+ >>> q_matrix, numerators = compute_low_dim_affinities(y)
+ >>> q_matrix.shape
+ (2, 2)
+ """
+ squared_sum = np.sum(np.square(embedding_matrix), axis=1)
+ numerator_matrix = 1 / (
+ 1
+ + np.add(
+ np.add(-2 * np.dot(embedding_matrix, embedding_matrix.T), squared_sum).T,
+ squared_sum,
+ )
+ )
+ np.fill_diagonal(numerator_matrix, 0)
+
+ q_matrix = numerator_matrix / np.sum(numerator_matrix)
+ return q_matrix, numerator_matrix
+
+
+def apply_tsne(
+ data_matrix: ndarray,
+ n_components: int = 2,
+ learning_rate: float = 200.0,
+ n_iter: int = 500,
+) -> ndarray:
+ """
+ Apply t-SNE for dimensionality reduction.
+
+ Args:
+ data_matrix: Original dataset (features).
+ n_components: Target dimension (2D or 3D).
+ learning_rate: Step size for gradient descent.
+ n_iter: Number of iterations.
+
+ Returns:
+ ndarray: Low-dimensional embedding of the data.
+
+ >>> features, _ = collect_dataset()
+ >>> embedding = apply_tsne(features, n_components=2, n_iter=50)
+ >>> embedding.shape
+ (150, 2)
+ """
+ if n_components < 1 or n_iter < 1:
+ raise ValueError("n_components and n_iter must be >= 1")
+
+ n_samples = data_matrix.shape[0]
+ rng = np.random.default_rng()
+ embedding = rng.standard_normal((n_samples, n_components)) * 1e-4
+
+ high_dim_affinities = compute_pairwise_affinities(data_matrix)
+ high_dim_affinities = np.maximum(high_dim_affinities, 1e-12)
+
+ embedding_increment = np.zeros_like(embedding)
+ momentum = 0.5
+
+ for iteration in range(n_iter):
+ low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding)
+ low_dim_affinities = np.maximum(low_dim_affinities, 1e-12)
+
+ affinity_diff = high_dim_affinities - low_dim_affinities
+
+ gradient = 4 * (
+ np.dot((affinity_diff * numerator_matrix), embedding)
+ - np.multiply(
+ np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis],
+ embedding,
+ )
+ )
+
+ embedding_increment = momentum * embedding_increment - learning_rate * gradient
+ embedding += embedding_increment
+
+ if iteration == int(n_iter / 4):
+ momentum = 0.8
+
+ return embedding
+
+
+def main() -> None:
+ """
+ Run t-SNE on the Iris dataset and display the first 5 embeddings.
+
+ >>> main() # doctest: +ELLIPSIS
+ t-SNE embedding (first 5 points):
+ [[...
+ """
+ features, _labels = collect_dataset()
+ embedding = apply_tsne(features, n_components=2, n_iter=300)
+
+ if not isinstance(embedding, np.ndarray):
+ raise TypeError("t-SNE embedding must be an ndarray")
+
+ print("t-SNE embedding (first 5 points):")
+ print(embedding[:5])
+
+ # Optional visualization (kept commented out to satisfy Ruff/mypy)
+
+ # import matplotlib.pyplot as plt
+ # plt.scatter(embedding[:, 0], embedding[:, 1], c=_labels, cmap="viridis")
+ # plt.title("t-SNE Visualization of the Iris Dataset")
+ # plt.xlabel("Dimension 1")
+ # plt.ylabel("Dimension 2")
+ # plt.show()
+
+
+if __name__ == "__main__":
+ doctest.testmod()
+ main()
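As a sanity check on the compute_pairwise_affinities doctest above: for two points at distance 1 with sigma = 1, both off-diagonal Gaussian affinities equal exp(-1/2), normalization makes each 0.5, and symmetrizing with (p + p.T) / (2 * n_samples) for n_samples = 2 gives 0.5 / 2 = 0.25, the value the doctest expects:

    import numpy as np

    p = np.exp(-0.5) * np.array([[0.0, 1.0], [1.0, 0.0]])
    p /= p.sum()             # each off-diagonal entry becomes 0.5
    p = (p + p.T) / (2 * 2)  # symmetrize over n_samples = 2
    assert np.isclose(p[0, 1], 0.25)
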
diff --git a/machine_learning/xgboost_classifier.py b/machine_learning/xgboost_classifier.py
index 1da933cf690f..e845480074b9 100644
--- a/machine_learning/xgboost_classifier.py
+++ b/machine_learning/xgboost_classifier.py
@@ -42,8 +42,6 @@ def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
def main() -> None:
"""
- >>> main()
-
Url for the algorithm:
https://xgboost.readthedocs.io/en/stable/
Iris type dataset is used to demonstrate algorithm.
diff --git a/maths/chinese_remainder_theorem.py b/maths/chinese_remainder_theorem.py
index 18af63d106e8..b7a7712ae917 100644
--- a/maths/chinese_remainder_theorem.py
+++ b/maths/chinese_remainder_theorem.py
@@ -65,7 +65,7 @@ def invert_modulo(a: int, n: int) -> int:
1
"""
- (b, x) = extended_euclid(a, n)
+ (b, _x) = extended_euclid(a, n)
if b < 0:
b = (b % n + n) % n
return b
diff --git a/maths/factorial.py b/maths/factorial.py
index aaf90f384bb9..ba61447c7564 100644
--- a/maths/factorial.py
+++ b/maths/factorial.py
@@ -56,7 +56,7 @@ def factorial_recursive(n: int) -> int:
raise ValueError("factorial() only accepts integral values")
if n < 0:
raise ValueError("factorial() not defined for negative values")
- return 1 if n in {0, 1} else n * factorial(n - 1)
+ return 1 if n in {0, 1} else n * factorial_recursive(n - 1)
if __name__ == "__main__":
diff --git a/maths/fibonacci.py b/maths/fibonacci.py
index 24b2d7ae449e..71ff479f9cc2 100644
--- a/maths/fibonacci.py
+++ b/maths/fibonacci.py
@@ -183,7 +183,7 @@ def fib_memoization(n: int) -> list[int]:
"""
if n < 0:
raise ValueError("n is negative")
- # Cache must be outside recursuive function
+ # Cache must be outside recursive function
# otherwise it will reset every time it calls itself.
cache: dict[int, int] = {0: 0, 1: 1, 2: 1} # Prefilled cache
diff --git a/maths/largest_of_very_large_numbers.py b/maths/largest_of_very_large_numbers.py
index edee50371e02..e38ab2edb932 100644
--- a/maths/largest_of_very_large_numbers.py
+++ b/maths/largest_of_very_large_numbers.py
@@ -15,7 +15,7 @@ def res(x, y):
>>> res(-1, 5)
Traceback (most recent call last):
...
- ValueError: math domain error
+ ValueError: expected a positive input
"""
if 0 not in (x, y):
# We use the relation x^y = y*log10(x), where 10 is the base.
diff --git a/maths/modular_division.py b/maths/modular_division.py
index 2f8f4479b27d..94f12b3e096e 100644
--- a/maths/modular_division.py
+++ b/maths/modular_division.py
@@ -31,7 +31,7 @@ def modular_division(a: int, b: int, n: int) -> int:
assert n > 1
assert a > 0
assert greatest_common_divisor(a, n) == 1
- (d, t, s) = extended_gcd(n, a) # Implemented below
+ (_d, _t, s) = extended_gcd(n, a) # Implemented below
x = (b * s) % n
return x
@@ -47,7 +47,7 @@ def invert_modulo(a: int, n: int) -> int:
1
"""
- (b, x) = extended_euclid(a, n) # Implemented below
+ (b, _x) = extended_euclid(a, n) # Implemented below
if b < 0:
b = (b % n + n) % n
return b
diff --git a/maths/monte_carlo.py b/maths/monte_carlo.py
index d174a0b188a2..5eb176238ffb 100644
--- a/maths/monte_carlo.py
+++ b/maths/monte_carlo.py
@@ -8,7 +8,7 @@
from statistics import mean
-def pi_estimator(iterations: int):
+def pi_estimator(iterations: int) -> None:
"""
An implementation of the Monte Carlo method used to find pi.
1. Draw a 2x2 square centred at (0,0).
diff --git a/maths/numerical_analysis/weierstrass_method.py b/maths/numerical_analysis/weierstrass_method.py
new file mode 100644
index 000000000000..b5a767af3a86
--- /dev/null
+++ b/maths/numerical_analysis/weierstrass_method.py
@@ -0,0 +1,97 @@
+from collections.abc import Callable
+
+import numpy as np
+
+
+def weierstrass_method(
+ polynomial: Callable[[np.ndarray], np.ndarray],
+ degree: int,
+ roots: np.ndarray | None = None,
+ max_iter: int = 100,
+) -> np.ndarray:
+ """
+ Approximates all complex roots of a polynomial using the
+ Weierstrass (Durand-Kerner) method.
+ Args:
+ polynomial: A function that takes a NumPy array of complex numbers and returns
+ the polynomial values at those points.
+ degree: Degree of the polynomial (number of roots to find). Must be ≥ 1.
+ roots: Optional initial guess as a NumPy array of complex numbers.
+ Must have length equal to 'degree'.
+ If None, perturbed complex roots of unity are used.
+ max_iter: Number of iterations to perform (default: 100).
+
+ Returns:
+ np.ndarray: Array of approximated complex roots.
+
+ Raises:
+ ValueError: If degree < 1, or if initial roots length doesn't match the degree.
+
+ Note:
+ - Root updates are clipped to prevent numerical overflow.
+
+ Example:
+ >>> import numpy as np
+ >>> def check(poly, degree, expected):
+ ... roots = weierstrass_method(poly, degree)
+ ... return np.allclose(np.sort(roots), np.sort(expected))
+
+ >>> check(
+ ... lambda x: x**2 - 1,
+ ... 2,
+ ... np.array([-1, 1]))
+ True
+
+ >>> check(
+ ... lambda x: x**3 - 4.5*x**2 + 5.75*x - 1.875,
+ ... 3,
+ ... np.array([1.5, 0.5, 2.5])
+ ... )
+ True
+
+ See Also:
+ https://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method
+ """
+
+ if degree < 1:
+ raise ValueError("Degree of the polynomial must be at least 1.")
+
+ if roots is None:
+ # Use perturbed complex roots of unity as initial guesses
+ rng = np.random.default_rng()
+ roots = np.array(
+ [
+ np.exp(2j * np.pi * i / degree) * (1 + 1e-3 * rng.random())
+ for i in range(degree)
+ ],
+ dtype=np.complex128,
+ )
+
+ else:
+ roots = np.asarray(roots, dtype=np.complex128)
+ if roots.shape[0] != degree:
+ raise ValueError(
+ "Length of initial roots must match the degree of the polynomial."
+ )
+
+ for _ in range(max_iter):
+ # Construct the product denominator for each root
+ denominator = np.array([root - roots for root in roots], dtype=np.complex128)
+ np.fill_diagonal(denominator, 1.0) # Avoid zero in diagonal
+ denominator = np.prod(denominator, axis=1)
+
+ # Evaluate polynomial at each root
+ numerator = polynomial(roots).astype(np.complex128)
+
+ # Compute update and clip to prevent overflow
+ delta = numerator / denominator
+ delta = np.clip(delta, -1e10, 1e10)
+ roots -= delta
+
+ return roots
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
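A short usage sketch of the new module (the import path is an assumption based on the file's location):

    import numpy as np

    from maths.numerical_analysis.weierstrass_method import weierstrass_method

    # x^2 - 3x + 2 factors as (x - 1)(x - 2), so the roots are 1 and 2.
    roots = weierstrass_method(lambda x: x**2 - 3 * x + 2, degree=2)
    print(np.sort_complex(np.round(roots, 6)))  # ~ [1.+0.j, 2.+0.j]
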
diff --git a/maths/prime_factors.py b/maths/prime_factors.py
index 47abcf10e618..6eff57d12d17 100644
--- a/maths/prime_factors.py
+++ b/maths/prime_factors.py
@@ -47,6 +47,46 @@ def prime_factors(n: int) -> list[int]:
return factors
+def unique_prime_factors(n: int) -> list[int]:
+ """
+ Returns unique prime factors of n as a list.
+
+ >>> unique_prime_factors(0)
+ []
+ >>> unique_prime_factors(100)
+ [2, 5]
+ >>> unique_prime_factors(2560)
+ [2, 5]
+ >>> unique_prime_factors(10**-2)
+ []
+ >>> unique_prime_factors(0.02)
+ []
+ >>> unique_prime_factors(10**241)
+ [2, 5]
+ >>> unique_prime_factors(10**-354)
+ []
+ >>> unique_prime_factors('hello')
+ Traceback (most recent call last):
+ ...
+ TypeError: '<=' not supported between instances of 'int' and 'str'
+ >>> unique_prime_factors([1,2,'hello'])
+ Traceback (most recent call last):
+ ...
+ TypeError: '<=' not supported between instances of 'int' and 'list'
+ """
+ i = 2
+ factors = []
+ while i * i <= n:
+ if not n % i:
+ while not n % i:
+ n //= i
+ factors.append(i)
+ i += 1
+ if n > 1:
+ factors.append(n)
+ return factors
+
+
if __name__ == "__main__":
import doctest
diff --git a/maths/radix2_fft.py b/maths/radix2_fft.py
index ccd5cdcc0160..5efbccc7a17d 100644
--- a/maths/radix2_fft.py
+++ b/maths/radix2_fft.py
@@ -39,14 +39,14 @@ class FFT:
>>> x = FFT(A, B)
Print product
- >>> x.product # 2x + 3x^2 + 8x^3 + 4x^4 + 6x^5
+ >>> x.product # 2x + 3x^2 + 8x^3 + 6x^4 + 8x^5
[(-0-0j), (2+0j), (3-0j), (8-0j), (6+0j), (8+0j)]
__str__ test
>>> print(x)
- A = 0*x^0 + 1*x^1 + 2*x^0 + 3*x^2
- B = 0*x^2 + 1*x^3 + 2*x^4
- A*B = 0*x^(-0-0j) + 1*x^(2+0j) + 2*x^(3-0j) + 3*x^(8-0j) + 4*x^(6+0j) + 5*x^(8+0j)
+ A = 0*x^0 + 1*x^1 + 0*x^2 + 2*x^3
+ B = 2*x^0 + 3*x^1 + 4*x^2
+ A*B = (-0-0j)*x^0 + (2+0j)*x^1 + (3-0j)*x^2 + (8-0j)*x^3 + (6+0j)*x^4 + (8+0j)*x^5
"""
def __init__(self, poly_a=None, poly_b=None):
@@ -159,13 +159,13 @@ def __multiply(self):
# Overwrite __str__ for print(); Shows A, B and A*B
def __str__(self):
a = "A = " + " + ".join(
- f"{coef}*x^{i}" for coef, i in enumerate(self.polyA[: self.len_A])
+ f"{coef}*x^{i}" for i, coef in enumerate(self.polyA[: self.len_A])
)
b = "B = " + " + ".join(
- f"{coef}*x^{i}" for coef, i in enumerate(self.polyB[: self.len_B])
+ f"{coef}*x^{i}" for i, coef in enumerate(self.polyB[: self.len_B])
)
c = "A*B = " + " + ".join(
- f"{coef}*x^{i}" for coef, i in enumerate(self.product)
+ f"{coef}*x^{i}" for i, coef in enumerate(self.product)
)
return f"{a}\n{b}\n{c}"
diff --git a/maths/special_numbers/proth_number.py b/maths/special_numbers/proth_number.py
index 47747ed260f7..b9b827b6a5a2 100644
--- a/maths/special_numbers/proth_number.py
+++ b/maths/special_numbers/proth_number.py
@@ -59,6 +59,50 @@ def proth(number: int) -> int:
return proth_list[number - 1]
+def is_proth_number(number: int) -> bool:
+ """
+ :param number: positive integer number
+ :return: true if number is a Proth number, false otherwise
+ >>> is_proth_number(1)
+ False
+ >>> is_proth_number(2)
+ False
+ >>> is_proth_number(3)
+ True
+ >>> is_proth_number(4)
+ False
+ >>> is_proth_number(5)
+ True
+ >>> is_proth_number(34)
+ False
+ >>> is_proth_number(-1)
+ Traceback (most recent call last):
+ ...
+ ValueError: Input value of [number=-1] must be > 0
+ >>> is_proth_number(6.0)
+ Traceback (most recent call last):
+ ...
+ TypeError: Input value of [number=6.0] must be an integer
+ """
+ if not isinstance(number, int):
+ message = f"Input value of [{number=}] must be an integer"
+ raise TypeError(message)
+
+ if number <= 0:
+ message = f"Input value of [{number=}] must be > 0"
+ raise ValueError(message)
+
+ if number == 1:
+ return False
+
+ number -= 1
+ n = 0
+ while number % 2 == 0:
+ n += 1
+ number //= 2
+ return number < 2**n
+
+
if __name__ == "__main__":
import doctest
@@ -73,3 +117,9 @@ def proth(number: int) -> int:
continue
print(f"The {number}th Proth number: {value}")
+
+ for number in [1, 2, 3, 4, 5, 9, 13, 49, 57, 193, 241, 163, 201]:
+ if is_proth_number(number):
+ print(f"{number} is a Proth number")
+ else:
+ print(f"{number} is not a Proth number")
diff --git a/maths/test_factorial.py b/maths/test_factorial.py
index d80d88add745..1795ebba194f 100644
--- a/maths/test_factorial.py
+++ b/maths/test_factorial.py
@@ -33,5 +33,11 @@ def test_negative_number(function):
function(-3)
+@pytest.mark.parametrize("function", [factorial, factorial_recursive])
+def test_float_number(function):
+ with pytest.raises(ValueError):
+ function(1.5)
+
+
if __name__ == "__main__":
pytest.main(["-v", __file__])
diff --git a/maths/volume.py b/maths/volume.py
index 08bdf72b013b..1715c9c300d5 100644
--- a/maths/volume.py
+++ b/maths/volume.py
@@ -555,7 +555,7 @@ def main():
print(f"Torus: {vol_torus(2, 2) = }") # ~= 157.9
print(f"Conical Frustum: {vol_conical_frustum(2, 2, 4) = }") # ~= 58.6
print(f"Spherical cap: {vol_spherical_cap(1, 2) = }") # ~= 5.24
- print(f"Spheres intersetion: {vol_spheres_intersect(2, 2, 1) = }") # ~= 21.21
+ print(f"Spheres intersection: {vol_spheres_intersect(2, 2, 1) = }") # ~= 21.21
print(f"Spheres union: {vol_spheres_union(2, 2, 1) = }") # ~= 45.81
print(
f"Hollow Circular Cylinder: {vol_hollow_circular_cylinder(1, 2, 3) = }"
diff --git a/neural_network/convolution_neural_network.py b/neural_network/convolution_neural_network.py
index d4ac360a98de..6b1aa50c7981 100644
--- a/neural_network/convolution_neural_network.py
+++ b/neural_network/convolution_neural_network.py
@@ -317,7 +317,7 @@ def predict(self, datas_test):
print((" - - Shape: Test_Data ", np.shape(datas_test)))
for p in range(len(datas_test)):
data_test = np.asmatrix(datas_test[p])
- data_focus1, data_conved1 = self.convolute(
+ _data_focus1, data_conved1 = self.convolute(
data_test,
self.conv1,
self.w_conv1,
@@ -339,7 +339,7 @@ def predict(self, datas_test):
def convolution(self, data):
# return the data of image after convoluting process so we can check it out
data_test = np.asmatrix(data)
- data_focus1, data_conved1 = self.convolute(
+ _data_focus1, data_conved1 = self.convolute(
data_test,
self.conv1,
self.w_conv1,
diff --git a/project_euler/problem_009/sol4.py b/project_euler/problem_009/sol4.py
new file mode 100644
index 000000000000..a07d40ccb54d
--- /dev/null
+++ b/project_euler/problem_009/sol4.py
@@ -0,0 +1,60 @@
+"""
+Project Euler Problem 9: https://projecteuler.net/problem=9
+
+Special Pythagorean triplet
+
+A Pythagorean triplet is a set of three natural numbers, a < b < c, for which,
+
+ a^2 + b^2 = c^2.
+
+For example, 3^2 + 4^2 = 9 + 16 = 25 = 5^2.
+
+There exists exactly one Pythagorean triplet for which a + b + c = 1000.
+Find the product abc.
+
+References:
+ - https://en.wikipedia.org/wiki/Pythagorean_triple
+"""
+
+
+def get_squares(n: int) -> list[int]:
+ """
+ >>> get_squares(0)
+ []
+ >>> get_squares(1)
+ [0]
+ >>> get_squares(2)
+ [0, 1]
+ >>> get_squares(3)
+ [0, 1, 4]
+ >>> get_squares(4)
+ [0, 1, 4, 9]
+ """
+ return [number * number for number in range(n)]
+
+
+def solution(n: int = 1000) -> int:
+ """
+ Precomputes squares and checks whether a^2 + b^2 is a perfect square via set lookup.
+
+ >>> solution(12)
+ 60
+ >>> solution(36)
+ 1620
+ """
+
+ squares = get_squares(n)
+ squares_set = set(squares)
+ for a in range(1, n // 3):
+ for b in range(a + 1, (n - a) // 2 + 1):
+ if (
+ squares[a] + squares[b] in squares_set
+ and squares[n - a - b] == squares[a] + squares[b]
+ ):
+ return a * b * (n - a - b)
+
+ return -1
+
+
+if __name__ == "__main__":
+ print(f"{solution() = }")
diff --git a/project_euler/problem_073/sol1.py b/project_euler/problem_073/sol1.py
index 2b66b7d8769b..c39110252ccd 100644
--- a/project_euler/problem_073/sol1.py
+++ b/project_euler/problem_073/sol1.py
@@ -36,7 +36,12 @@ def solution(max_d: int = 12_000) -> int:
fractions_number = 0
for d in range(max_d + 1):
- for n in range(d // 3 + 1, (d + 1) // 2):
+ n_start = d // 3 + 1
+ n_step = 1
+ if d % 2 == 0:
+ n_start += 1 - n_start % 2
+ n_step = 2
+ for n in range(n_start, (d + 1) // 2, n_step):
if gcd(n, d) == 1:
fractions_number += 1
return fractions_number
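The new n_start/n_step logic skips even numerators whenever d is even: in that case gcd(n, d) >= 2 for even n, so such fractions can never be in lowest terms, and stepping by 2 halves the inner loop. A hedged equivalence check against the plain loop:

    from math import gcd

    d = 12
    plain = [n for n in range(d // 3 + 1, (d + 1) // 2) if gcd(n, d) == 1]
    start = d // 3 + 1
    start += 1 - start % 2  # bump an even start to the next odd numerator
    skipped = [n for n in range(start, (d + 1) // 2, 2) if gcd(n, d) == 1]
    assert plain == skipped == [5]
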
diff --git a/project_euler/problem_551/sol1.py b/project_euler/problem_551/sol1.py
index 100e9d41dd31..e13cf77a776d 100644
--- a/project_euler/problem_551/sol1.py
+++ b/project_euler/problem_551/sol1.py
@@ -185,7 +185,7 @@ def solution(n: int = 10**15) -> int:
i = 1
dn = 0
while True:
- diff, terms_jumped = next_term(digits, 20, i + dn, n)
+ _diff, terms_jumped = next_term(digits, 20, i + dn, n)
dn += terms_jumped
if dn == n - i:
break
diff --git a/pyproject.toml b/pyproject.toml
index b680cc0d439e..f1559d6bc1b1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,25 +3,27 @@ name = "thealgorithms-python"
version = "0.0.1"
description = "TheAlgorithms in Python"
authors = [ { name = "TheAlgorithms Contributors" } ]
-requires-python = ">=3.13"
+requires-python = ">=3.14"
classifiers = [
"Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
]
dependencies = [
"beautifulsoup4>=4.12.3",
+ "cython>=3.1.2",
"fake-useragent>=1.5.1",
"httpx>=0.28.1",
"imageio>=2.36.1",
"keras>=3.7",
- "lxml>=5.3",
+ "lxml>=6",
"matplotlib>=3.9.3",
"numpy>=2.1.3",
"opencv-python>=4.10.0.84",
"pandas>=2.2.3",
- "pillow>=11",
+ "pillow>=11.3",
"rich>=13.9.4",
"scikit-learn>=1.5.2",
+ "scipy>=1.16.2",
"sphinx-pyproject>=0.3",
"statsmodels>=0.14.4",
"sympy>=1.13.3",
@@ -32,7 +34,7 @@ dependencies = [
[dependency-groups]
test = [
- "pytest>=8.3.4",
+ "pytest>=8.4.1",
"pytest-cov>=6",
]
@@ -47,7 +49,7 @@ euler-validate = [
]
[tool.ruff]
-target-version = "py313"
+target-version = "py314"
output-format = "full"
lint.select = [
@@ -108,7 +110,7 @@ lint.ignore = [
# `ruff rule S101` for a description of that rule
"B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME
"B905", # `zip()` without an explicit `strict=` parameter -- FIX ME
- "EM101", # Exception must not use a string literal, assign to variable first
+ "EM101", # Exception must not use a string literal, assign to a variable first
"EXE001", # Shebang is present but file is not executable -- DO NOT FIX
"G004", # Logging statement uses f-string
"ISC001", # Conflicts with ruff format -- DO NOT FIX
@@ -124,7 +126,7 @@ lint.ignore = [
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME
"SIM905", # Consider using a list literal instead of `str.split` -- DO NOT FIX
"SLF001", # Private member accessed: `_Iterator` -- FIX ME
- "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX
+ "UP037", # FIX ME
]
lint.per-file-ignores."data_structures/hashing/tests/test_hash_map.py" = [
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 66b5d8a6b94e..000000000000
--- a/requirements.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-beautifulsoup4
-fake-useragent
-httpx
-imageio
-keras
-lxml
-matplotlib
-numpy
-opencv-python
-pandas
-pillow
-rich
-scikit-learn
-sphinx-pyproject
-statsmodels
-sympy
-tweepy
-typing_extensions
-xgboost
diff --git a/scheduling/multi_level_feedback_queue.py b/scheduling/multi_level_feedback_queue.py
index abee3c85c5a5..58ba2afa0e67 100644
--- a/scheduling/multi_level_feedback_queue.py
+++ b/scheduling/multi_level_feedback_queue.py
@@ -255,7 +255,7 @@ def multi_level_feedback_queue(self) -> deque[Process]:
# all queues except last one have round_robin algorithm
for i in range(self.number_of_queues - 1):
- finished, self.ready_queue = self.round_robin(
+ _finished, self.ready_queue = self.round_robin(
self.ready_queue, self.time_slices[i]
)
# the last queue has first_come_first_served algorithm
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 000000000000..92ebf3a7e8ba
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,27 @@
+Dealing with the onslaught of Hacktoberfest
+* https://hacktoberfest.com
+
+Each year, October brings a swarm of new contributors participating in Hacktoberfest. This event has its pros and cons, but it presents a monumental workload for the few active maintainers of this repo. The maintainer workload is further impacted by a new version of CPython being released in the first week of each October.
+
+To help make our algorithms more valuable to visitors, our CONTRIBUTING.md file outlines several strict requirements, such as tests, type hints, descriptive names, functions, and/or classes. Maintainers reviewing pull requests should try to encourage improvements to meet these goals, but when the workload becomes overwhelming (esp. in October), pull requests that do not meet these goals should be closed.
+
+Below are a few [`gh`](https://cli.github.com) scripts that should close pull requests that do not match the definition of an acceptable algorithm as defined in CONTRIBUTING.md. I tend to run these scripts in the following order.
+
+* close_pull_requests_with_require_descriptive_names.sh
+* close_pull_requests_with_require_tests.sh
+* close_pull_requests_with_require_type_hints.sh
+* close_pull_requests_with_failing_tests.sh
+* close_pull_requests_with_awaiting_changes.sh
+* find_git_conflicts.sh
+
+### Run on 14 Oct 2025: 107 of 541 (19.77%) pull requests closed.
+
+Script run | Open pull requests | Pull requests closed
+--- | --- | ---
+None | 541 | 0
+require_descriptive_names | 515 | 26
+require_tests | 498 | 17
+require_type_hints | 496 | 2
+failing_tests | 438 | ___58___
+awaiting_changes | 434 | 4
+git_conflicts | [ broken ] | 0
diff --git a/scripts/build_directory_md.py b/scripts/build_directory_md.py
index aa95b95db4b5..bdad7686c7e3 100755
--- a/scripts/build_directory_md.py
+++ b/scripts/build_directory_md.py
@@ -18,8 +18,20 @@ def good_file_paths(top_dir: str = ".") -> Iterator[str]:
yield os.path.join(dir_path, filename).lstrip("./")
-def md_prefix(i):
- return f"{i * ' '}*" if i else "\n##"
+def md_prefix(indent: int) -> str:
+ """
+ Markdown prefix based on indent for bullet points
+
+ >>> md_prefix(0)
+ '\\n##'
+ >>> md_prefix(1)
+ ' *'
+ >>> md_prefix(2)
+ ' *'
+ >>> md_prefix(3)
+ ' *'
+ """
+ return f"{indent * ' '}*" if indent else "\n##"
def print_path(old_path: str, new_path: str) -> str:
diff --git a/sorts/binary_insertion_sort.py b/sorts/binary_insertion_sort.py
index 50653a99e7ce..b928316a849d 100644
--- a/sorts/binary_insertion_sort.py
+++ b/sorts/binary_insertion_sort.py
@@ -56,7 +56,7 @@ def binary_insertion_sort(collection: list) -> list:
return collection
-if __name__ == "__main":
+if __name__ == "__main__":
user_input = input("Enter numbers separated by a comma:\n").strip()
try:
unsorted = [int(item) for item in user_input.split(",")]
diff --git a/sorts/bucket_sort.py b/sorts/bucket_sort.py
index 1c1320a58a7d..893c7ff3a23a 100644
--- a/sorts/bucket_sort.py
+++ b/sorts/bucket_sort.py
@@ -51,12 +51,35 @@ def bucket_sort(my_list: list, bucket_count: int = 10) -> list:
>>> collection = random.sample(range(-50, 50), 50)
>>> bucket_sort(collection) == sorted(collection)
True
+ >>> data = [1, 2, 2, 1, 1, 3]
+ >>> bucket_sort(data) == sorted(data)
+ True
+ >>> data = [5, 5, 5, 5, 5]
+ >>> bucket_sort(data) == sorted(data)
+ True
+ >>> data = [1000, -1000, 500, -500, 0]
+ >>> bucket_sort(data) == sorted(data)
+ True
+ >>> data = [5.5, 2.2, -1.1, 3.3, 0.0]
+ >>> bucket_sort(data) == sorted(data)
+ True
+ >>> bucket_sort([1]) == [1]
+ True
+ >>> data = [-1.1, -1.5, -3.4, 2.5, 3.6, -3.3]
+ >>> bucket_sort(data) == sorted(data)
+ True
+ >>> data = [9, 2, 7, 1, 5]
+ >>> bucket_sort(data) == sorted(data)
+ True
"""
if len(my_list) == 0 or bucket_count <= 0:
return []
min_value, max_value = min(my_list), max(my_list)
+ if min_value == max_value:
+ return my_list
+
bucket_size = (max_value - min_value) / bucket_count
buckets: list[list] = [[] for _ in range(bucket_count)]
@@ -73,3 +96,6 @@ def bucket_sort(my_list: list, bucket_count: int = 10) -> list:
testmod()
assert bucket_sort([4, 5, 3, 2, 1]) == [1, 2, 3, 4, 5]
assert bucket_sort([0, 1, -10, 15, 2, -2]) == [-10, -2, 0, 1, 2, 15]
+ assert bucket_sort([1.1, 1.2, -1.2, 0, 2.4]) == [-1.2, 0, 1.1, 1.2, 2.4]
+ assert bucket_sort([5, 5, 5, 5, 5]) == [5, 5, 5, 5, 5]
+ assert bucket_sort([-5, -1, -6, -2]) == [-6, -5, -2, -1]
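The new min_value == max_value guard in bucket_sort matters because bucket_size would otherwise be zero and the later bucket-index division would raise ZeroDivisionError. A hedged illustration (the index formula is representative, not copied from the repository):

    min_value, max_value, bucket_count = 5, 5, 10
    bucket_size = (max_value - min_value) / bucket_count  # 0.0
    try:
        index = int((5 - min_value) / bucket_size)
    except ZeroDivisionError:
        index = 0  # the early return avoids ever reaching this division
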
diff --git a/sorts/comb_sort.py b/sorts/comb_sort.py
index 3c8b1e99a454..94ad8f533328 100644
--- a/sorts/comb_sort.py
+++ b/sorts/comb_sort.py
@@ -5,8 +5,7 @@
Comb sort improves on bubble sort algorithm.
In bubble sort, distance (or gap) between two compared elements is always one.
Comb sort improvement is that gap can be much more than 1, in order to prevent slowing
-down by small values
-at the end of a list.
+down by small values at the end of a list.
More info on: https://en.wikipedia.org/wiki/Comb_sort
diff --git a/sorts/cyclic_sort.py b/sorts/cyclic_sort.py
new file mode 100644
index 000000000000..9e81291548d4
--- /dev/null
+++ b/sorts/cyclic_sort.py
@@ -0,0 +1,55 @@
+"""
+This is a pure Python implementation of the Cyclic Sort algorithm.
+
+For doctests run following command:
+python -m doctest -v cyclic_sort.py
+or
+python3 -m doctest -v cyclic_sort.py
+For manual testing run:
+python cyclic_sort.py
+or
+python3 cyclic_sort.py
+"""
+
+
+def cyclic_sort(nums: list[int]) -> list[int]:
+ """
+ Sorts the input list of n integers from 1 to n in-place
+ using the Cyclic Sort algorithm.
+
+ :param nums: List of n integers from 1 to n to be sorted.
+ :return: The same list sorted in ascending order.
+
+ Time complexity: O(n), where n is the number of integers in the list.
+
+ Examples:
+ >>> cyclic_sort([])
+ []
+ >>> cyclic_sort([3, 5, 2, 1, 4])
+ [1, 2, 3, 4, 5]
+ """
+
+ # Perform cyclic sort
+ index = 0
+ while index < len(nums):
+ # Calculate the correct index for the current element
+ correct_index = nums[index] - 1
+ # If the current element is not at its correct position,
+ # swap it with the element at its correct index
+ if index != correct_index:
+ nums[index], nums[correct_index] = nums[correct_index], nums[index]
+ else:
+ # If the current element is already in its correct position,
+ # move to the next element
+ index += 1
+
+ return nums
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
+ user_input = input("Enter numbers separated by a comma:\n").strip()
+ unsorted = [int(item) for item in user_input.split(",")]
+ print(*cyclic_sort(unsorted), sep=",")
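A short trace of the swap loop in cyclic_sort on [3, 5, 2, 1, 4] (each value v belongs at zero-based index v - 1):

    [3, 5, 2, 1, 4]  # 3 belongs at index 2 -> swap with 2
    [2, 5, 3, 1, 4]  # 2 belongs at index 1 -> swap with 5
    [5, 2, 3, 1, 4]  # 5 belongs at index 4 -> swap with 4
    [4, 2, 3, 1, 5]  # 4 belongs at index 3 -> swap with 1
    [1, 2, 3, 4, 5]  # 1 is in place -> advance; the rest are already placed

Note that the loop assumes a permutation of 1..n, as the docstring states; duplicates would swap forever and out-of-range values would raise IndexError.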
diff --git a/sorts/merge_sort.py b/sorts/merge_sort.py
index 0628b848b794..11c202788035 100644
--- a/sorts/merge_sort.py
+++ b/sorts/merge_sort.py
@@ -18,6 +18,7 @@ def merge_sort(collection: list) -> list:
:return: The same collection ordered in ascending order.
Time Complexity: O(n log n)
+ Space Complexity: O(n)
Examples:
>>> merge_sort([0, 5, 3, 2, 2])
diff --git a/sorts/stalin_sort.py b/sorts/stalin_sort.py
new file mode 100644
index 000000000000..6dd5708c7f01
--- /dev/null
+++ b/sorts/stalin_sort.py
@@ -0,0 +1,47 @@
+"""
+Stalin Sort algorithm: Removes elements that are out of order.
+Elements smaller than the last kept element are discarded.
+Reference: https://medium.com/@kaweendra/the-ultimate-sorting-algorithm-6513d6968420
+"""
+
+
+def stalin_sort(sequence: list[int]) -> list[int]:
+ """
+ Sorts a list using the Stalin sort algorithm.
+
+ >>> stalin_sort([4, 3, 5, 2, 1, 7])
+ [4, 5, 7]
+
+ >>> stalin_sort([1, 2, 3, 4])
+ [1, 2, 3, 4]
+
+ >>> stalin_sort([4, 5, 5, 2, 3])
+ [4, 5, 5]
+
+ >>> stalin_sort([6, 11, 12, 4, 1, 5])
+ [6, 11, 12]
+
+ >>> stalin_sort([5, 0, 4, 3])
+ [5]
+
+ >>> stalin_sort([5, 4, 3, 2, 1])
+ [5]
+
+ >>> stalin_sort([1, 2, 3, 4, 5])
+ [1, 2, 3, 4, 5]
+
+ >>> stalin_sort([1, 2, 8, 7, 6])
+ [1, 2, 8]
+ """
+ if not sequence:
+ return []
+
+ result = [sequence[0]]
+ for element in sequence[1:]:
+ if element >= result[-1]:
+ result.append(element)
+
+ return result
+
+
+if __name__ == "__main__":
+ import doctest
+
+ doctest.testmod()
diff --git a/strings/anagrams.py b/strings/anagrams.py
index fb9ac0bd1f45..71cc142fb2ad 100644
--- a/strings/anagrams.py
+++ b/strings/anagrams.py
@@ -6,19 +6,26 @@
def signature(word: str) -> str:
- """Return a word sorted
+ """
+ Return a word's frequency-based signature.
+
>>> signature("test")
- 'estt'
+ 'e1s1t2'
>>> signature("this is a test")
- ' aehiisssttt'
+ ' 3a1e1h1i2s3t3'
>>> signature("finaltest")
- 'aefilnstt'
+ 'a1e1f1i1l1n1s1t2'
"""
- return "".join(sorted(word))
+ frequencies = collections.Counter(word)
+ return "".join(
+ f"{char}{frequency}" for char, frequency in sorted(frequencies.items())
+ )
def anagram(my_word: str) -> list[str]:
- """Return every anagram of the given word
+ """
+ Return every anagram of the given word from the dictionary.
+
>>> anagram('test')
['sett', 'stet', 'test']
>>> anagram('this is a test')
@@ -40,5 +47,5 @@ def anagram(my_word: str) -> list[str]:
all_anagrams = {word: anagram(word) for word in word_list if len(anagram(word)) > 1}
with open("anagrams.txt", "w") as file:
- file.write("all_anagrams = \n ")
+ file.write("all_anagrams = \n")
file.write(pprint.pformat(all_anagrams))
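Both the old sorted-letter signature and the new frequency signature key the same equivalence classes: two words are anagrams exactly when their letter multisets match. A hedged check of the two forms:

    import collections

    def freq_signature(word: str) -> str:
        frequencies = collections.Counter(word)
        return "".join(f"{char}{n}" for char, n in sorted(frequencies.items()))

    assert "".join(sorted("test")) == "".join(sorted("sett")) == "estt"
    assert freq_signature("test") == freq_signature("sett") == "e1s1t2"
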
diff --git a/strings/capitalize.py b/strings/capitalize.py
index c0b45e0d9614..628ebffc8852 100644
--- a/strings/capitalize.py
+++ b/strings/capitalize.py
@@ -1,6 +1,3 @@
-from string import ascii_lowercase, ascii_uppercase
-
-
def capitalize(sentence: str) -> str:
"""
Capitalizes the first letter of a sentence or word.
@@ -19,11 +16,9 @@ def capitalize(sentence: str) -> str:
if not sentence:
return ""
- # Create a dictionary that maps lowercase letters to uppercase letters
# Capitalize the first character if it's a lowercase letter
# Concatenate the capitalized character with the rest of the string
- lower_to_upper = dict(zip(ascii_lowercase, ascii_uppercase))
- return lower_to_upper.get(sentence[0], sentence[0]) + sentence[1:]
+ return sentence[0].upper() + sentence[1:]
if __name__ == "__main__":
diff --git a/strings/edit_distance.py b/strings/edit_distance.py
index e842c8555c8e..77ed23037937 100644
--- a/strings/edit_distance.py
+++ b/strings/edit_distance.py
@@ -14,6 +14,20 @@ def edit_distance(source: str, target: str) -> int:
>>> edit_distance("GATTIC", "GALTIC")
1
+ >>> edit_distance("NUM3", "HUM2")
+ 2
+ >>> edit_distance("cap", "CAP")
+ 3
+ >>> edit_distance("Cat", "")
+ 3
+ >>> edit_distance("cat", "cat")
+ 0
+ >>> edit_distance("", "123456789")
+ 9
+ >>> edit_distance("Be@uty", "Beautyyyy!")
+ 5
+ >>> edit_distance("lstring", "lsstring")
+ 1
"""
if len(source) == 0:
return len(target)
diff --git a/strings/palindrome.py b/strings/palindrome.py
index bfdb3ddcf396..e765207e5942 100644
--- a/strings/palindrome.py
+++ b/strings/palindrome.py
@@ -11,6 +11,8 @@
"BB": True,
"ABC": False,
"amanaplanacanalpanama": True, # "a man a plan a canal panama"
+ "abcdba": False,
+ "AB": False,
}
# Ensure our test data is valid
assert all((key == key[::-1]) is value for key, value in test_data.items())
@@ -61,7 +63,7 @@ def is_palindrome_recursive(s: str) -> bool:
>>> all(is_palindrome_recursive(key) is value for key, value in test_data.items())
True
"""
- if len(s) <= 2:
+ if len(s) <= 1:
return True
if s[0] == s[len(s) - 1]:
return is_palindrome_recursive(s[1:-1])
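The base-case change fixes a real bug: with len(s) <= 2 returning True, any two-character string passed without its characters ever being compared, and strings like "abcdba" recursed down to a length-2 core and passed too. A reconstruction of the old behavior (buggy on purpose):

    def is_pal_old(s: str) -> bool:
        if len(s) <= 2:  # old, buggy base case
            return True
        if s[0] == s[-1]:
            return is_pal_old(s[1:-1])
        return False

    assert is_pal_old("AB") is True  # wrong; the new 'len(s) <= 1' rejects it

The new "AB" and "abcdba" entries in test_data exercise exactly this case.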
diff --git a/web_programming/covid_stats_via_xpath.py b/web_programming/covid_stats_via_xpath.py
index f7db51b63169..88a248610441 100644
--- a/web_programming/covid_stats_via_xpath.py
+++ b/web_programming/covid_stats_via_xpath.py
@@ -1,7 +1,8 @@
"""
-This is to show simple COVID19 info fetching from worldometers site using lxml
-* The main motivation to use lxml in place of bs4 is that it is faster and therefore
-more convenient to use in Python web projects (e.g. Django or Flask-based)
+This script demonstrates fetching simple COVID-19 statistics from the
+Worldometers archive site using lxml. lxml is chosen over BeautifulSoup
+for its speed and convenience in Python web projects (such as Django or
+Flask).
"""
# /// script
@@ -19,19 +20,40 @@
class CovidData(NamedTuple):
- cases: int
- deaths: int
- recovered: int
+ cases: str
+ deaths: str
+ recovered: str
-def covid_stats(url: str = "https://www.worldometers.info/coronavirus/") -> CovidData:
+def covid_stats(
+ url: str = (
+ "https://web.archive.org/web/20250825095350/"
+ "https://www.worldometers.info/coronavirus/"
+ ),
+) -> CovidData:
xpath_str = '//div[@class = "maincounter-number"]/span/text()'
- return CovidData(
- *html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str)
+ try:
+ response = httpx.get(url, timeout=10).raise_for_status()
+ except httpx.TimeoutException:
+ print(
+ "Request timed out. Please check your network connection "
+ "or try again later."
+ )
+ return CovidData("N/A", "N/A", "N/A")
+ except httpx.HTTPStatusError as e:
+ print(f"HTTP error occurred: {e}")
+ return CovidData("N/A", "N/A", "N/A")
+ data = html.fromstring(response.content).xpath(xpath_str)
+ if len(data) != 3:
+ print("Unexpected data format. The page structure may have changed.")
+ data = "N/A", "N/A", "N/A"
+ return CovidData(*data)
+
+
+if __name__ == "__main__":
+ fmt = (
+ "Total COVID-19 cases in the world: {}\n"
+ "Total deaths due to COVID-19 in the world: {}\n"
+ "Total COVID-19 patients recovered in the world: {}"
)
-
-
-fmt = """Total COVID-19 cases in the world: {}
-Total deaths due to COVID-19 in the world: {}
-Total COVID-19 patients recovered in the world: {}"""
-print(fmt.format(*covid_stats()))
+ print(fmt.format(*covid_stats()))
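One detail worth noting in the rewrite above: httpx's Response.raise_for_status() returns the response object (unlike requests, where it returns None), so the call can be chained directly onto httpx.get(). A hedged sketch of the same pattern:

    import httpx

    try:
        response = httpx.get("https://example.com", timeout=10).raise_for_status()
    except (httpx.TimeoutException, httpx.HTTPStatusError):
        response = None  # degrade gracefully, mirroring the N/A path above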