Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions integration_tests/sonar/test_sonar_tempfile_mktemp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
class TestTempfileMktemp(SonarIntegrationTest):
codemod = SonarTempfileMktemp
code_path = "tests/samples/tempfile_mktemp.py"
replacement_lines = [(3, "tempfile.mkstemp()\n")]
expected_diff = "--- \n+++ \n@@ -1,3 +1,3 @@\n import tempfile\n \n-tempfile.mktemp()\n+tempfile.mkstemp()\n"
replacement_lines = [
(3, "with tempfile.NamedTemporaryFile(delete=False) as tf:\n"),
(4, " filename = tf.name\n"),
]
expected_diff = "--- \n+++ \n@@ -1,3 +1,4 @@\n import tempfile\n \n-filename = tempfile.mktemp()\n+with tempfile.NamedTemporaryFile(delete=False) as tf:\n+ filename = tf.name\n"
expected_line_change = "3"
change_description = TempfileMktempTransformer.change_description
10 changes: 6 additions & 4 deletions integration_tests/test_tempfile_mktemp.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ class TestTempfileMktemp(BaseIntegrationTest):
original_code = """
import tempfile

tempfile.mktemp()
var = "hello"
filename = tempfile.mktemp()
"""
replacement_lines = [(3, "tempfile.mkstemp()\n")]
expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n import tempfile\n \n-tempfile.mktemp()\n+tempfile.mkstemp()\n var = "hello"\n'
replacement_lines = [
(3, "with tempfile.NamedTemporaryFile(delete=False) as tf:\n"),
(4, " filename = tf.name\n"),
]
expected_diff = "--- \n+++ \n@@ -1,3 +1,4 @@\n import tempfile\n \n-filename = tempfile.mktemp()\n+with tempfile.NamedTemporaryFile(delete=False) as tf:\n+ filename = tf.name\n"
expected_line_change = "3"
change_description = TempfileMktempTransformer.change_description
2 changes: 1 addition & 1 deletion src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class DocMetadata:
),
"secure-tempfile": DocMetadata(
importance="High",
guidance_explained="We believe this codemod is safe and will cause no unexpected errors.",
guidance_explained="We believe this codemod is safe. You should review this code before merging to make sure temporary files are created, used, and closed according to your expectations.",
),
"upgrade-sslcontext-minimum-version": DocMetadata(
importance="High",
Expand Down
14 changes: 8 additions & 6 deletions src/core_codemods/docs/pixee_python_secure-tempfile.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
This codemod replaces all `tempfile.mktemp` calls to the more secure `tempfile.mkstemp`.
This codemod replaces all `tempfile.mktemp` calls with the more secure `tempfile.NamedTemporaryFile`

The Python [tempfile documentation](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) is explicit
that `tempfile.mktemp` should be deprecated to avoid an unsafe and unexpected race condition.
The changes from this codemod look like this:
The Python [tempfile documentation](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) is explicit that `tempfile.mktemp` should be deprecated to avoid an unsafe and unexpected race condition. `tempfile.mktemp` does not handle the possibility that the returned file name could already be used by another process by the time your code opens the file. A more secure approach to create temporary files is to use `tempfile.NamedTemporaryFile` which will create the file for you and handle all security conditions.

The changes from this codemod look like this:

```diff
import tempfile
- tempfile.mktemp(...)
+ tempfile.mkstemp(...)
- filename = tempfile.mktemp()
+ with tempfile.NamedTemporaryFile(delete=False) as tf:
+ filename = tf.name
```

The change sets `delete=False` to closely follow your code's intention when calling `tempfile.mktemp`. However, you should use this as a starting point to determine when your temporary file should be deleted.
126 changes: 106 additions & 20 deletions src/core_codemods/tempfile_mktemp.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,130 @@
from textwrap import dedent
from typing import Optional

import libcst as cst
from libcst import matchers

from codemodder.codemods.libcst_transformer import (
LibcstResultTransformer,
LibcstTransformerPipeline,
)
from codemodder.codemods.semgrep import SemgrepRuleDetector
from codemodder.codemods.utils_mixin import NameResolutionMixin
from codemodder.codemods.utils_mixin import NameAndAncestorResolutionMixin
from codemodder.utils.utils import clean_simplestring
from core_codemods.api import CoreCodemod, Metadata, Reference, ReviewGuidance


class TempfileMktempTransformer(LibcstResultTransformer, NameResolutionMixin):
class TempfileMktempTransformer(
LibcstResultTransformer, NameAndAncestorResolutionMixin
):
change_description = "Replaces `tempfile.mktemp` with `tempfile.mkstemp`."
_module_name = "tempfile"

def on_result_found(self, original_node, updated_node):
maybe_name = self.get_aliased_prefix_name(original_node, self._module_name)
if (maybe_name := maybe_name or self._module_name) == self._module_name:
self.add_needed_import(self._module_name)
self.remove_unused_import(original_node)
return self.update_call_target(updated_node, maybe_name, "mkstemp")
def leave_SimpleStatementLine(self, original_node, updated_node):
match original_node:
case cst.SimpleStatementLine(body=[bsstmt]):
return self.check_mktemp(original_node, bsstmt)
return updated_node

def check_mktemp(
self, original_node: cst.SimpleStatementLine, bsstmt: cst.BaseSmallStatement
) -> cst.SimpleStatementLine | cst.FlattenSentinel:
if maybe_tuple := self._is_assigned_to_mktemp(bsstmt): # type: ignore
assign_name, call = maybe_tuple
return self.report_and_change(call, assign_name)
if maybe_tuple := self._mktemp_is_sink(bsstmt):
wrapper_func_name, call = maybe_tuple
return self.report_and_change(call, wrapper_func_name, assignment=False)
return original_node

def report_and_change(
self, node: cst.Call, name: cst.Name, assignment=True
) -> cst.FlattenSentinel:
self.report_change(node)
self.add_needed_import(self._module_name)
self.remove_unused_import(node)
with_block = (
f"{name.value} = tf.name" if assignment else f"{name.value}(tf.name)"
)
new_stmt = dedent(
f"""
with tempfile.NamedTemporaryFile({self._make_args(node)}) as tf:
{with_block}
"""
Comment on lines +45 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means the file will be closed as soon as it is created. It needs to account for any references after the assignment and include them in the block.

I remember tackling this problem in the file-resource-leak codemod, so you can refer to it for a partial solution.
An ideal solution would just include as far as the last reference goes, but a simple one is just to move every statement after the assignment to the with block. You can get those by just grabbing all the statements in the parent cst.Module | cst.IndentedBlock with index greater than the assignment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to understand your comment in the context of the current _make_args. If you look at all unit tests, every one uses delete=False. I purposefully didn't account for more complex cases in this iteration. Does that address your concern or did I miss something?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for not being clear, let me try with an example.

Consider the following:

tmp_file = open(tempfile.mktemp(), "w+")
tmp_file.write("text")
print(tmp_file.name)

If I read the code correctly, the codemod will produce:

with tempfile.NamedTemporaryFile("w+") as tf:
    tmp_file = tf.name
tmp_file.write("text")
print(tmp_file.name)

Which means the file will be closed as soon as the assignment is done (due to the with statement). This ends with an error at the temp_file.write("text") (due to the file being closed).

My point is that the resulting code is quite useless as it only creates the file. Nothing else can be done with it since the file is closed almost as soon as it is created.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The codemod isn't implemented to change your test example. It short circuits early because it doesn't match the pattern I've asked it for which is much more specific than what you have. In fact, there are similar tests all under the xfailed class, but I've added your test example as a "no_change" test to demonstrate this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually I think we can/should make this codemod smarter and allow it to function much the same way as the resource leak codemod. But for now my understanding is that it will simply replace usages of tempfile.mktemp() in order to get a temporary filename.

Can we make sure to add an issue for the more sophisticated behavior if it doesn't already exist?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly! I communicated this at our previous meeting, this is iterative and this PR addresses only the first easiest cases as a way to demonstrate that we're aware the original codemod was just wrong. Will ticket the rest.

).rstrip()
return cst.FlattenSentinel(
[
cst.parse_statement(new_stmt),
]
)

def _make_args(self, node: cst.Call) -> str:
"""Convert args passed to tempfile.mktemp() to string for args to tempfile.NamedTemporaryFile"""

default = "delete=False"
if not node.args:
return default
new_args = ""
arg_keys = ("suffix", "prefix", "dir")
for idx, arg in enumerate(node.args):
cst.ensure_type(val := arg.value, cst.SimpleString)
new_args += f'{arg_keys[idx]}="{clean_simplestring(val)}", '
return f"{new_args}{default}"

def _is_assigned_to_mktemp(
self, bsstmt: cst.BaseSmallStatement
) -> Optional[tuple[cst.Name, cst.Call]]:
match bsstmt:
case cst.Assign(value=value, targets=targets):
maybe_value = self._is_mktemp_call(value) # type: ignore
if maybe_value and all(
map(
lambda t: matchers.matches(
t, matchers.AssignTarget(target=matchers.Name())
),
targets, # type: ignore
)
):
# # Todo: handle multiple potential targets
return (targets[0].target, maybe_value)
case cst.AnnAssign(target=target, value=value):
maybe_value = self._is_mktemp_call(value) # type: ignore
if maybe_value and isinstance(target, cst.Name): # type: ignore
return (target, maybe_value)
return None

def _is_mktemp_call(self, value) -> Optional[cst.Call]:
match value:
case cst.Call() if self.find_base_name(value.func) == "tempfile.mktemp":
return value
return None

def _mktemp_is_sink(
self, bsstmt: cst.BaseSmallStatement
) -> Optional[tuple[cst.Name, cst.Call]]:
match bsstmt:
case cst.Expr(value=cst.Call() as call):
if not (args := call.args):
return None

# todo: handle more complex cases of mktemp in different arg pos
match first_arg_call := args[0].value:
case cst.Call():
if maybe_value := self._is_mktemp_call(first_arg_call): # type: ignore
wrapper_func = call.func
return (wrapper_func, maybe_value)
return None


TempfileMktemp = CoreCodemod(
metadata=Metadata(
name="secure-tempfile",
summary="Upgrade and Secure Temp File Creation",
review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW,
review_guidance=ReviewGuidance.MERGE_AFTER_REVIEW,
references=[
Reference(
url="https://docs.python.org/3/library/tempfile.html#tempfile.mktemp"
),
],
),
detector=SemgrepRuleDetector(
"""
rules:
- patterns:
- pattern: tempfile.mktemp(...)
- pattern-inside: |
import tempfile
...
"""
),
transformer=LibcstTransformerPipeline(TempfileMktempTransformer),
)
5 changes: 3 additions & 2 deletions tests/codemods/sonar/test_sonar_tempfile_mktemp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ def test_simple(self, tmpdir):
input_code = """
import tempfile

tempfile.mktemp()
filename = tempfile.mktemp()
"""
expected = """
import tempfile

tempfile.mkstemp()
with tempfile.NamedTemporaryFile(delete=False) as tf:
filename = tf.name
"""
issues = {
"issues": [
Expand Down
Loading