-
-
Notifications
You must be signed in to change notification settings - Fork 26.2k
Closed
Description
Describe the bug
I have converted the object-dtype columns to `category` dtype, but `RandomForestRegressor.fit` keeps raising this error. I also tried `LinearRegression`
and it hits the same error.
Steps/Code to Reproduce
https://github.com/khteh/JupyterNotebooks/blob/master/ZTM/BulldozerPricingRegression.ipynb
Expected Results
No error: the model should fit without raising.
Actual Results
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/arrays/categorical.py:592, in Categorical.astype(self, dtype, copy)
591 try:
--> 592 new_cats = new_cats.astype(dtype=dtype, copy=copy)
593 fill_value = self.categories._na_value
ValueError: could not convert string to float: '100C'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[205], line 1
----> 1 get_ipython().run_cell_magic('time', '', '#print("\\nX_train.info:")\n#X_train.info()\n#print("\\nY_train.info:")\n#Y_train.info()\nY_train.head()\nmodel = RandomForestRegressor(n_jobs=-1)\nmodel.fit(X=X_train, y=Y_train)\n')
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/IPython/core/interactiveshell.py:2565, in InteractiveShell.run_cell_magic(self, magic_name, line, cell)
2563 with self.builtin_trap:
2564 args = (magic_arg_s, cell)
-> 2565 result = fn(*args, **kwargs)
2567 # The code below prevents the output from being displayed
2568 # when using magics with decorator @output_can_be_silenced
2569 # when the last Python token in the expression is a ';'.
2570 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False):
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/IPython/core/magics/execution.py:1470, in ExecutionMagics.time(self, line, cell, local_ns)
1468 if interrupt_occured:
1469 if exit_on_interrupt and captured_exception:
-> 1470 raise captured_exception
1471 return
1472 return out
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/IPython/core/magics/execution.py:1439, in ExecutionMagics.time(self, line, cell, local_ns)
1437 if expr_val is not None:
1438 code_2 = self.shell.compile(expr_val, source, 'eval')
-> 1439 out = eval(code_2, glob, local_ns)
1440 except KeyboardInterrupt as e:
1441 captured_exception = e
File <timed exec>:7
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/sklearn/base.py:1365, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1358 estimator._validate_params()
1360 with config_context(
1361 skip_parameter_validation=(
1362 prefer_skip_nested_validation or global_skip_validation
1363 )
1364 ):
-> 1365 return fit_method(estimator, *args, **kwargs)
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/sklearn/ensemble/_forest.py:359, in BaseForest.fit(self, X, y, sample_weight)
356 if issparse(y):
357 raise ValueError("sparse multilabel-indicator for y is not supported.")
--> 359 X, y = validate_data(
360 self,
361 X,
362 y,
363 multi_output=True,
364 accept_sparse="csc",
365 dtype=DTYPE,
366 ensure_all_finite=False,
367 )
368 # _compute_missing_values_in_feature_mask checks if X has missing values and
369 # will raise an error if the underlying tree base estimator can't handle missing
370 # values. Only the criterion is required to determine if the tree supports
371 # missing values.
372 estimator = type(self.estimator)(criterion=self.criterion)
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/sklearn/utils/validation.py:2971, in validate_data(_estimator, X, y, reset, validate_separately, skip_check_array, **check_params)
2969 y = check_array(y, input_name="y", **check_y_params)
2970 else:
-> 2971 X, y = check_X_y(X, y, **check_params)
2972 out = X, y
2974 if not no_val_X and check_params.get("ensure_2d", True):
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/sklearn/utils/validation.py:1368, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1362 raise ValueError(
1363 f"{estimator_name} requires y to be passed, but the target y is None"
1364 )
1366 ensure_all_finite = _deprecate_force_all_finite(force_all_finite, ensure_all_finite)
-> 1368 X = check_array(
1369 X,
1370 accept_sparse=accept_sparse,
1371 accept_large_sparse=accept_large_sparse,
1372 dtype=dtype,
1373 order=order,
1374 copy=copy,
1375 force_writeable=force_writeable,
1376 ensure_all_finite=ensure_all_finite,
1377 ensure_2d=ensure_2d,
1378 allow_nd=allow_nd,
1379 ensure_min_samples=ensure_min_samples,
1380 ensure_min_features=ensure_min_features,
1381 estimator=estimator,
1382 input_name="X",
1383 )
1385 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
1387 check_consistent_length(X, y)
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/sklearn/utils/validation.py:971, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
966 if pandas_requires_conversion:
967 # pandas dataframe requires conversion earlier to handle extension dtypes with
968 # nans
969 # Use the original dtype for conversion if dtype is None
970 new_dtype = dtype_orig if dtype is None else dtype
--> 971 array = array.astype(new_dtype)
972 # Since we converted here, we do not need to convert again later
973 dtype = None
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/generic.py:6662, in NDFrame.astype(self, dtype, copy, errors)
6656 results = [
6657 ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items()
6658 ]
6660 else:
6661 # else, only a single dtype is given
-> 6662 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
6663 res = self._constructor_from_mgr(new_data, axes=new_data.axes)
6664 return res.__finalize__(self, method="astype")
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/internals/managers.py:430, in BaseBlockManager.astype(self, dtype, copy, errors)
427 elif using_copy_on_write():
428 copy = False
--> 430 return self.apply(
431 "astype",
432 dtype=dtype,
433 copy=copy,
434 errors=errors,
435 using_cow=using_copy_on_write(),
436 )
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/internals/managers.py:363, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
361 applied = b.apply(f, **kwargs)
362 else:
--> 363 applied = getattr(b, f)(**kwargs)
364 result_blocks = extend_blocks(applied, result_blocks)
366 out = type(self).from_blocks(result_blocks, self.axes)
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/internals/blocks.py:784, in Block.astype(self, dtype, copy, errors, using_cow, squeeze)
781 raise ValueError("Can not squeeze with more than one column.")
782 values = values[0, :] # type: ignore[call-overload]
--> 784 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
786 new_values = maybe_coerce_values(new_values)
788 refs = None
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/dtypes/astype.py:237, in astype_array_safe(values, dtype, copy, errors)
234 dtype = dtype.numpy_dtype
236 try:
--> 237 new_values = astype_array(values, dtype, copy=copy)
238 except (ValueError, TypeError):
239 # e.g. _astype_nansafe can fail on object-dtype of strings
240 # trying to convert to float
241 if errors == "ignore":
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/dtypes/astype.py:179, in astype_array(values, dtype, copy)
175 return values
177 if not isinstance(values, np.ndarray):
178 # i.e. ExtensionArray
--> 179 values = values.astype(dtype, copy=copy)
181 else:
182 values = _astype_nansafe(values, dtype, copy=copy)
File ~/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/pandas/core/arrays/categorical.py:603, in Categorical.astype(self, dtype, copy)
598 except (
599 TypeError, # downstream error msg for CategoricalIndex is misleading
600 ValueError,
601 ):
602 msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
--> 603 raise ValueError(msg)
605 result = take_nd(
606 new_cats, ensure_platform_int(self._codes), fill_value=fill_value
607 )
609 return result
ValueError: Cannot cast object dtype to float32
Versions
System:
python: 3.13.3 (main, Aug 14 2025, 11:53:40) [GCC 14.2.0]
executable: /home/khteh/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/bin/python
machine: Linux-6.14.0-28-generic-x86_64-with-glibc2.41
Python dependencies:
sklearn: 1.7.1
pip: 25.0
setuptools: 80.9.0
numpy: 2.3.2
scipy: 1.16.1
Cython: None
pandas: 2.3.1
matplotlib: 3.10.5
joblib: 1.5.1
threadpoolctl: 3.6.0
Built with OpenMP: True
threadpoolctl info:
user_api: blas
internal_api: openblas
num_threads: 16
prefix: libscipy_openblas
filepath: /home/khteh/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/numpy.libs/libscipy_openblas64_-8fb3d286.so
version: 0.3.30
threading_layer: pthreads
architecture: SkylakeX
user_api: blas
internal_api: openblas
num_threads: 16
prefix: libscipy_openblas
filepath: /home/khteh/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/scipy.libs/libscipy_openblas-68440149.so
version: 0.3.28
threading_layer: pthreads
architecture: SkylakeX
user_api: openmp
internal_api: openmp
num_threads: 16
prefix: libgomp
filepath: /home/khteh/.local/share/virtualenvs/JupyterNotebooks-uVG1pv5y/lib/python3.13/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0
version: None
Metadata
Metadata
Assignees
Labels
No labels