Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 58 additions & 19 deletions python/python_calamine/_python_calamine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,15 @@ class CalamineSheet:
For suppress this behaviour, set `skip_empty_area` to `False`.
"""

def iter_rows(
self,
) -> typing.Iterator[
list[
int
| float
| str
| bool
| datetime.time
| datetime.date
| datetime.datetime
| datetime.timedelta
]
]:
"""Retunrning data from sheet as iterator of lists."""
def iter_rows(self) -> CalamineCellIterator:
"""Return data from sheet as an iterator."""

def iter_formulas(self) -> CalamineFormulaIterator:
"""Return formulas from sheet as an iterator.

Raises:
ValueError: If read_formulas=False when creating the workbook.
"""

@property
def merged_cell_ranges(
Expand All @@ -101,35 +95,79 @@ class CalamineSheet:
list of merged cell ranges (tuple[start coordinate, end coordinate]) or None for unsuported format
"""

@typing.final
class CalamineCellIterator:
"""Iterator for cell data in a CalamineSheet."""

position: int
start: tuple[int, int]
height: int
width: int

def __iter__(self) -> "CalamineCellIterator": ...
def __next__(
self,
) -> list[
int
| float
| str
| bool
| datetime.time
| datetime.date
| datetime.datetime
| datetime.timedelta
]: ...

@typing.final
class CalamineFormulaIterator:
"""Iterator for formula data in a CalamineSheet."""

position: int
start: tuple[int, int]
width: int
height: int

def __iter__(self) -> "CalamineFormulaIterator": ...
def __next__(self) -> list[str]: ...

@typing.final
class CalamineWorkbook(contextlib.AbstractContextManager):
path: str | None
sheet_names: list[str]
sheets_metadata: list[SheetMetadata]
Comment on lines 133 to 137
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Missing read_formulas property in type stub.

The CalamineWorkbook class exposes a read_formulas property (via #[pyo3(get)] in Rust), which is tested in test_formulas_read_flag_exposed, but it's not declared in the type stub.

Proposed fix
 `@typing.final`
 class CalamineWorkbook(contextlib.AbstractContextManager):
     path: str | None
     sheet_names: list[str]
     sheets_metadata: list[SheetMetadata]
+    read_formulas: bool
     `@classmethod`
     def from_object(
🤖 Prompt for AI Agents
In `@python/python_calamine/_python_calamine.pyi` around lines 133 - 137, The type
stub for class CalamineWorkbook is missing the read_formulas property; add a
read-only boolean attribute declaration to the CalamineWorkbook class (e.g., add
"read_formulas: bool") so the stub matches the Rust #[pyo3(get)]-exposed
property and satisfies the test that checks test_formulas_read_flag_exposed.

@classmethod
def from_object(
cls, path_or_filelike: str | os.PathLike | ReadBuffer
cls,
path_or_filelike: str | os.PathLike | ReadBuffer,
read_formulas: bool = False,
) -> "CalamineWorkbook":
"""Determining type of pyobject and reading from it.

Args:
path_or_filelike (str | os.PathLike | ReadBuffer): path to file or IO (must imlpement read/seek methods).
read_formulas (bool): Enable formula reading support. Default is False.
"""

@classmethod
def from_path(cls, path: str | os.PathLike) -> "CalamineWorkbook":
def from_path(
cls, path: str | os.PathLike, read_formulas: bool = False
) -> "CalamineWorkbook":
"""Reading file from path.

Args:
path (str | os.PathLike): path to file.
read_formulas (bool): Enable formula reading support. Default is False.
"""

@classmethod
def from_filelike(cls, filelike: ReadBuffer) -> "CalamineWorkbook":
def from_filelike(
cls, filelike: ReadBuffer, read_formulas: bool = False
) -> "CalamineWorkbook":
"""Reading file from IO.

Args:
filelike : IO (must imlpement read/seek methods).
read_formulas (bool): Enable formula reading support. Default is False.
"""

def close(self) -> None:
Expand Down Expand Up @@ -185,10 +223,11 @@ class ZipError(CalamineError): ...
class WorkbookClosed(CalamineError): ...

def load_workbook(
path_or_filelike: str | os.PathLike | ReadBuffer,
path_or_filelike: str | os.PathLike | ReadBuffer, read_formulas: bool = False
) -> CalamineWorkbook:
"""Determining type of pyobject and reading from it.

Args:
path_or_filelike (str | os.PathLike | ReadBuffer): path to file or IO (must imlpement read/seek methods).
read_formulas (bool): Enable formula reading support. Default is False.
"""
15 changes: 11 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,27 @@ use pyo3::prelude::*;
mod types;
mod utils;
use crate::types::{
CalamineError, CalamineSheet, CalamineWorkbook, CellValue, Error, PasswordError, SheetMetadata,
SheetTypeEnum, SheetVisibleEnum, WorkbookClosed, WorksheetNotFound, XmlError, ZipError,
CalamineError, CalamineFormulaIterator, CalamineSheet, CalamineWorkbook, CellValue, Error,
PasswordError, SheetMetadata, SheetTypeEnum, SheetVisibleEnum, WorkbookClosed,
WorksheetNotFound, XmlError, ZipError,
};

#[pyfunction]
fn load_workbook(py: Python, path_or_filelike: Py<PyAny>) -> PyResult<CalamineWorkbook> {
CalamineWorkbook::from_object(py, path_or_filelike)
#[pyo3(signature = (path_or_filelike, read_formulas=false))]
fn load_workbook(
py: Python,
path_or_filelike: Py<PyAny>,
read_formulas: bool,
) -> PyResult<CalamineWorkbook> {
CalamineWorkbook::from_object(py, path_or_filelike, read_formulas)
}

#[pymodule(gil_used = false)]
fn _python_calamine(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(load_workbook, m)?)?;
m.add_class::<CalamineWorkbook>()?;
m.add_class::<CalamineSheet>()?;
m.add_class::<CalamineFormulaIterator>()?;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add CalamineCellIterator here?

m.add_class::<SheetMetadata>()?;
m.add_class::<SheetTypeEnum>()?;
m.add_class::<SheetVisibleEnum>()?;
Expand Down
4 changes: 3 additions & 1 deletion src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ pub use cell::CellValue;
pub use errors::{
CalamineError, Error, PasswordError, WorkbookClosed, WorksheetNotFound, XmlError, ZipError,
};
pub use sheet::{CalamineSheet, SheetMetadata, SheetTypeEnum, SheetVisibleEnum};
pub use sheet::{
CalamineFormulaIterator, CalamineSheet, SheetMetadata, SheetTypeEnum, SheetVisibleEnum,
};
pub use workbook::CalamineWorkbook;
109 changes: 108 additions & 1 deletion src/types/sheet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,18 +134,21 @@ pub struct CalamineSheet {
#[pyo3(get)]
name: String,
range: Arc<Range<Data>>,
formula_range: Option<Arc<Range<String>>>,
merged_cell_ranges: Option<Vec<Dimensions>>,
}

impl CalamineSheet {
pub fn new(
name: String,
range: Range<Data>,
formula_range: Option<Range<String>>,
merged_cell_ranges: Option<Vec<Dimensions>>,
) -> Self {
CalamineSheet {
name,
range: Arc::new(range),
formula_range: formula_range.map(Arc::new),
merged_cell_ranges,
}
}
Expand Down Expand Up @@ -224,6 +227,23 @@ impl CalamineSheet {
CalamineCellIterator::from_range(Arc::clone(&self.range))
}

fn iter_formulas(&self) -> PyResult<CalamineFormulaIterator> {
match &self.formula_range {
Some(formula_range) => {
let data_start = self.range.start().unwrap_or((0, 0));
let data_end = self.range.end().unwrap_or((0, 0));
Ok(CalamineFormulaIterator::from_range_with_data_bounds(
Arc::clone(formula_range),
data_start,
data_end
))
},
None => Err(pyo3::exceptions::PyValueError::new_err(
"Formula iteration is disabled. Use read_formulas=True when creating the workbook to enable formula access."
)),
}
}
Comment on lines +230 to +245
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Formula iterator row count can diverge from iter_rows for non‑zero starts or empty sheets.

iter_rows() emits leading empty rows when start.0 > 0, but CalamineFormulaIterator’s height is computed from data_end - data_start + 1, so it skips those rows. For empty ranges, unwrap_or((0,0)) produces a 1×1 empty row. This violates the “same dimensions” contract in those cases.

🐛 Suggested fix to align formula dimensions with iter_rows and empty ranges
-        let data_start = self.range.start().unwrap_or((0, 0));
-        let data_end = self.range.end().unwrap_or((0, 0));
+        let data_start = self.range.start();
+        let data_end = self.range.end();
         Ok(CalamineFormulaIterator::from_range_with_data_bounds(
             Arc::clone(formula_range),
             data_start,
             data_end
         ))
-    fn from_range_with_data_bounds(
-        range: Arc<Range<String>>,
-        data_start: (u32, u32),
-        data_end: (u32, u32),
-    ) -> CalamineFormulaIterator {
-        let width = if data_start <= data_end {
-            (data_end.1 - data_start.1 + 1) as usize
-        } else {
-            0
-        };
-
-        let height = if data_start <= data_end {
-            (data_end.0 - data_start.0 + 1) as usize
-        } else {
-            0
-        };
-
-        CalamineFormulaIterator {
-            position: 0,
-            start: data_start,
-            range,
-            width,
-            height,
-        }
-    }
+    fn from_range_with_data_bounds(
+        range: Arc<Range<String>>,
+        data_start: Option<(u32, u32)>,
+        data_end: Option<(u32, u32)>,
+    ) -> CalamineFormulaIterator {
+        let (start, width, height) = match (data_start, data_end) {
+            (Some(start), Some(end)) if start <= end => {
+                let width = (end.1 - start.1 + 1) as usize;
+                // include leading empty rows to match iter_rows
+                let height = (end.0 + 1) as usize;
+                (start, width, height)
+            }
+            _ => ((0, 0), 0, 0),
+        };
+
+        CalamineFormulaIterator {
+            position: 0,
+            start,
+            range,
+            width,
+            height,
+        }
+    }
-        let current_row = slf.start.0 + slf.position;
-        slf.position += 1;
+        let current_row = slf.position;
+        slf.position += 1;
 
         // Create the result row with proper width, filled with empty strings
         let mut result_row = vec!["".to_string(); slf.width];
 
-        // Fill in formulas for this row by checking each column position
-        for (col_idx, result_cell) in result_row.iter_mut().enumerate() {
-            let current_col = slf.start.1 + col_idx as u32;
-            if let Some(formula) = slf.range.get_value((current_row, current_col)) {
-                if !formula.is_empty() {
-                    *result_cell = formula.clone();
-                }
-            }
-        }
+        // Fill in formulas only once we are within the data start row
+        if current_row >= slf.start.0 {
+            for (col_idx, result_cell) in result_row.iter_mut().enumerate() {
+                let current_col = slf.start.1 + col_idx as u32;
+                if let Some(formula) = slf.range.get_value((current_row, current_col)) {
+                    if !formula.is_empty() {
+                        *result_cell = formula.clone();
+                    }
+                }
+            }
+        }

Also applies to: 331-387

🤖 Prompt for AI Agents
In `@src/types/sheet.rs` around lines 230 - 245, iter_formulas currently uses
unwrap_or((0,0)) for range bounds which causes a 1×1 empty row for empty ranges
and drops leading empty rows when start.0 > 0; fix by computing bounds like
iter_rows: let data_start = self.range.start().unwrap_or((0,0)); if
self.range.is_empty() set data_end to a value that yields zero height (e.g. make
data_end row/col < data_start so height/width compute to 0), otherwise use
self.range.end().unwrap(); keep data_start unchanged so leading empty rows (when
start.0 > 0) are preserved, then call
CalamineFormulaIterator::from_range_with_data_bounds with those adjusted
data_start/data_end values. Ensure you use self.range.is_empty(),
self.range.start(), self.range.end(), iter_formulas, and
CalamineFormulaIterator::from_range_with_data_bounds to locate and implement the
change.


#[getter]
fn merged_cell_ranges(slf: PyRef<'_, Self>) -> Option<Vec<MergedCellRange>> {
slf.merged_cell_ranges
Expand All @@ -234,7 +254,9 @@ impl CalamineSheet {

#[pyclass]
pub struct CalamineCellIterator {
#[pyo3(get)]
position: u32,
#[pyo3(get)]
start: (u32, u32),
empty_row: Vec<CellValue>,
iter: Rows<'static, Data>,
Expand All @@ -250,7 +272,7 @@ impl CalamineCellIterator {
CalamineCellIterator {
empty_row,
position: 0,
start: range.start().unwrap(),
start: range.start().unwrap_or((0, 0)),
iter: unsafe {
std::mem::transmute::<
calamine::Rows<'_, calamine::Data>,
Expand Down Expand Up @@ -279,4 +301,89 @@ impl CalamineCellIterator {
Some(PyList::new(slf.py(), slf.empty_row.clone())).transpose()
}
}

#[getter]
fn height(&self) -> usize {
self.range.height()
}
#[getter]
fn width(&self) -> usize {
self.range.width()
}
}

#[pyclass]
pub struct CalamineFormulaIterator {
#[pyo3(get)]
position: u32,
#[pyo3(get)]
start: (u32, u32),
#[allow(dead_code)]
range: Arc<Range<String>>,
// Data range dimensions for coordinate mapping
#[pyo3(get)]
width: usize,
#[pyo3(get)]
height: usize,
}

impl CalamineFormulaIterator {
fn from_range_with_data_bounds(
range: Arc<Range<String>>,
data_start: (u32, u32),
data_end: (u32, u32),
) -> CalamineFormulaIterator {
let width = if data_start <= data_end {
(data_end.1 - data_start.1 + 1) as usize
} else {
0
};

let height = if data_start <= data_end {
(data_end.0 - data_start.0 + 1) as usize
} else {
0
};

CalamineFormulaIterator {
position: 0,
start: data_start,
range,
width,
height,
}
}
}

#[pymethods]
impl CalamineFormulaIterator {
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
slf
}

fn __next__(mut slf: PyRefMut<'_, Self>) -> PyResult<Option<Bound<'_, PyList>>> {
// Check if we've exceeded the data range height
if slf.position >= slf.height as u32 {
return Ok(None);
}

// Calculate the current absolute row position
let current_row = slf.start.0 + slf.position;
slf.position += 1;

// Create the result row with proper width, filled with empty strings
let mut result_row = vec!["".to_string(); slf.width];

// Fill in formulas for this row by checking each column position
for (col_idx, result_cell) in result_row.iter_mut().enumerate() {
let current_col = slf.start.1 + col_idx as u32;
if let Some(formula) = slf.range.get_value((current_row, current_col)) {
if !formula.is_empty() {
*result_cell = formula.clone();
}
}
}

Some(PyList::new(slf.py(), result_row)).transpose()
}
}
Loading