Design and implement a spreadsheet system called OpenSheet that supports cell values, formulas, and automatic dependency resolution. The system must handle cell references, evaluate formulas, and detect circular dependencies. This problem tests your ability to build a dependency graph, perform topological evaluation, and optimize for different performance requirements.
Implement a Spreadsheet class that supports setting values/formulas and getting computed values with real-time dependency resolution.
Spreadsheet() - Initializes the spreadsheetsetCell(key: str, value: str) - Sets a cell to a value or formula
"42", "3.14")= and can reference other cells (e.g., "=A1 + A2")+, -, *, /A1, B2, Z99)getCell(key: str) -> float - Returns the computed value of a cell
0 or None for non-existent cells (implementation choice)spreadsheet = Spreadsheet()
# Set simple values
spreadsheet.setCell('A1', '1')
spreadsheet.setCell('A2', '2')
# Set formulas with dependencies
spreadsheet.setCell('A3', '=A1 + A2') # A3 = 1 + 2 = 3
spreadsheet.setCell('A4', '=A3 + A2') # A4 = 3 + 2 = 5
spreadsheet.setCell('A5', '=A3 + A4') # A5 = 3 + 5 = 8
# Get values (triggers recursive evaluation)
print(spreadsheet.getCell('A3')) # Output: 3.0
print(spreadsheet.getCell('A4')) # Output: 5.0
print(spreadsheet.getCell('A5')) # Output: 8.0
# Update a value - dependent cells recompute on next access
spreadsheet.setCell('A1', '10')
print(spreadsheet.getCell('A3')) # Output: 12.0 (10 + 2)
print(spreadsheet.getCell('A5')) # Output: 26.0 (12 + 14)
# Non-existent cell
print(spreadsheet.getCell('Z99')) # Output: 0 or None
This implementation has O(N) getCell() complexity, where N is the number of dependencies. [Source: darkinterview.com]
import re
from typing import Dict, Set
class Spreadsheet:
def __init__(self):
# Store raw cell values/formulas
self.cell_values: Dict[str, str] = {}
def setCell(self, key: str, value: str) -> None:
"""
Store cell value or formula.
Time: O(1)
Space: O(1)
"""
self.cell_values[key] = value
def getCell(self, key: str) -> float:
"""
Get computed value, recursively evaluating dependencies.
Time: O(N) where N = number of dependencies
Space: O(D) where D = max dependency depth (recursion stack)
"""
if key not in self.cell_values:
return 0 # or None
value = self.cell_values[key]
# If it's a simple number, return it
if not value.startswith('='):
return float(value)
# It's a formula - evaluate it
formula = value[1:] # Remove '=' prefix
# Find all cell references (e.g., A1, B2, AA10)
cell_refs = re.findall(r'[A-Z]+\d+', formula)
# Replace each cell reference with its computed value
evaluated_formula = formula
ref cell_refs:
ref_value = .getCell(ref)
evaluated_formula = re.sub( + ref + , (ref_value), evaluated_formula)
:
result = (evaluated_formula)
(result)
Exception e:
ValueError()
The basic implementation above will cause infinite recursion on circular references. Add cycle detection:
class Spreadsheet:
def __init__(self):
self.cell_values: Dict[str, str] = {}
def setCell(self, key: str, value: str) -> None:
self.cell_values[key] = value
def getCell(self, key: str) -> float:
"""Get cell value with circular dependency detection"""
visited = set()
return self._evaluate(key, visited)
def _evaluate(self, key: str, visited: Set[str]) -> float:
"""
Recursive evaluation with cycle detection.
visited: Set of cells currently being evaluated (call stack)
"""
# Check for circular dependency
if key in visited:
raise ValueError(f"Circular dependency detected involving cell {key}")
if key not in self.cell_values:
return 0
value = self.cell_values[key]
# Simple number
if not value.startswith('='):
return float(value)
# Add to visited set (entering this cell's evaluation)
visited.add(key)
:
formula = value[:]
cell_refs = re.findall(, formula)
evaluated_formula = formula
ref cell_refs:
ref_value = ._evaluate(ref, visited)
evaluated_formula = re.sub( + ref + , (ref_value), evaluated_formula)
result = (evaluated_formula)
(result)
:
visited.remove(key)
def test_circular_dependency():
"""Test detection of circular references"""
spreadsheet = Spreadsheet()
# A1 -> A2 -> A3 -> A1 (cycle)
spreadsheet.setCell('A1', '=A2 + 1')
spreadsheet.setCell('A2', '=A3 + 1')
spreadsheet.setCell('A3', '=A1 + 1')
try:
spreadsheet.getCell('A1')
assert False, "Should have raised circular dependency error"
except ValueError as e:
assert "Circular dependency" in str(e)
# Self-reference
spreadsheet.setCell('B1', '=B1 + 1')
try:
spreadsheet.getCell('B1')
assert False, "Should have raised circular dependency error"
except ValueError as e:
assert "Circular dependency" in str(e)
The interviewer asks: "How would you optimize this so getCell() is O(1) instead of O(N)?"
Instead of computing on every getCell(), maintain a cache of computed values and update them eagerly when dependencies change. [Source: darkinterview.com]
import re
from typing import Dict, Set
from collections import defaultdict, deque
class OptimizedSpreadsheet:
def __init__(self):
# Raw values/formulas
self.cell_values: Dict[str, str] = {}
# Computed numeric values (cache)
self.computed_values: Dict[str, float] = {}
# Dependency graph
self.dependencies: Dict[str, Set[str]] = defaultdict(set) # cell -> cells it depends on
self.dependents: Dict[str, Set[str]] = defaultdict(set) # cell -> cells that depend on it
def setCell(self, key: str, value: str) -> None:
"""
Set cell and eagerly update all dependent cells.
Time: O(D) where D = number of cells affected (topological order)
Space: O(D)
"""
# Store raw value
self.cell_values[key] = value
# Clear old dependencies
for dep in self.dependencies[key]:
self.dependents[dep].discard(key)
self.dependencies[key].clear()
value.startswith():
formula = value[:]
cell_refs = (re.findall(, formula))
ref cell_refs:
.dependencies[key].add(ref)
.dependents[ref].add(key)
._has_cycle(key):
dep .dependencies[key]:
.dependents[dep].discard(key)
.dependencies[key].clear()
key .cell_values:
.cell_values[key]
key .computed_values:
.computed_values[key]
ValueError()
._recompute_affected(key)
() -> :
key .computed_values:
.computed_values[key]
() -> :
visited = ()
rec_stack = ()
() -> :
cell rec_stack:
cell visited:
visited.add(cell)
rec_stack.add(cell)
dep .dependencies.get(cell, []):
dfs(dep):
rec_stack.remove(cell)
dfs(start)
() -> :
affected = ()
queue = deque([start])
queue:
cell = queue.popleft()
cell affected:
affected.add(cell)
dependent .dependents.get(cell, []):
queue.append(dependent)
in_degree = defaultdict()
cell affected:
dep .dependencies[cell]:
dep affected:
in_degree[cell] +=
queue = deque([cell cell affected in_degree[cell] == ])
queue:
cell = queue.popleft()
._compute_single_cell(cell)
dependent .dependents.get(cell, []):
dependent affected:
in_degree[dependent] -=
in_degree[dependent] == :
queue.append(dependent)
() -> :
key .cell_values:
.computed_values[key] =
value = .cell_values[key]
value.startswith():
.computed_values[key] = (value)
formula = value[:]
cell_refs = re.findall(, formula)
evaluated_formula = formula
ref cell_refs:
ref_value = .computed_values.get(ref, )
evaluated_formula = re.sub( + ref + , (ref_value), evaluated_formula)
:
result = (evaluated_formula)
.computed_values[key] = (result)
Exception e:
.computed_values[key] =
spreadsheet = OptimizedSpreadsheet()
# Set operations trigger cascade recomputation of dependents
spreadsheet.setCell('A1', '1')
spreadsheet.setCell('A2', '2')
spreadsheet.setCell('A3', '=A1 + A2')
spreadsheet.setCell('A4', '=A3 + A2')
spreadsheet.setCell('A5', '=A3 + A4')
# O(1) get operations (just cache lookup)
print(spreadsheet.getCell('A5')) # 8.0 - instant
# Update triggers cascade recomputation
spreadsheet.setCell('A1', '10')
# Get is still O(1)
print(spreadsheet.getCell('A5')) # 26.0 - instant
import pytest
def test_basic_operations():
"""Test basic set and get"""
s = OptimizedSpreadsheet()
s.setCell('A1', '5')
assert s.getCell('A1') == 5.0
s.setCell('A2', '=A1 + 3')
assert s.getCell('A2') == 8.0
s.setCell('A1', '10')
assert s.getCell('A2') == 13.0
def test_complex_dependencies():
"""Test multi-level dependencies"""
s = OptimizedSpreadsheet()
s.setCell('A1', '1')
s.setCell('A2', '2')
s.setCell('B1', '=A1 + A2')
s.setCell('B2', '=B1 * 2')
s.setCell('C1', '=B1 + B2')
assert s.getCell('C1') == 9.0 # (1+2) + (1+2)*2 = 3 + 6 = 9
s.setCell('A1', '5')
assert s.getCell('C1') == 21.0 # (5+2) + (5+2)*2 = 7 + 14 = 21
def test_circular_dependency_detection():
"""Test various circular dependency patterns"""
s = OptimizedSpreadsheet()
# Direct self-reference
with pytest.raises(ValueError, match="Circular dependency"):
s.setCell('A1', '=A1 + 1')
# Two-cell cycle
s.setCell('A1', )
pytest.raises(ValueError, =):
s.setCell(, )
s.setCell(, )
s.setCell(, )
pytest.raises(ValueError, =):
s.setCell(, )
():
s = OptimizedSpreadsheet()
s.setCell(, )
s.setCell(, )
s.setCell(, )
s.setCell(, )
s.setCell(, )
s.setCell(, )
s.getCell() ==
s.getCell() ==
s.getCell() ==
(s.getCell() - ) <
():
s = OptimizedSpreadsheet()
s.getCell() ==
s.setCell(, )
s.getCell() ==
():
s = OptimizedSpreadsheet()
s.setCell(, )
s.setCell(, )
s.setCell(, )
s.getCell() ==
s.setCell(, )
s.getCell() ==
The interviewer might ask about:
=SUM(A1:A10)?