openpyxl is a Python library for reading from and writing to Excel 2010+ (.xlsx) files. This comprehensive guide covers everything from basic operations to advanced features.
Installation
Install openpyxl using pip:
pip install openpyxl
Basic Operations Using openpyxl
Creating a New Workbook
from openpyxl import Workbook
# Create a new workbook
wb = Workbook()
# Get the active sheet
sheet = wb.active
# Write values to cells
sheet['A1'] = 'Hello'
sheet['B1'] = 'World'
# Save the
workbook
wb.save('example.xlsx')
This code snippet creates a new workbook, writes values to cells A1 and B1, and saves it as 'example.xlsx'.
Reading an Existing Workbook
from openpyxl
import load_workbook
# Load the workbook
wb = load_workbook('example.xlsx')
# Get sheet names
print(wb.sheetnames)
# Select a specific sheet
sheet = wb['Sheet1']
# Read cell value
value = sheet['A1'].value
print(value)
# Read multiple cells
cell_range = sheet['A1:B2']
for row in cell_range:
for cell in row:
print(cell.value)
This code demonstrates how to load an existing workbook, access sheets, and read cell values.
Working with Cells and Ranges
Cell Access Methods
# Different ways to access cells
sheet['A1'] = 'Method 1'
sheet.cell(row=1, column=1, value='Method 2')
# Accessing cell values
value1 = sheet['A1'].value
value2 = sheet.cell(row=1, column=1).value
The different access methods each have their advantages:
- Direct assignment (A1 style) is more readable and familiar to Excel users
- Row/column notation is better for programmatic access and loops
- Both methods provide access to the same underlying cell object
Working with Ranges
# Iterating through a range
for row in sheet['A1:C3']:
for cell in row:
print(cell.value)
# Using cell ranges
cell_range = sheet['A1:C3']
tuple(sheet['A1:C3'])
# Convert range to tuple
# Working with rows and columns
for row in sheet.iter_rows(min_row=1, max_row=3, min_col=1, max_col=3):
for cell in row:
print(cell.value)
Range operations offer several advantages:
- More efficient than accessing individual cells
- Allow for bulk operations on data
- Support both row-wise and column-wise iteration
- Can be used with list comprehensions for data extraction
Sheet Operations
Managing Sheets
# Create a new sheet
wb.create_sheet("NewSheet")
# Change sheet title
sheet.title = "Modified Sheet"
# Copy a sheet
source = wb['Sheet1']
wb.copy_worksheet(source)
# Remove a sheet
wb.remove(wb['Sheet to Remove'])
Sheet Properties
# Set sheet tab color
sheet.sheet_properties.tabColor = "1072BA"
# Freeze panes
sheet.freeze_panes = 'B2'
# Set column width
sheet.column_dimensions['A'].width = 20
# Set row height
sheet.row_dimensions[1].height = 30
Cell Formatting
Basic Formatting
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
# Create font
font = Font(name='Arial',
size=12,
bold=True,
italic=True,
color='FF0000')
# Apply font to cell
cell = sheet['A1']
cell.font = font
# Background color
cell.fill = PatternFill(start_color='FFFF00',
end_color='FFFF00',
fill_type='solid')
# Alignment
cell.alignment = Alignment(horizontal='center',
vertical='center')
# Borders
border = Border(left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin'))
cell.border = border
Number Formats
# Date format
cell.number_format = 'DD/MM/YYYY'
# Currency format
cell.number_format = '$#,##0.00'
# Percentage format
cell.number_format = '0.00%'
Data Validation and Formulas
Adding Data Validation
from openpyxl.worksheet.datavalidation import DataValidation
# Create validation for whole numbers
dv = DataValidation(type="whole",
operator="between",
formula1=1,
formula2=100)
dv.error = "Please enter a number between 1 and 100"
dv.errorTitle = "Invalid Entry"
# Add validation to worksheet
sheet.add_data_validation(dv)
dv.add('A1:A100')
Working with Formulas
# Add simple formula
sheet['D1'] = '=SUM(A1:C1)'
# Add more complex formula
sheet['E1'] = '=IF(D1>100,"High","Low")'
Advanced Features
Merge and Unmerge Cells
# Merge cells
sheet.merge_cells('A1:D1')
# Unmerge cells
sheet.unmerge_cells('A1:D1')
Working with Images
from openpyxl.drawing.image import Image
# Add image
img = Image('logo.png')
sheet.add_image(img, 'A1')
Charts
from openpyxl.chart import BarChart, Reference
# Create chart
chart = BarChart()
data = Reference(sheet, min_col=1, min_row=1, max_row=10, max_col=1)
chart.add_data(data)
# Add chart to sheet
sheet.add_chart(chart, "E1")
Best Practices and Performance Tips
Memory Optimization
# For large files, use read-only
mode
wb = load_workbook(filename='large_file.xlsx', read_only=True)
# For writing large files, use write-only
mode
wb = Workbook(write_only=True)
Error Handling
try:
wb = load_workbook('example.xlsx')
except FileNotFoundError:
print("File not found")
except Exception as e:
print(f"An error occurred: {e}")
Saving with Backup
from datetime import datetime
def save_with_backup(workbook, filename):
"""Save workbook with timestamp backup"""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
backup_name = f"{filename.split('.')[0]}_{timestamp}.xlsx"
workbook.save(backup_name)
Common Patterns and Examples
Copying Data Between Sheets
def copy_sheet_data(source_sheet, target_sheet):
"""Copy data from one sheet to another"""
for row in source_sheet.rows:
for cell in row:
target_sheet[cell.coordinate].value = cell.value
target_sheet[cell.coordinate].style = cell.style
Conditional Formatting
from openpyxl.formatting.rule import ColorScaleRule
# Add conditional formatting
color_scale = ColorScaleRule(start_color='FFEB84',
end_color='FF7128',
start_type='percentile',
start_value=0,
end_type='percentile',
end_value=100)
sheet.conditional_formatting.add('A1:A100', color_scale)
There is no doubt that openpyxl provides powerful capabilities for Excel automation in Python. By understanding its features and following best practices, you can create efficient and maintainable Excel automation scripts.