This module provides an extended learning path for those who want to dive deeper into Python programming for data science. It's designed to complement the basic Python refresher with more advanced concepts and practical applications.
The collections module provides specialized container datatypes that go beyond the built-in types:
from collections import Counter, defaultdict, namedtuple, deque
# Counter tallies how many times each element occurs in an iterable
words = ["apple", "banana", "apple", "orange", "banana", "apple"]
word_count = Counter(words)
print(word_count)  # Counter({'apple': 3, 'banana': 2, 'orange': 1})

# most_common(n) returns the n largest tallies as (element, count) pairs
top_two = word_count.most_common(2)
print(top_two)  # [('apple', 3), ('banana', 2)]
# defaultdict creates a default value (here an empty list) the first time
# a missing key is looked up, instead of raising KeyError
word_categories = defaultdict(list)
fruit_words = word_categories["fruit"]  # "fruit" was never set, yet this works
fruit_words.append("apple")
print(word_categories)  # defaultdict(<class 'list'>, {'fruit': ['apple']})
# namedtuple generates a lightweight tuple subclass whose fields can be
# read by name as well as by position
Person = namedtuple("Person", "name age job")
alice = Person(name="Alice", age=30, job="Data Scientist")
print(alice.name)  # Alice
print(alice.job)  # Data Scientist
# deque is a double-ended queue: adding and removing items is fast at both ends
queue = deque(["task1", "task2", "task3"])
queue.appendleft("task0")  # push onto the left end
queue.append("task4")  # push onto the right end
print(queue)  # deque(['task0', 'task1', 'task2', 'task3', 'task4'])

first_task = queue.popleft()  # remove and return the leftmost item
print(first_task)  # task0
# Merging dictionaries with the | operator (requires Python 3.9+)
dict1 = {"a": 1, "b": 2}
dict2 = {"b": 3, "c": 4}
# When both operands share a key, the right-hand one wins: "b" comes from dict2
merged = dict1 | dict2  # {"a": 1, "b": 3, "c": 4}

# A dictionary comprehension may end with an if-clause that filters the keys
squares_of_evens = {x: x * x for x in range(10) if x % 2 == 0}
print(squares_of_evens)  # {0: 0, 2: 4, 4: 16, 6: 36, 8: 64}
# Nested dictionaries model records whose fields are themselves structured
employees = {
    "Alice": {
        "department": "Data Science",
        "skills": ["Python", "ML", "Statistics"],
        "projects": {"completed": 5, "ongoing": 2},
    },
    "Bob": {
        "department": "Engineering",
        "skills": ["Python", "SQL", "DevOps"],
        "projects": {"completed": 3, "ongoing": 1},
    },
}

# Drill down one level per subscript: employee -> field -> item
alice_skills = employees["Alice"]["skills"]
print(alice_skills[1])  # ML
bob_projects = employees["Bob"]["projects"]
print(bob_projects["completed"])  # 3
# A comprehension with two for-clauses flattens a nested list; the clauses
# appear in the same order as the equivalent nested for-loops would
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
flattened = [value for row in matrix for value in row]
print(flattened)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Transpose with a nested comprehension: the outer one walks the column
# indices, the inner one collects that column's entry from every row
column_count = len(matrix[0])
transposed = [[row[col] for row in matrix] for col in range(column_count)]
print(transposed)  # [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
# Iterating over lists with zip and enumerate
names = ["Alice", "Bob", "Charlie"]
ages = [30, 25, 35]

# zip walks both lists in lockstep, yielding one (name, age) pair per position
for name, age in zip(names, ages):
    print(f"{name} is {age} years old")

# enumerate yields (index, value) pairs, so no manual counter is needed
for i, name in enumerate(names):
    print(f"Index {i}: {name}")
# Basic class definition
class DataPoint:
    """A point with two coordinates and an optional label."""

    def __init__(self, x, y, label=None):
        """Store the coordinates and the label on the instance.

        Args:
            x: First coordinate.
            y: Second coordinate.
            label: Optional tag for the point; None when omitted.
        """
        self.x = x
        self.y = y
        self.label = label

    def distance_from_origin(self):
        """Return the Euclidean distance between this point and (0, 0)."""
        return (self.x ** 2 + self.y ** 2) ** 0.5

    def __repr__(self):
        """Return an unambiguous representation for debugging.

        Unlike __str__, each field is shown with its own repr, so a string
        label appears quoted.
        """
        return f"{type(self).__name__}({self.x!r}, {self.y!r}, {self.label!r})"

    def __str__(self):
        """Return the readable form ``DataPoint(x, y, label)`` used by print()."""
        return f"DataPoint({self.x}, {self.y}, {self.label})"
# Instantiate one labelled point and one that relies on the default label
point1 = DataPoint(x=3, y=4, label="Example")
point2 = DataPoint(x=1, y=2)

# (3, 4) lies on a 3-4-5 right triangle, so its distance from the origin is 5
print(point1.distance_from_origin())  # 5.0
# print() goes through __str__; the label that was not supplied shows as None
print(point2)  # DataPoint(1, 2, None)