# Partition the West Roxbury dataset into random subsets.
#
# Reads WestRoxbury.csv (path relative to the working directory), shuffles its
# row names with a fixed seed, splits them into `num_subsets` groups (the last
# group also receives the leftover rows), builds one data frame per group, and
# prints the number of observations in each.

# Task 1: Load the dataset ----
cat("Task 1: Load Dataset\n")
data <- read.csv("WestRoxbury.csv", header = TRUE) # first line = column names

# Task 2: Extract the row numbers of the dataset ----
# row.names() returns the identifiers as character strings; Task 4 uses them
# to select rows of `data` by row name.
row_numbers <- row.names(data)
cat("Task 2: Row numbers extracted.\n\n")

# Task 3: Partition row numbers into subsets ----
set.seed(123) # fixed seed so the shuffle is reproducible
row_numbers_shuffled <- sample(row_numbers) # random permutation of the rows

# Number of subsets to create.
num_subsets <- 10
stopifnot(num_subsets >= 1)

# Base size of every subset, and the rows left over after an even split.
subset_size <- floor(length(row_numbers_shuffled) / num_subsets)
cat("Subset size default is: ", subset_size, "\n")
remainder <- length(row_numbers_shuffled) %% num_subsets
cat("Remainder is: ", remainder, "\n")

# Every subset gets `subset_size` rows; the final subset additionally takes the
# `remainder` rows so that no row is dropped.
subset_sizes <- rep(subset_size, num_subsets)
subset_sizes[num_subsets] <- subset_size + remainder
first_index <- cumsum(subset_sizes) - subset_sizes + 1

partitioned_row_numbers <- lapply(seq_len(num_subsets), function(i) {
  # seq_len() gives an empty index for a zero-sized subset, whereas a
  # `start:end` range would count backwards and pick up stray rows.
  row_numbers_shuffled[first_index[i] + seq_len(subset_sizes[i]) - 1]
})

cat(
  "Task 3: Row numbers partitioned into ", num_subsets,
  " subsets with the remainder in subset ", num_subsets, ".\n",
  sep = ""
)

# Task 4: Partition the dataset using the partitioned row numbers ----
# drop = FALSE keeps each piece a data frame even when `data` has a single
# column; an empty subset becomes a zero-row data frame rather than a gap in
# the list, so Task 5 can always call nrow() on it.
partitioned_data <- lapply(partitioned_row_numbers, function(ids) {
  data[ids, , drop = FALSE]
})

cat(
  "Task 4: Dataset partitioned into ", num_subsets, " subsets.\n\n",
  sep = ""
)

# Task 5: Display the number of observations in each subset ----
for (i in seq_along(partitioned_data)) {
  cat(
    "Number of observations in subset", i, "=",
    nrow(partitioned_data[[i]]), "\n"
  )
}