import numpy as np


class BagLearner(object):
    """Bootstrap-aggregating (bagging) ensemble of identical base learners.

    Each base learner is trained on its own random sample (drawn with
    replacement) of the training data; a query returns the mean of the
    individual learners' predictions.
    """

    # Fraction of the training rows drawn (with replacement) for each bag.
    BAG_FRACTION = 0.5

    def __init__(self, learner, bags=20, boost=False, verbose=False,
                 kwargs=None, **extra_kwargs):
        """
        @summary: Create the ensemble and instantiate all base learners.
        @param learner: class (or any callable) that builds one base learner
        @param bags: number of base learners to create
        @param boost: stored only; boosting is not implemented
        @param verbose: stored only; no debug output is produced here
        @param kwargs: optional dict of keyword arguments for the base
                       learner constructor, e.g. {"leaf_size": 1}
        @param extra_kwargs: additional keyword arguments forwarded to the
                       base learner constructor (kept for callers that pass
                       them directly); they override entries of kwargs
        """
        # Callers may hand the constructor arguments over as one dict named
        # "kwargs".  Forwarding that dict as a literal keyword argument made
        # the base learner fail with "unexpected keyword argument 'kwargs'".
        learner_kwargs = dict(kwargs) if kwargs else {}
        learner_kwargs.update(extra_kwargs)

        self.learner = learner
        self.bags = bags
        self.boost = boost
        self.verbose = verbose
        self.kwargs = learner_kwargs
        self.learners = [learner(**learner_kwargs) for _ in range(bags)]

    def author(self):
        return 'felixm'  # replace tb34 with your Georgia Tech username

    def get_bag(self, data_x, data_y):
        """
        @summary: Draw one random bag, sampling rows with replacement.
        @param data_x: numpy array of X values, one row per sample
        @param data_y: numpy array of Y values, one entry per sample
        @returns tuple (bag_x, bag_y) of the sampled rows and their Y values
        """
        n_rows = data_x.shape[0]
        if n_rows == 0:
            # Nothing to sample from; hand back empty slices of the inputs.
            return data_x[:0], data_y[:0]
        # Always draw at least one row so that tiny data sets still yield a
        # usable (non-empty) bag.
        num_items = max(1, int(n_rows * self.BAG_FRACTION))
        picks = np.random.randint(0, n_rows, size=num_items)
        return data_x[picks], data_y[picks]

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        for model in self.learners:
            bag_x, bag_y = self.get_bag(data_x, data_y)
            model.addEvidence(bag_x, bag_y)

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        predictions = [model.query(points) for model in self.learners]
        return np.mean(predictions, axis=0)


if __name__ == "__main__":
    print("the secret clue is 'zzyzx'")
import numpy as np


class RTLearner(object):
    """Random-tree regression learner.

    The tree is stored as a 2-D numpy array with one row per node:
    [factor, split_value, left, right].  For a leaf, factor is LEAF and
    split_value holds the prediction.  For an inner node, left and right
    are row offsets relative to the node itself.
    """

    LEAF = -1
    NA = -1

    def __init__(self, leaf_size=1, verbose=False):
        """
        @param leaf_size: largest number of samples aggregated into one leaf
        @param verbose: stored only; no debug output is produced here
        """
        self.leaf_size = leaf_size
        self.verbose = verbose

    def author(self):
        return 'felixm'  # replace tb34 with your Georgia Tech username

    def create_node(self, factor, split_value, left, right):
        """Return a single tree row as a 1 x 4 numpy array."""
        return np.array([[factor, split_value, left, right], ])

    def build_tree(self, xs, y):
        """
        @summary: Recursively build the (sub)tree for the given samples.
        @param xs: numpy array of X values, one row per sample
        @param y: numpy array of Y values, one entry per sample
        @returns numpy array with one row per node, root first, followed by
                 the complete left subtree and then the right subtree
        """
        assert(xs.shape[0] == y.shape[0])
        assert(xs.shape[0] > 0)  # If this is 0 something went wrong.

        if xs.shape[0] <= self.leaf_size:
            return self.create_node(self.LEAF, np.median(y), self.NA, self.NA)

        if np.all(y[0] == y):
            return self.create_node(self.LEAF, y[0], self.NA, self.NA)

        # Only features that actually vary can separate the samples.  If no
        # feature varies (identical rows with different y) no split exists,
        # so aggregate into a leaf instead of retrying forever.
        varying = np.flatnonzero(np.any(xs != xs[0], axis=0))
        if varying.size == 0:
            return self.create_node(self.LEAF, np.median(y), self.NA, self.NA)
        i = int(np.random.choice(varying))
        column = xs[:, i]

        r1, r2 = np.random.randint(0, xs.shape[0], size=2)
        split_value = (column[r1] + column[r2]) / 2.0
        go_left = column <= split_value

        # The random threshold can put every sample on one side (e.g. both
        # picks hit the column maximum).  Choose a threshold that is known to
        # separate the data, but keep the "<=" rule because query_point
        # routes points with "<=" as well.
        if go_left.all() or not go_left.any():
            lo, hi = column.min(), column.max()
            split_value = lo + (hi - lo) / 2.0
            if split_value >= hi:  # midpoint rounded up to the maximum
                split_value = lo
            go_left = column <= split_value
        go_right = ~go_left

        lt = self.build_tree(xs[go_left], y[go_left])
        rt = self.build_tree(xs[go_right], y[go_right])
        # The right subtree is stored right after the left one, so its
        # offset from the root is the size of the LEFT subtree plus one.
        root = self.create_node(i, split_value, 1, lt.shape[0] + 1)

        return np.concatenate([root, lt, rt])

    def addEvidence(self, data_x, data_y):
        """
        @summary: Add training data to learner
        @param data_x: X values of data to add
        @param data_y: the Y training values
        """
        self.rel_tree = self.build_tree(data_x, data_y)

    def query_point(self, point):
        """
        @summary: Walk the tree for a single query point.
        @param point: 1-D numpy array with one value per feature
        @returns the value stored in the leaf that the point reaches
        """
        node_index = 0
        while self.rel_tree[node_index, 0] != self.LEAF:
            node = self.rel_tree[node_index]
            split_factor = int(node[0])
            split_value = node[1]
            if point[split_factor] <= split_value:
                node_index += int(node[2])
            else:
                node_index += int(node[3])
        return self.rel_tree[node_index, 1]

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        # A plain loop (instead of np.apply_along_axis) also works when
        # points has zero rows.
        return np.array([self.query_point(p) for p in points])


if __name__ == "__main__":
    print("the secret clue is 'zzyzx'")
--- Summary --- +Tests passed: 2 out of 18 + +--- Details --- +Test #0: failed +Description: Test Case 01: Deterministic Tree (group: DTLearner) +IncorrectOutput: Test failed on one or more output criteria. + Inputs: + data file: Istanbul.csv + permutation: [243 201 323 417 406 123 82 238 322 472 166 506 236 300 378 447 350 287 + 104 167 47 55 311 175 141 291 401 250 11 357 51 78 152 474 412 277 + 218 254 103 535 215 38 174 217 296 10 193 131 146 397 315 186 129 502 + 525 377 121 355 316 499 443 220 216 170 477 366 95 494 359 127 27 241 + 416 367 305 76 524 207 265 418 369 436 136 330 120 181 486 297 312 327 + 290 148 247 400 445 320 138 321 505 470 22 403 438 380 169 375 228 390 + 145 395 285 387 226 347 450 414 94 433 440 26 44 204 89 182 209 317 + 349 464 19 188 239 180 332 30 75 405 71 144 399 81 483 298 339 68 + 163 442 150 149 96 511 310 233 12 168 458 532 364 460 271 45 179 8 + 465 420 184 213 396 264 48 356 435 258 319 155 517 153 13 83 454 500 + 219 509 227 50 109 151 344 425 40 54 137 205 504 74 431 29 185 197 + 108 116 165 283 222 419 0 139 101 286 478 199 274 496 162 365 273 191 + 97 248 229 398 112 383 276 518 473 158 33 314 512 441 118 338 434 462 + 534 479 501 135 6 449 393 200 65 114 309 352 80 345 493 221 259 142 + 37 24 423 5 340 508 299 457 266 475 105 125 17 79 208 177 391 234 + 491 256 437 388 301 308 452 335 389 70 64 3 282 198 439 41 353 31 + 334 245 392 402 488 281 456 60 451 484 196 263 272 346 176 249 531 255 + 261 426 268 117 342 453 463 63 515 195 351 404 25 128 318 370 471 178 + 348 225 206 489 57 18 87 189 326 306 279 288 361 124 341 194 21 202 + 429 203 9 446 14 307 289 171 354 466 211 235 130 107 498 520 459 430 + 113 133 360 32 246 379 513 237 523 35 84 187 210 527 252 140 492 85 + 304 529 122 66 251 173 373 530 1 212 102 132 371 448 514 164 192 384 + 262 90 343 328 36 526 172 46 292 106 497 147 232 516 408 98 42 394 + 39 444 99 111 374 362 415 43 231 409 119 214 376 49 481 490 510 333 + 358 336 160 15 422 482 324 303 293 253 73 468 
20 278 507 495 230 16 + 386 244 77 53 275 270 428 533 522 190 485 59 411 52 257 503 86 476 + 302 62 242 480 427 313 424 93 56 280 381 410 240 159 67 487 161 69 + 58 91 126 28 432 455 325 143 295 134 519 467 260 461 329 382 183 157 + 100 294 521 224 34 23 421 61 269 368 363 528 88 4 2 223 154 92 + 407 72 385 413 337 110 115 372 469 156 284 331 267 7] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got 0.6014034074910817 expected 0.95 + +Test #1: failed +Description: Test Case 02: Deterministic Tree (group: DTLearner) +IncorrectOutput: Test failed on one or more output criteria. + Inputs: + data file: Istanbul.csv + permutation: [406 526 88 332 153 491 255 36 499 107 222 429 256 108 322 440 432 506 + 534 447 524 238 234 45 176 514 436 109 480 247 150 313 421 317 494 67 + 210 125 392 525 134 233 228 300 135 32 513 282 320 100 439 90 37 221 + 212 182 117 82 149 364 343 510 33 226 457 418 162 92 338 152 476 425 + 466 103 49 170 217 416 231 209 41 17 358 303 353 495 30 271 21 324 + 292 147 2 388 204 328 521 394 8 399 31 19 342 43 522 402 454 348 + 370 93 377 306 482 195 360 180 518 261 69 218 498 326 384 479 6 464 + 27 285 380 441 185 113 190 450 79 75 227 307 198 77 11 39 423 291 + 188 84 434 248 0 369 433 173 264 372 98 87 55 333 186 391 136 294 + 60 205 345 444 405 319 428 341 130 99 272 533 137 351 346 71 192 308 + 431 344 442 266 128 435 225 183 347 378 12 214 420 207 159 398 446 235 + 246 356 270 337 488 486 76 121 485 458 407 213 355 200 191 242 196 143 + 102 452 530 236 110 404 127 437 193 411 385 259 206 97 177 89 179 123 + 508 40 224 161 56 415 284 72 390 268 139 230 42 366 520 144 467 61 + 283 68 451 475 267 288 413 118 106 265 424 483 94 401 80 492 296 274 + 215 119 438 96 244 249 166 305 371 531 50 151 257 449 232 15 64 474 + 126 395 260 469 329 216 523 362 528 245 14 132 35 74 263 262 101 13 + 304 53 219 1 509 20 168 58 298 311 181 383 279 502 229 155 178 133 + 142 289 201 419 529 496 512 223 26 208 519 325 148 202 287 25 387 47 + 
252 174 352 38 443 286 46 63 243 48 116 165 86 471 167 373 184 340 + 426 131 83 484 258 169 376 254 389 10 124 16 397 427 237 290 472 456 + 70 396 301 156 463 460 465 269 315 318 339 54 489 251 51 497 302 295 + 503 203 505 278 403 277 129 163 400 273 240 199 517 381 462 535 330 386 + 349 336 382 211 459 62 504 470 115 299 393 409 253 141 468 18 532 379 + 281 368 34 473 146 310 507 138 3 445 105 239 78 515 73 410 354 361 + 334 417 297 414 187 365 461 120 22 312 323 501 309 154 65 23 114 24 + 164 197 430 455 95 111 374 350 276 321 314 275 122 408 481 158 453 448 + 140 29 145 357 189 4 500 5 493 52 66 375 487 7 59 220 241 157 + 194 335 250 327 363 478 293 112 516 81 316 57 331 44 9 28 422 280 + 511 477 91 490 412 172 367 175 160 104 359 171 85 527] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got 0.6054164848819564 expected 0.95 + +Test #2: failed +Description: Test Case 03: Deterministic Tree (group: DTLearner) +IncorrectOutput: Test failed on one or more output criteria. 
+ Inputs: + data file: Istanbul.csv + permutation: [404 400 371 426 236 207 2 229 243 257 399 192 266 344 18 239 494 295 + 136 479 255 353 313 510 176 278 199 178 433 516 186 241 181 489 263 293 + 158 382 447 511 140 287 276 442 386 21 87 342 318 163 407 162 326 459 + 284 150 49 177 88 92 383 530 68 31 301 112 525 53 475 315 205 473 + 509 28 458 316 54 340 330 443 42 462 131 265 350 145 138 451 438 455 + 508 64 193 30 486 337 102 174 294 336 258 437 200 187 127 468 312 51 + 361 159 466 260 499 105 352 460 209 9 224 463 234 360 141 417 424 227 + 113 430 528 286 75 411 275 271 108 206 379 33 481 117 521 170 61 501 + 36 230 27 119 3 267 58 476 218 262 223 269 101 497 519 440 518 256 + 514 149 251 160 57 179 233 268 77 401 5 60 84 25 402 128 240 522 + 533 484 161 172 213 534 245 445 423 368 15 217 211 175 358 214 320 369 + 196 520 464 165 351 24 384 345 396 228 70 512 208 314 59 38 292 103 + 55 515 78 376 95 333 348 82 456 357 385 490 85 120 289 244 435 96 + 355 453 122 8 403 144 156 203 529 346 506 291 246 390 325 436 420 97 + 434 100 130 439 387 212 373 106 201 397 81 124 249 44 222 215 225 431 + 482 302 356 487 290 197 405 444 338 45 457 307 392 72 153 248 416 524 + 166 347 74 0 297 126 43 152 242 306 195 285 414 393 527 62 381 319 + 133 142 6 304 467 65 93 129 335 427 155 19 409 17 277 448 154 526 + 110 472 56 500 280 40 415 185 226 296 374 370 147 428 48 419 532 261 + 188 328 421 323 493 363 22 168 311 303 503 98 20 288 67 470 535 259 + 12 109 327 89 339 90 231 94 7 189 441 254 305 425 309 169 422 116 + 164 317 372 29 366 114 377 204 183 134 364 389 157 513 1 232 237 354 + 523 180 16 137 47 507 216 123 505 171 210 151 362 413 99 253 115 202 + 167 321 322 471 308 274 104 395 135 39 86 34 71 488 412 365 341 252 + 388 272 454 492 380 432 429 182 118 139 283 331 300 235 238 367 10 250 + 198 191 80 194 190 221 121 50 496 146 148 483 220 26 52 11 531 408 + 69 281 63 66 469 474 14 184 37 452 410 498 107 76 491 13 32 343 + 173 132 310 418 375 91 270 264 398 41 332 279 504 
449 299 282 219 83 + 73 324 359 4 465 349 478 391 495 125 35 298 329 406 143 517 446 273 + 477 247 334 502 23 79 46 394 461 480 378 111 450 485] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got 0.6612956813428421 expected 0.95 + +Test #3: failed +Description: Test Case 04: Deterministic Tree (group: DTLearner) +IncorrectOutput: Test failed on one or more output criteria. + Inputs: + data file: Istanbul.csv + permutation: [267 137 167 435 402 440 173 320 89 50 232 474 247 203 272 445 283 484 + 383 226 49 11 102 143 419 450 47 166 421 241 189 263 161 224 186 298 + 520 117 191 78 497 524 334 71 36 347 501 125 466 443 469 139 188 389 + 160 360 379 514 35 142 465 358 486 238 171 243 523 405 269 136 310 5 + 372 58 259 529 307 257 522 147 180 211 277 0 526 276 165 146 63 235 + 471 51 345 227 22 10 164 204 513 140 90 27 74 457 190 205 304 525 + 430 219 461 214 84 118 472 387 373 385 434 495 76 158 42 367 221 2 + 418 17 429 220 222 420 532 431 110 496 289 148 106 86 460 300 438 14 + 104 91 503 343 476 119 70 212 1 377 316 223 376 467 116 487 215 468 + 346 168 265 264 13 473 97 33 382 192 201 182 195 400 59 482 93 498 + 295 156 20 55 151 297 417 329 282 185 342 6 12 254 369 499 446 449 + 246 444 194 23 491 331 410 108 131 179 490 133 534 134 408 67 349 174 + 371 437 448 325 242 248 530 273 357 81 199 288 250 176 159 399 309 4 + 196 361 262 344 285 15 213 107 480 413 252 19 403 206 149 68 100 374 + 39 141 21 207 478 506 129 66 464 407 251 470 28 423 485 415 321 533 + 504 378 341 69 124 217 337 483 150 255 178 109 336 425 439 38 455 489 + 239 200 145 330 34 390 507 135 428 46 163 48 388 197 105 208 392 521 + 249 432 162 209 299 98 454 198 515 326 396 7 95 92 57 292 183 365 + 441 509 128 354 169 364 363 481 293 73 172 16 99 245 233 356 253 350 + 312 414 229 458 3 202 279 77 65 274 386 477 352 60 427 433 511 244 + 31 266 333 453 54 114 391 401 101 45 72 170 228 381 85 319 398 308 + 527 311 187 103 516 327 30 313 271 359 281 493 494 175 517 528 284 426 
+ 94 270 237 230 287 184 37 115 502 447 475 61 24 318 87 422 314 531 + 112 340 64 355 452 294 127 18 275 323 88 280 291 338 79 234 353 29 + 138 510 258 181 82 225 436 339 412 512 479 126 8 113 256 535 348 411 + 406 32 25 424 44 231 83 153 368 384 278 335 43 404 157 301 462 130 + 375 193 456 395 317 324 305 303 416 260 268 236 111 132 144 52 505 322 + 75 362 155 366 261 332 519 122 351 290 328 393 80 123 380 306 218 451 + 463 492 296 442 152 177 9 409 120 154 240 53 26 41 56 62 286 459 + 40 397 216 508 500 488 96 302 370 210 121 315 518 394] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got 0.5183041364379453 expected 0.95 + +Test #4: failed +Description: Test Case 01: Random Tree (group: RTLearner) +IncorrectOutput: Test failed on one or more output criteria. + Inputs: + data file: Istanbul.csv + permutation: [243 201 323 417 406 123 82 238 322 472 166 506 236 300 378 447 350 287 + 104 167 47 55 311 175 141 291 401 250 11 357 51 78 152 474 412 277 + 218 254 103 535 215 38 174 217 296 10 193 131 146 397 315 186 129 502 + 525 377 121 355 316 499 443 220 216 170 477 366 95 494 359 127 27 241 + 416 367 305 76 524 207 265 418 369 436 136 330 120 181 486 297 312 327 + 290 148 247 400 445 320 138 321 505 470 22 403 438 380 169 375 228 390 + 145 395 285 387 226 347 450 414 94 433 440 26 44 204 89 182 209 317 + 349 464 19 188 239 180 332 30 75 405 71 144 399 81 483 298 339 68 + 163 442 150 149 96 511 310 233 12 168 458 532 364 460 271 45 179 8 + 465 420 184 213 396 264 48 356 435 258 319 155 517 153 13 83 454 500 + 219 509 227 50 109 151 344 425 40 54 137 205 504 74 431 29 185 197 + 108 116 165 283 222 419 0 139 101 286 478 199 274 496 162 365 273 191 + 97 248 229 398 112 383 276 518 473 158 33 314 512 441 118 338 434 462 + 534 479 501 135 6 449 393 200 65 114 309 352 80 345 493 221 259 142 + 37 24 423 5 340 508 299 457 266 475 105 125 17 79 208 177 391 234 + 491 256 437 388 301 308 452 335 389 70 64 3 282 198 439 41 353 31 + 334 245 392 402 488 
281 456 60 451 484 196 263 272 346 176 249 531 255 + 261 426 268 117 342 453 463 63 515 195 351 404 25 128 318 370 471 178 + 348 225 206 489 57 18 87 189 326 306 279 288 361 124 341 194 21 202 + 429 203 9 446 14 307 289 171 354 466 211 235 130 107 498 520 459 430 + 113 133 360 32 246 379 513 237 523 35 84 187 210 527 252 140 492 85 + 304 529 122 66 251 173 373 530 1 212 102 132 371 448 514 164 192 384 + 262 90 343 328 36 526 172 46 292 106 497 147 232 516 408 98 42 394 + 39 444 99 111 374 362 415 43 231 409 119 214 376 49 481 490 510 333 + 358 336 160 15 422 482 324 303 293 253 73 468 20 278 507 495 230 16 + 386 244 77 53 275 270 428 533 522 190 485 59 411 52 257 503 86 476 + 302 62 242 480 427 313 424 93 56 280 381 410 240 159 67 487 161 69 + 58 91 126 28 432 455 325 143 295 134 519 467 260 461 329 382 183 157 + 100 294 521 224 34 23 421 61 269 368 363 528 88 4 2 223 154 92 + 407 72 385 413 337 110 115 372 469 156 284 331 267 7] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got 0.24146830713967032 expected 0.95 + +Test #5: failed +Description: Test Case 02: Random Tree (group: RTLearner) +IncorrectOutput: Test failed on one or more output criteria. 
+ Inputs: + data file: Istanbul.csv + permutation: [406 526 88 332 153 491 255 36 499 107 222 429 256 108 322 440 432 506 + 534 447 524 238 234 45 176 514 436 109 480 247 150 313 421 317 494 67 + 210 125 392 525 134 233 228 300 135 32 513 282 320 100 439 90 37 221 + 212 182 117 82 149 364 343 510 33 226 457 418 162 92 338 152 476 425 + 466 103 49 170 217 416 231 209 41 17 358 303 353 495 30 271 21 324 + 292 147 2 388 204 328 521 394 8 399 31 19 342 43 522 402 454 348 + 370 93 377 306 482 195 360 180 518 261 69 218 498 326 384 479 6 464 + 27 285 380 441 185 113 190 450 79 75 227 307 198 77 11 39 423 291 + 188 84 434 248 0 369 433 173 264 372 98 87 55 333 186 391 136 294 + 60 205 345 444 405 319 428 341 130 99 272 533 137 351 346 71 192 308 + 431 344 442 266 128 435 225 183 347 378 12 214 420 207 159 398 446 235 + 246 356 270 337 488 486 76 121 485 458 407 213 355 200 191 242 196 143 + 102 452 530 236 110 404 127 437 193 411 385 259 206 97 177 89 179 123 + 508 40 224 161 56 415 284 72 390 268 139 230 42 366 520 144 467 61 + 283 68 451 475 267 288 413 118 106 265 424 483 94 401 80 492 296 274 + 215 119 438 96 244 249 166 305 371 531 50 151 257 449 232 15 64 474 + 126 395 260 469 329 216 523 362 528 245 14 132 35 74 263 262 101 13 + 304 53 219 1 509 20 168 58 298 311 181 383 279 502 229 155 178 133 + 142 289 201 419 529 496 512 223 26 208 519 325 148 202 287 25 387 47 + 252 174 352 38 443 286 46 63 243 48 116 165 86 471 167 373 184 340 + 426 131 83 484 258 169 376 254 389 10 124 16 397 427 237 290 472 456 + 70 396 301 156 463 460 465 269 315 318 339 54 489 251 51 497 302 295 + 503 203 505 278 403 277 129 163 400 273 240 199 517 381 462 535 330 386 + 349 336 382 211 459 62 504 470 115 299 393 409 253 141 468 18 532 379 + 281 368 34 473 146 310 507 138 3 445 105 239 78 515 73 410 354 361 + 334 417 297 414 187 365 461 120 22 312 323 501 309 154 65 23 114 24 + 164 197 430 455 95 111 374 350 276 321 314 275 122 408 481 158 453 448 + 140 29 145 357 189 4 500 5 493 52 66 375 
487 7 59 220 241 157 + 194 335 250 327 363 478 293 112 516 81 316 57 331 44 9 28 422 280 + 511 477 91 490 412 172 367 175 160 104 359 171 85 527] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got 0.5296884102723453 expected 0.95 + +Test #6: failed +Description: Test Case 03: Random Tree (group: RTLearner) +IncorrectOutput: Test failed on one or more output criteria. + Inputs: + data file: Istanbul.csv + permutation: [404 400 371 426 236 207 2 229 243 257 399 192 266 344 18 239 494 295 + 136 479 255 353 313 510 176 278 199 178 433 516 186 241 181 489 263 293 + 158 382 447 511 140 287 276 442 386 21 87 342 318 163 407 162 326 459 + 284 150 49 177 88 92 383 530 68 31 301 112 525 53 475 315 205 473 + 509 28 458 316 54 340 330 443 42 462 131 265 350 145 138 451 438 455 + 508 64 193 30 486 337 102 174 294 336 258 437 200 187 127 468 312 51 + 361 159 466 260 499 105 352 460 209 9 224 463 234 360 141 417 424 227 + 113 430 528 286 75 411 275 271 108 206 379 33 481 117 521 170 61 501 + 36 230 27 119 3 267 58 476 218 262 223 269 101 497 519 440 518 256 + 514 149 251 160 57 179 233 268 77 401 5 60 84 25 402 128 240 522 + 533 484 161 172 213 534 245 445 423 368 15 217 211 175 358 214 320 369 + 196 520 464 165 351 24 384 345 396 228 70 512 208 314 59 38 292 103 + 55 515 78 376 95 333 348 82 456 357 385 490 85 120 289 244 435 96 + 355 453 122 8 403 144 156 203 529 346 506 291 246 390 325 436 420 97 + 434 100 130 439 387 212 373 106 201 397 81 124 249 44 222 215 225 431 + 482 302 356 487 290 197 405 444 338 45 457 307 392 72 153 248 416 524 + 166 347 74 0 297 126 43 152 242 306 195 285 414 393 527 62 381 319 + 133 142 6 304 467 65 93 129 335 427 155 19 409 17 277 448 154 526 + 110 472 56 500 280 40 415 185 226 296 374 370 147 428 48 419 532 261 + 188 328 421 323 493 363 22 168 311 303 503 98 20 288 67 470 535 259 + 12 109 327 89 339 90 231 94 7 189 441 254 305 425 309 169 422 116 + 164 317 372 29 366 114 377 204 183 134 364 389 157 513 1 232 237 354 + 523 
180 16 137 47 507 216 123 505 171 210 151 362 413 99 253 115 202 + 167 321 322 471 308 274 104 395 135 39 86 34 71 488 412 365 341 252 + 388 272 454 492 380 432 429 182 118 139 283 331 300 235 238 367 10 250 + 198 191 80 194 190 221 121 50 496 146 148 483 220 26 52 11 531 408 + 69 281 63 66 469 474 14 184 37 452 410 498 107 76 491 13 32 343 + 173 132 310 418 375 91 270 264 398 41 332 279 504 449 299 282 219 83 + 73 324 359 4 465 349 478 391 495 125 35 298 329 406 143 517 446 273 + 477 247 334 502 23 79 46 394 461 480 378 111 450 485] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got 0.40960830448948204 expected 0.95 + +Test #7: failed +Description: Test Case 04: Random Tree (group: RTLearner) +IncorrectOutput: Test failed on one or more output criteria. + Inputs: + data file: Istanbul.csv + permutation: [267 137 167 435 402 440 173 320 89 50 232 474 247 203 272 445 283 484 + 383 226 49 11 102 143 419 450 47 166 421 241 189 263 161 224 186 298 + 520 117 191 78 497 524 334 71 36 347 501 125 466 443 469 139 188 389 + 160 360 379 514 35 142 465 358 486 238 171 243 523 405 269 136 310 5 + 372 58 259 529 307 257 522 147 180 211 277 0 526 276 165 146 63 235 + 471 51 345 227 22 10 164 204 513 140 90 27 74 457 190 205 304 525 + 430 219 461 214 84 118 472 387 373 385 434 495 76 158 42 367 221 2 + 418 17 429 220 222 420 532 431 110 496 289 148 106 86 460 300 438 14 + 104 91 503 343 476 119 70 212 1 377 316 223 376 467 116 487 215 468 + 346 168 265 264 13 473 97 33 382 192 201 182 195 400 59 482 93 498 + 295 156 20 55 151 297 417 329 282 185 342 6 12 254 369 499 446 449 + 246 444 194 23 491 331 410 108 131 179 490 133 534 134 408 67 349 174 + 371 437 448 325 242 248 530 273 357 81 199 288 250 176 159 399 309 4 + 196 361 262 344 285 15 213 107 480 413 252 19 403 206 149 68 100 374 + 39 141 21 207 478 506 129 66 464 407 251 470 28 423 485 415 321 533 + 504 378 341 69 124 217 337 483 150 255 178 109 336 425 439 38 455 489 + 239 200 145 330 34 390 507 135 
428 46 163 48 388 197 105 208 392 521 + 249 432 162 209 299 98 454 198 515 326 396 7 95 92 57 292 183 365 + 441 509 128 354 169 364 363 481 293 73 172 16 99 245 233 356 253 350 + 312 414 229 458 3 202 279 77 65 274 386 477 352 60 427 433 511 244 + 31 266 333 453 54 114 391 401 101 45 72 170 228 381 85 319 398 308 + 527 311 187 103 516 327 30 313 271 359 281 493 494 175 517 528 284 426 + 94 270 237 230 287 184 37 115 502 447 475 61 24 318 87 422 314 531 + 112 340 64 355 452 294 127 18 275 323 88 280 291 338 79 234 353 29 + 138 510 258 181 82 225 436 339 412 512 479 126 8 113 256 535 348 411 + 406 32 25 424 44 231 83 153 368 384 278 335 43 404 157 301 462 130 + 375 193 456 395 317 324 305 303 416 260 268 236 111 132 144 52 505 322 + 75 362 155 366 261 332 519 122 351 290 328 393 80 123 380 306 218 451 + 463 492 296 442 152 177 9 409 120 154 240 53 26 41 56 62 286 459 + 40 397 216 508 500 488 96 302 370 210 121 315 518 394] + Failures: + In-sample with leaf_size=1 correlation less than allowed: got -0.23631524611792065 expected 0.95 + Out-of-sample correlation less than allowed: got -0.10777754693759267 expected 0.15 + +Test #8: failed +Description: Test Case 01: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test #9: failed +Description: Test Case 02: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test #10: failed +Description: Test Case 03: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test #11: failed +Description: Test Case 04: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test #12: failed +Description: Test Case 05: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test #13: failed +Description: Test Case 06: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test 
#14: failed +Description: Test Case 07: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test #15: failed +Description: Test Case 08: Bagging (group: BagLearner) +TypeError: __init__() got an unexpected keyword argument 'kwargs' + +Test #16: passed +Test #17: passed +diff --git a/assess_learners/points.txt b/assess_learners/points.txt new file mode 100644 index 0000000..fe6b09a --- /dev/null +++ b/assess_learners/points.txt @@ -0,0 +1 @@ +15.0 diff --git a/assess_learners/testlearner.py b/assess_learners/testlearner.py index f22966b..404eea9 100644 --- a/assess_learners/testlearner.py +++ b/assess_learners/testlearner.py @@ -26,6 +26,9 @@ import numpy as np import math import LinRegLearner as lrl import DTLearner as dtl +import RTLearner as rtl +import BagLearner as bgl +import InsaneLearner as isl import sys if __name__=="__main__": @@ -36,11 +39,7 @@ if __name__=="__main__": # data = np.array([list(map(float,s.strip().split(',')[1:])) # for s in inf.readlines()[1:]]) data = np.array([list(map(float,s.strip().split(',')[1:])) - for s in inf.readlines()]) - - # XXX: Get rid of some rows and columns for easier development. - # XXX: Remove later for testing! - # data = data[:10,5:] + for s in inf.readlines()[1:]]) # compute how much of the data is training and testing train_rows = int(0.6* data.shape[0]) @@ -58,6 +57,9 @@ if __name__=="__main__": # create a learner and train it # learner = lrl.LinRegLearner(verbose = True) # create a LinRegLearner learner = dtl.DTLearner(verbose = True) # create a LinRegLearner + # learner = rtl.RTLearner(verbose = True) # create a LinRegLearner + # learner = bgl.BagLearner(dtl.DTLearner, bags=50) # create a LinRegLearner + # learner = isl.InsaneLearner() learner.addEvidence(trainX, trainY) print(learner.author())