From 3f2d2f4df3ae726be7cd590eff66de100fc5576d Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Thu, 24 Sep 2020 17:32:29 -0400 Subject: [PATCH] Finish all learners, but they don't pass tests. I have to figure out why they perform so bad. --- assess_learners/BagLearner.py | 45 ++++ assess_learners/DTLearner.py | 13 +- assess_learners/InsaneLearner.py | 28 +++ assess_learners/RTLearner.py | 83 ++++++++ assess_learners/comments.txt | 344 +++++++++++++++++++++++++++++++ assess_learners/points.txt | 1 + assess_learners/testlearner.py | 12 +- 7 files changed, 511 insertions(+), 15 deletions(-) create mode 100644 assess_learners/BagLearner.py create mode 100644 assess_learners/InsaneLearner.py create mode 100644 assess_learners/RTLearner.py create mode 100644 assess_learners/comments.txt create mode 100644 assess_learners/points.txt diff --git a/assess_learners/BagLearner.py b/assess_learners/BagLearner.py new file mode 100644 index 0000000..6673ab2 --- /dev/null +++ b/assess_learners/BagLearner.py @@ -0,0 +1,45 @@ +import numpy as np + + +class BagLearner(object): + def __init__(self, learner, bags=20, boost=False, verbose=False, **kwargs): + self.learner = learner + self.bags = bags + self.boost = boost + self.verbose = verbose + self.kwargs = kwargs + self.learners = [learner(**kwargs) for _ in range(bags)] + + def author(self): + return 'felixm' # replace tb34 with your Georgia Tech username + + def get_bag(self, data_x, data_y): + num_items = int(data_x.shape[0] * 0.5) # 50% of samples + bag_x, bag_y = [], [] + for _ in range(num_items): + i = np.random.randint(0, data_x.shape[0]) + bag_x.append(data_x[i,:]) + bag_y.append(data_y[i]) + return np.array(bag_x), np.array(bag_y) + + + def addEvidence(self, data_x, data_y): + """ + @summary: Add training data to learner + @param dataX: X values of data to add + @param dataY: the Y training values + """ + for learner in self.learners: + x, y = self.get_bag(data_x, data_y) + learner.addEvidence(x, y) + + def query(self, points): + """ + @summary: Estimate a set of test points given the model we built. + @param points: should be a numpy array with each row corresponding to a specific query. + @returns the estimated values according to the saved model. + """ + return np.mean([l.query(points) for l in self.learners], axis=0) + +if __name__=="__main__": + print("the secret clue is 'zzyzx'") diff --git a/assess_learners/DTLearner.py b/assess_learners/DTLearner.py index 11f5b54..ef798f2 100644 --- a/assess_learners/DTLearner.py +++ b/assess_learners/DTLearner.py @@ -36,20 +36,13 @@ class DTLearner(object): i_max = i return i_max - def make_tree_absolute(self, tree): - for i in range(tree.shape[0]): - if tree[i, 2] == self.NA: - continue - tree[i, 2] = i + tree[i, 2] - tree[i, 3] = i + tree[i, 3] - return tree - def build_tree(self, xs, y): assert(xs.shape[0] == y.shape[0]) assert(xs.shape[0] > 0) # If this is 0 something went wrong. - if xs.shape[0] == 1: - return self.create_node(self.LEAF, y[0], self.NA, self.NA) + if xs.shape[0] <= self.leaf_size: + value = np.median(y) + return self.create_node(self.LEAF, value, self.NA, self.NA) if np.all(y[0] == y): return self.create_node(self.LEAF, y[0], self.NA, self.NA) diff --git a/assess_learners/InsaneLearner.py b/assess_learners/InsaneLearner.py new file mode 100644 index 0000000..28cf876 --- /dev/null +++ b/assess_learners/InsaneLearner.py @@ -0,0 +1,28 @@ +import BagLearner as bgl +import LinRegLearner as lrl + +class InsaneLearner(object): + def __init__(self, verbose=False): + def bag_learner(): + return bgl.BagLearner(lrl.LinRegLearner, bags=20, verbose=verbose) + self.learner = bgl.BagLearner(bag_learner, bags=20, verbose=verbose) + + def author(self): + return 'felixm' # replace tb34 with your Georgia Tech username + + def addEvidence(self, data_x, data_y): + """ + @summary: Add training data to learner + @param dataX: X values of data to add + @param dataY: the Y training values + """ + self.learner.addEvidence(data_x, data_y) + + def query(self, points): + """ + @summary: Estimate a set of test points given the model we built. + @param points: should be a numpy array with each row corresponding to a specific query. + @returns the estimated values according to the saved model. + """ + return self.learner.query(points) + diff --git a/assess_learners/RTLearner.py b/assess_learners/RTLearner.py new file mode 100644 index 0000000..a2dc02f --- /dev/null +++ b/assess_learners/RTLearner.py @@ -0,0 +1,83 @@ +import numpy as np + + +class RTLearner(object): + + LEAF = -1 + NA = -1 + + def __init__(self, leaf_size = 1, verbose = False): + self.leaf_size = leaf_size + self.verbose = verbose + + def author(self): + return 'felixm' # replace tb34 with your Georgia Tech username + + def create_node(self, factor, split_value, left, right): + return np.array([[factor, split_value, left, right], ]) + + def build_tree(self, xs, y): + assert(xs.shape[0] == y.shape[0]) + assert(xs.shape[0] > 0) # If this is 0 something went wrong. + + if xs.shape[0] <= self.leaf_size: + value = np.median(y) + return self.create_node(self.LEAF, value, self.NA, self.NA) + + if np.all(y[0] == y): + return self.create_node(self.LEAF, y[0], self.NA, self.NA) + + i = np.random.randint(0, xs.shape[1]) + # If we pick an i for which all x are the same, try again. + while np.all(xs[0,i] == xs[:,i]): + i = np.random.randint(0, xs.shape[1]) + + r1, r2 = np.random.randint(0, xs.shape[0], size = 2) + split_value = (xs[r1, i] + xs[r2, i]) / 2.0 + + select_lt = xs[:, i] <= split_value + select_rt = xs[:, i] > split_value + # Avoid case where all values are low or equal to the median. + if select_lt.all() or select_rt.all(): + select_lt = xs[:, i] < split_value + select_rt = xs[:, i] >= split_value + + lt = self.build_tree(xs[select_lt], y[select_lt]) + rt = self.build_tree(xs[select_rt], y[select_rt]) + root = self.create_node(i, split_value, 1, rt.shape[0] + 1) + + root = np.concatenate([root, lt, rt]) + return root + + def addEvidence(self, data_x, data_y): + """ + @summary: Add training data to learner + @param dataX: X values of data to add + @param dataY: the Y training values + """ + self.rel_tree = self.build_tree(data_x, data_y) + + def query_point(self, point): + node_index = 0 + while self.rel_tree[node_index, 0] != self.LEAF: + node = self.rel_tree[node_index] + split_factor = int(node[0]) + split_value = node[1] + if point[split_factor] <= split_value: + node_index += int(node[2]) + else: + node_index += int(node[3]) + return self.rel_tree[node_index, 1] + + def query(self, points): + """ + @summary: Estimate a set of test points given the model we built. + @param points: should be a numpy array with each row corresponding to a specific query. + @returns the estimated values according to the saved model. + """ + query_point = lambda p: self.query_point(p) + r = np.apply_along_axis(query_point, 1, points) + return r + +if __name__=="__main__": + print("the secret clue is 'zzyzx'") diff --git a/assess_learners/comments.txt b/assess_learners/comments.txt new file mode 100644 index 0000000..c44c64b --- /dev/null +++ b/assess_learners/comments.txt @@ -0,0 +1,344 @@ +
--- Summary ---
+Tests passed: 2 out of 18
+
+--- Details ---
+Test #0: failed 
+Description: Test Case 01: Deterministic Tree (group: DTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [243 201 323 417 406 123  82 238 322 472 166 506 236 300 378 447 350 287
+ 104 167  47  55 311 175 141 291 401 250  11 357  51  78 152 474 412 277
+ 218 254 103 535 215  38 174 217 296  10 193 131 146 397 315 186 129 502
+ 525 377 121 355 316 499 443 220 216 170 477 366  95 494 359 127  27 241
+ 416 367 305  76 524 207 265 418 369 436 136 330 120 181 486 297 312 327
+ 290 148 247 400 445 320 138 321 505 470  22 403 438 380 169 375 228 390
+ 145 395 285 387 226 347 450 414  94 433 440  26  44 204  89 182 209 317
+ 349 464  19 188 239 180 332  30  75 405  71 144 399  81 483 298 339  68
+ 163 442 150 149  96 511 310 233  12 168 458 532 364 460 271  45 179   8
+ 465 420 184 213 396 264  48 356 435 258 319 155 517 153  13  83 454 500
+ 219 509 227  50 109 151 344 425  40  54 137 205 504  74 431  29 185 197
+ 108 116 165 283 222 419   0 139 101 286 478 199 274 496 162 365 273 191
+  97 248 229 398 112 383 276 518 473 158  33 314 512 441 118 338 434 462
+ 534 479 501 135   6 449 393 200  65 114 309 352  80 345 493 221 259 142
+  37  24 423   5 340 508 299 457 266 475 105 125  17  79 208 177 391 234
+ 491 256 437 388 301 308 452 335 389  70  64   3 282 198 439  41 353  31
+ 334 245 392 402 488 281 456  60 451 484 196 263 272 346 176 249 531 255
+ 261 426 268 117 342 453 463  63 515 195 351 404  25 128 318 370 471 178
+ 348 225 206 489  57  18  87 189 326 306 279 288 361 124 341 194  21 202
+ 429 203   9 446  14 307 289 171 354 466 211 235 130 107 498 520 459 430
+ 113 133 360  32 246 379 513 237 523  35  84 187 210 527 252 140 492  85
+ 304 529 122  66 251 173 373 530   1 212 102 132 371 448 514 164 192 384
+ 262  90 343 328  36 526 172  46 292 106 497 147 232 516 408  98  42 394
+  39 444  99 111 374 362 415  43 231 409 119 214 376  49 481 490 510 333
+ 358 336 160  15 422 482 324 303 293 253  73 468  20 278 507 495 230  16
+ 386 244  77  53 275 270 428 533 522 190 485  59 411  52 257 503  86 476
+ 302  62 242 480 427 313 424  93  56 280 381 410 240 159  67 487 161  69
+  58  91 126  28 432 455 325 143 295 134 519 467 260 461 329 382 183 157
+ 100 294 521 224  34  23 421  61 269 368 363 528  88   4   2 223 154  92
+ 407  72 385 413 337 110 115 372 469 156 284 331 267   7]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got 0.6014034074910817 expected 0.95
+
+Test #1: failed 
+Description: Test Case 02: Deterministic Tree (group: DTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [406 526  88 332 153 491 255  36 499 107 222 429 256 108 322 440 432 506
+ 534 447 524 238 234  45 176 514 436 109 480 247 150 313 421 317 494  67
+ 210 125 392 525 134 233 228 300 135  32 513 282 320 100 439  90  37 221
+ 212 182 117  82 149 364 343 510  33 226 457 418 162  92 338 152 476 425
+ 466 103  49 170 217 416 231 209  41  17 358 303 353 495  30 271  21 324
+ 292 147   2 388 204 328 521 394   8 399  31  19 342  43 522 402 454 348
+ 370  93 377 306 482 195 360 180 518 261  69 218 498 326 384 479   6 464
+  27 285 380 441 185 113 190 450  79  75 227 307 198  77  11  39 423 291
+ 188  84 434 248   0 369 433 173 264 372  98  87  55 333 186 391 136 294
+  60 205 345 444 405 319 428 341 130  99 272 533 137 351 346  71 192 308
+ 431 344 442 266 128 435 225 183 347 378  12 214 420 207 159 398 446 235
+ 246 356 270 337 488 486  76 121 485 458 407 213 355 200 191 242 196 143
+ 102 452 530 236 110 404 127 437 193 411 385 259 206  97 177  89 179 123
+ 508  40 224 161  56 415 284  72 390 268 139 230  42 366 520 144 467  61
+ 283  68 451 475 267 288 413 118 106 265 424 483  94 401  80 492 296 274
+ 215 119 438  96 244 249 166 305 371 531  50 151 257 449 232  15  64 474
+ 126 395 260 469 329 216 523 362 528 245  14 132  35  74 263 262 101  13
+ 304  53 219   1 509  20 168  58 298 311 181 383 279 502 229 155 178 133
+ 142 289 201 419 529 496 512 223  26 208 519 325 148 202 287  25 387  47
+ 252 174 352  38 443 286  46  63 243  48 116 165  86 471 167 373 184 340
+ 426 131  83 484 258 169 376 254 389  10 124  16 397 427 237 290 472 456
+  70 396 301 156 463 460 465 269 315 318 339  54 489 251  51 497 302 295
+ 503 203 505 278 403 277 129 163 400 273 240 199 517 381 462 535 330 386
+ 349 336 382 211 459  62 504 470 115 299 393 409 253 141 468  18 532 379
+ 281 368  34 473 146 310 507 138   3 445 105 239  78 515  73 410 354 361
+ 334 417 297 414 187 365 461 120  22 312 323 501 309 154  65  23 114  24
+ 164 197 430 455  95 111 374 350 276 321 314 275 122 408 481 158 453 448
+ 140  29 145 357 189   4 500   5 493  52  66 375 487   7  59 220 241 157
+ 194 335 250 327 363 478 293 112 516  81 316  57 331  44   9  28 422 280
+ 511 477  91 490 412 172 367 175 160 104 359 171  85 527]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got 0.6054164848819564 expected 0.95
+
+Test #2: failed 
+Description: Test Case 03: Deterministic Tree (group: DTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [404 400 371 426 236 207   2 229 243 257 399 192 266 344  18 239 494 295
+ 136 479 255 353 313 510 176 278 199 178 433 516 186 241 181 489 263 293
+ 158 382 447 511 140 287 276 442 386  21  87 342 318 163 407 162 326 459
+ 284 150  49 177  88  92 383 530  68  31 301 112 525  53 475 315 205 473
+ 509  28 458 316  54 340 330 443  42 462 131 265 350 145 138 451 438 455
+ 508  64 193  30 486 337 102 174 294 336 258 437 200 187 127 468 312  51
+ 361 159 466 260 499 105 352 460 209   9 224 463 234 360 141 417 424 227
+ 113 430 528 286  75 411 275 271 108 206 379  33 481 117 521 170  61 501
+  36 230  27 119   3 267  58 476 218 262 223 269 101 497 519 440 518 256
+ 514 149 251 160  57 179 233 268  77 401   5  60  84  25 402 128 240 522
+ 533 484 161 172 213 534 245 445 423 368  15 217 211 175 358 214 320 369
+ 196 520 464 165 351  24 384 345 396 228  70 512 208 314  59  38 292 103
+  55 515  78 376  95 333 348  82 456 357 385 490  85 120 289 244 435  96
+ 355 453 122   8 403 144 156 203 529 346 506 291 246 390 325 436 420  97
+ 434 100 130 439 387 212 373 106 201 397  81 124 249  44 222 215 225 431
+ 482 302 356 487 290 197 405 444 338  45 457 307 392  72 153 248 416 524
+ 166 347  74   0 297 126  43 152 242 306 195 285 414 393 527  62 381 319
+ 133 142   6 304 467  65  93 129 335 427 155  19 409  17 277 448 154 526
+ 110 472  56 500 280  40 415 185 226 296 374 370 147 428  48 419 532 261
+ 188 328 421 323 493 363  22 168 311 303 503  98  20 288  67 470 535 259
+  12 109 327  89 339  90 231  94   7 189 441 254 305 425 309 169 422 116
+ 164 317 372  29 366 114 377 204 183 134 364 389 157 513   1 232 237 354
+ 523 180  16 137  47 507 216 123 505 171 210 151 362 413  99 253 115 202
+ 167 321 322 471 308 274 104 395 135  39  86  34  71 488 412 365 341 252
+ 388 272 454 492 380 432 429 182 118 139 283 331 300 235 238 367  10 250
+ 198 191  80 194 190 221 121  50 496 146 148 483 220  26  52  11 531 408
+  69 281  63  66 469 474  14 184  37 452 410 498 107  76 491  13  32 343
+ 173 132 310 418 375  91 270 264 398  41 332 279 504 449 299 282 219  83
+  73 324 359   4 465 349 478 391 495 125  35 298 329 406 143 517 446 273
+ 477 247 334 502  23  79  46 394 461 480 378 111 450 485]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got 0.6612956813428421 expected 0.95
+
+Test #3: failed 
+Description: Test Case 04: Deterministic Tree (group: DTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [267 137 167 435 402 440 173 320  89  50 232 474 247 203 272 445 283 484
+ 383 226  49  11 102 143 419 450  47 166 421 241 189 263 161 224 186 298
+ 520 117 191  78 497 524 334  71  36 347 501 125 466 443 469 139 188 389
+ 160 360 379 514  35 142 465 358 486 238 171 243 523 405 269 136 310   5
+ 372  58 259 529 307 257 522 147 180 211 277   0 526 276 165 146  63 235
+ 471  51 345 227  22  10 164 204 513 140  90  27  74 457 190 205 304 525
+ 430 219 461 214  84 118 472 387 373 385 434 495  76 158  42 367 221   2
+ 418  17 429 220 222 420 532 431 110 496 289 148 106  86 460 300 438  14
+ 104  91 503 343 476 119  70 212   1 377 316 223 376 467 116 487 215 468
+ 346 168 265 264  13 473  97  33 382 192 201 182 195 400  59 482  93 498
+ 295 156  20  55 151 297 417 329 282 185 342   6  12 254 369 499 446 449
+ 246 444 194  23 491 331 410 108 131 179 490 133 534 134 408  67 349 174
+ 371 437 448 325 242 248 530 273 357  81 199 288 250 176 159 399 309   4
+ 196 361 262 344 285  15 213 107 480 413 252  19 403 206 149  68 100 374
+  39 141  21 207 478 506 129  66 464 407 251 470  28 423 485 415 321 533
+ 504 378 341  69 124 217 337 483 150 255 178 109 336 425 439  38 455 489
+ 239 200 145 330  34 390 507 135 428  46 163  48 388 197 105 208 392 521
+ 249 432 162 209 299  98 454 198 515 326 396   7  95  92  57 292 183 365
+ 441 509 128 354 169 364 363 481 293  73 172  16  99 245 233 356 253 350
+ 312 414 229 458   3 202 279  77  65 274 386 477 352  60 427 433 511 244
+  31 266 333 453  54 114 391 401 101  45  72 170 228 381  85 319 398 308
+ 527 311 187 103 516 327  30 313 271 359 281 493 494 175 517 528 284 426
+  94 270 237 230 287 184  37 115 502 447 475  61  24 318  87 422 314 531
+ 112 340  64 355 452 294 127  18 275 323  88 280 291 338  79 234 353  29
+ 138 510 258 181  82 225 436 339 412 512 479 126   8 113 256 535 348 411
+ 406  32  25 424  44 231  83 153 368 384 278 335  43 404 157 301 462 130
+ 375 193 456 395 317 324 305 303 416 260 268 236 111 132 144  52 505 322
+  75 362 155 366 261 332 519 122 351 290 328 393  80 123 380 306 218 451
+ 463 492 296 442 152 177   9 409 120 154 240  53  26  41  56  62 286 459
+  40 397 216 508 500 488  96 302 370 210 121 315 518 394]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got 0.5183041364379453 expected 0.95
+
+Test #4: failed 
+Description: Test Case 01: Random Tree (group: RTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [243 201 323 417 406 123  82 238 322 472 166 506 236 300 378 447 350 287
+ 104 167  47  55 311 175 141 291 401 250  11 357  51  78 152 474 412 277
+ 218 254 103 535 215  38 174 217 296  10 193 131 146 397 315 186 129 502
+ 525 377 121 355 316 499 443 220 216 170 477 366  95 494 359 127  27 241
+ 416 367 305  76 524 207 265 418 369 436 136 330 120 181 486 297 312 327
+ 290 148 247 400 445 320 138 321 505 470  22 403 438 380 169 375 228 390
+ 145 395 285 387 226 347 450 414  94 433 440  26  44 204  89 182 209 317
+ 349 464  19 188 239 180 332  30  75 405  71 144 399  81 483 298 339  68
+ 163 442 150 149  96 511 310 233  12 168 458 532 364 460 271  45 179   8
+ 465 420 184 213 396 264  48 356 435 258 319 155 517 153  13  83 454 500
+ 219 509 227  50 109 151 344 425  40  54 137 205 504  74 431  29 185 197
+ 108 116 165 283 222 419   0 139 101 286 478 199 274 496 162 365 273 191
+  97 248 229 398 112 383 276 518 473 158  33 314 512 441 118 338 434 462
+ 534 479 501 135   6 449 393 200  65 114 309 352  80 345 493 221 259 142
+  37  24 423   5 340 508 299 457 266 475 105 125  17  79 208 177 391 234
+ 491 256 437 388 301 308 452 335 389  70  64   3 282 198 439  41 353  31
+ 334 245 392 402 488 281 456  60 451 484 196 263 272 346 176 249 531 255
+ 261 426 268 117 342 453 463  63 515 195 351 404  25 128 318 370 471 178
+ 348 225 206 489  57  18  87 189 326 306 279 288 361 124 341 194  21 202
+ 429 203   9 446  14 307 289 171 354 466 211 235 130 107 498 520 459 430
+ 113 133 360  32 246 379 513 237 523  35  84 187 210 527 252 140 492  85
+ 304 529 122  66 251 173 373 530   1 212 102 132 371 448 514 164 192 384
+ 262  90 343 328  36 526 172  46 292 106 497 147 232 516 408  98  42 394
+  39 444  99 111 374 362 415  43 231 409 119 214 376  49 481 490 510 333
+ 358 336 160  15 422 482 324 303 293 253  73 468  20 278 507 495 230  16
+ 386 244  77  53 275 270 428 533 522 190 485  59 411  52 257 503  86 476
+ 302  62 242 480 427 313 424  93  56 280 381 410 240 159  67 487 161  69
+  58  91 126  28 432 455 325 143 295 134 519 467 260 461 329 382 183 157
+ 100 294 521 224  34  23 421  61 269 368 363 528  88   4   2 223 154  92
+ 407  72 385 413 337 110 115 372 469 156 284 331 267   7]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got 0.24146830713967032 expected 0.95
+
+Test #5: failed 
+Description: Test Case 02: Random Tree (group: RTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [406 526  88 332 153 491 255  36 499 107 222 429 256 108 322 440 432 506
+ 534 447 524 238 234  45 176 514 436 109 480 247 150 313 421 317 494  67
+ 210 125 392 525 134 233 228 300 135  32 513 282 320 100 439  90  37 221
+ 212 182 117  82 149 364 343 510  33 226 457 418 162  92 338 152 476 425
+ 466 103  49 170 217 416 231 209  41  17 358 303 353 495  30 271  21 324
+ 292 147   2 388 204 328 521 394   8 399  31  19 342  43 522 402 454 348
+ 370  93 377 306 482 195 360 180 518 261  69 218 498 326 384 479   6 464
+  27 285 380 441 185 113 190 450  79  75 227 307 198  77  11  39 423 291
+ 188  84 434 248   0 369 433 173 264 372  98  87  55 333 186 391 136 294
+  60 205 345 444 405 319 428 341 130  99 272 533 137 351 346  71 192 308
+ 431 344 442 266 128 435 225 183 347 378  12 214 420 207 159 398 446 235
+ 246 356 270 337 488 486  76 121 485 458 407 213 355 200 191 242 196 143
+ 102 452 530 236 110 404 127 437 193 411 385 259 206  97 177  89 179 123
+ 508  40 224 161  56 415 284  72 390 268 139 230  42 366 520 144 467  61
+ 283  68 451 475 267 288 413 118 106 265 424 483  94 401  80 492 296 274
+ 215 119 438  96 244 249 166 305 371 531  50 151 257 449 232  15  64 474
+ 126 395 260 469 329 216 523 362 528 245  14 132  35  74 263 262 101  13
+ 304  53 219   1 509  20 168  58 298 311 181 383 279 502 229 155 178 133
+ 142 289 201 419 529 496 512 223  26 208 519 325 148 202 287  25 387  47
+ 252 174 352  38 443 286  46  63 243  48 116 165  86 471 167 373 184 340
+ 426 131  83 484 258 169 376 254 389  10 124  16 397 427 237 290 472 456
+  70 396 301 156 463 460 465 269 315 318 339  54 489 251  51 497 302 295
+ 503 203 505 278 403 277 129 163 400 273 240 199 517 381 462 535 330 386
+ 349 336 382 211 459  62 504 470 115 299 393 409 253 141 468  18 532 379
+ 281 368  34 473 146 310 507 138   3 445 105 239  78 515  73 410 354 361
+ 334 417 297 414 187 365 461 120  22 312 323 501 309 154  65  23 114  24
+ 164 197 430 455  95 111 374 350 276 321 314 275 122 408 481 158 453 448
+ 140  29 145 357 189   4 500   5 493  52  66 375 487   7  59 220 241 157
+ 194 335 250 327 363 478 293 112 516  81 316  57 331  44   9  28 422 280
+ 511 477  91 490 412 172 367 175 160 104 359 171  85 527]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got 0.5296884102723453 expected 0.95
+
+Test #6: failed 
+Description: Test Case 03: Random Tree (group: RTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [404 400 371 426 236 207   2 229 243 257 399 192 266 344  18 239 494 295
+ 136 479 255 353 313 510 176 278 199 178 433 516 186 241 181 489 263 293
+ 158 382 447 511 140 287 276 442 386  21  87 342 318 163 407 162 326 459
+ 284 150  49 177  88  92 383 530  68  31 301 112 525  53 475 315 205 473
+ 509  28 458 316  54 340 330 443  42 462 131 265 350 145 138 451 438 455
+ 508  64 193  30 486 337 102 174 294 336 258 437 200 187 127 468 312  51
+ 361 159 466 260 499 105 352 460 209   9 224 463 234 360 141 417 424 227
+ 113 430 528 286  75 411 275 271 108 206 379  33 481 117 521 170  61 501
+  36 230  27 119   3 267  58 476 218 262 223 269 101 497 519 440 518 256
+ 514 149 251 160  57 179 233 268  77 401   5  60  84  25 402 128 240 522
+ 533 484 161 172 213 534 245 445 423 368  15 217 211 175 358 214 320 369
+ 196 520 464 165 351  24 384 345 396 228  70 512 208 314  59  38 292 103
+  55 515  78 376  95 333 348  82 456 357 385 490  85 120 289 244 435  96
+ 355 453 122   8 403 144 156 203 529 346 506 291 246 390 325 436 420  97
+ 434 100 130 439 387 212 373 106 201 397  81 124 249  44 222 215 225 431
+ 482 302 356 487 290 197 405 444 338  45 457 307 392  72 153 248 416 524
+ 166 347  74   0 297 126  43 152 242 306 195 285 414 393 527  62 381 319
+ 133 142   6 304 467  65  93 129 335 427 155  19 409  17 277 448 154 526
+ 110 472  56 500 280  40 415 185 226 296 374 370 147 428  48 419 532 261
+ 188 328 421 323 493 363  22 168 311 303 503  98  20 288  67 470 535 259
+  12 109 327  89 339  90 231  94   7 189 441 254 305 425 309 169 422 116
+ 164 317 372  29 366 114 377 204 183 134 364 389 157 513   1 232 237 354
+ 523 180  16 137  47 507 216 123 505 171 210 151 362 413  99 253 115 202
+ 167 321 322 471 308 274 104 395 135  39  86  34  71 488 412 365 341 252
+ 388 272 454 492 380 432 429 182 118 139 283 331 300 235 238 367  10 250
+ 198 191  80 194 190 221 121  50 496 146 148 483 220  26  52  11 531 408
+  69 281  63  66 469 474  14 184  37 452 410 498 107  76 491  13  32 343
+ 173 132 310 418 375  91 270 264 398  41 332 279 504 449 299 282 219  83
+  73 324 359   4 465 349 478 391 495 125  35 298 329 406 143 517 446 273
+ 477 247 334 502  23  79  46 394 461 480 378 111 450 485]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got 0.40960830448948204 expected 0.95
+
+Test #7: failed 
+Description: Test Case 04: Random Tree (group: RTLearner)
+IncorrectOutput: Test failed on one or more output criteria.
+  Inputs:
+    data file: Istanbul.csv
+    permutation: [267 137 167 435 402 440 173 320  89  50 232 474 247 203 272 445 283 484
+ 383 226  49  11 102 143 419 450  47 166 421 241 189 263 161 224 186 298
+ 520 117 191  78 497 524 334  71  36 347 501 125 466 443 469 139 188 389
+ 160 360 379 514  35 142 465 358 486 238 171 243 523 405 269 136 310   5
+ 372  58 259 529 307 257 522 147 180 211 277   0 526 276 165 146  63 235
+ 471  51 345 227  22  10 164 204 513 140  90  27  74 457 190 205 304 525
+ 430 219 461 214  84 118 472 387 373 385 434 495  76 158  42 367 221   2
+ 418  17 429 220 222 420 532 431 110 496 289 148 106  86 460 300 438  14
+ 104  91 503 343 476 119  70 212   1 377 316 223 376 467 116 487 215 468
+ 346 168 265 264  13 473  97  33 382 192 201 182 195 400  59 482  93 498
+ 295 156  20  55 151 297 417 329 282 185 342   6  12 254 369 499 446 449
+ 246 444 194  23 491 331 410 108 131 179 490 133 534 134 408  67 349 174
+ 371 437 448 325 242 248 530 273 357  81 199 288 250 176 159 399 309   4
+ 196 361 262 344 285  15 213 107 480 413 252  19 403 206 149  68 100 374
+  39 141  21 207 478 506 129  66 464 407 251 470  28 423 485 415 321 533
+ 504 378 341  69 124 217 337 483 150 255 178 109 336 425 439  38 455 489
+ 239 200 145 330  34 390 507 135 428  46 163  48 388 197 105 208 392 521
+ 249 432 162 209 299  98 454 198 515 326 396   7  95  92  57 292 183 365
+ 441 509 128 354 169 364 363 481 293  73 172  16  99 245 233 356 253 350
+ 312 414 229 458   3 202 279  77  65 274 386 477 352  60 427 433 511 244
+  31 266 333 453  54 114 391 401 101  45  72 170 228 381  85 319 398 308
+ 527 311 187 103 516 327  30 313 271 359 281 493 494 175 517 528 284 426
+  94 270 237 230 287 184  37 115 502 447 475  61  24 318  87 422 314 531
+ 112 340  64 355 452 294 127  18 275 323  88 280 291 338  79 234 353  29
+ 138 510 258 181  82 225 436 339 412 512 479 126   8 113 256 535 348 411
+ 406  32  25 424  44 231  83 153 368 384 278 335  43 404 157 301 462 130
+ 375 193 456 395 317 324 305 303 416 260 268 236 111 132 144  52 505 322
+  75 362 155 366 261 332 519 122 351 290 328 393  80 123 380 306 218 451
+ 463 492 296 442 152 177   9 409 120 154 240  53  26  41  56  62 286 459
+  40 397 216 508 500 488  96 302 370 210 121 315 518 394]
+  Failures:
+    In-sample with leaf_size=1 correlation less than allowed: got -0.23631524611792065 expected 0.95
+    Out-of-sample correlation less than allowed: got -0.10777754693759267 expected 0.15
+
+Test #8: failed 
+Description: Test Case 01: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #9: failed 
+Description: Test Case 02: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #10: failed 
+Description: Test Case 03: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #11: failed 
+Description: Test Case 04: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #12: failed 
+Description: Test Case 05: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #13: failed 
+Description: Test Case 06: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #14: failed 
+Description: Test Case 07: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #15: failed 
+Description: Test Case 08: Bagging (group: BagLearner)
+TypeError: __init__() got an unexpected keyword argument 'kwargs'
+
+Test #16: passed 
+Test #17: passed 
+
diff --git a/assess_learners/points.txt b/assess_learners/points.txt new file mode 100644 index 0000000..fe6b09a --- /dev/null +++ b/assess_learners/points.txt @@ -0,0 +1 @@ +15.0 diff --git a/assess_learners/testlearner.py b/assess_learners/testlearner.py index f22966b..404eea9 100644 --- a/assess_learners/testlearner.py +++ b/assess_learners/testlearner.py @@ -26,6 +26,9 @@ import numpy as np import math import LinRegLearner as lrl import DTLearner as dtl +import RTLearner as rtl +import BagLearner as bgl +import InsaneLearner as isl import sys if __name__=="__main__": @@ -36,11 +39,7 @@ if __name__=="__main__": # data = np.array([list(map(float,s.strip().split(',')[1:])) # for s in inf.readlines()[1:]]) data = np.array([list(map(float,s.strip().split(',')[1:])) - for s in inf.readlines()]) - - # XXX: Get rid of some rows and columns for easier development. - # XXX: Remove later for testing! - # data = data[:10,5:] + for s in inf.readlines()[1:]]) # compute how much of the data is training and testing train_rows = int(0.6* data.shape[0]) @@ -58,6 +57,9 @@ if __name__=="__main__": # create a learner and train it # learner = lrl.LinRegLearner(verbose = True) # create a LinRegLearner learner = dtl.DTLearner(verbose = True) # create a LinRegLearner + # learner = rtl.RTLearner(verbose = True) # create a LinRegLearner + # learner = bgl.BagLearner(dtl.DTLearner, bags=50) # create a LinRegLearner + # learner = isl.InsaneLearner() learner.addEvidence(trainX, trainY) print(learner.author())