From 91725ad12b68a5a0ebc77d7fc57f1e61b72ef73d Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Tue, 6 Feb 2018 16:09:58 +0100 Subject: [PATCH] Finished euler 21 in ipython. --- ipython/EulerProblem021.ipynb | 310 +++++++++++++++++++++++----------- 1 file changed, 216 insertions(+), 94 deletions(-) diff --git a/ipython/EulerProblem021.ipynb b/ipython/EulerProblem021.ipynb index cab692f..f81cc68 100644 --- a/ipython/EulerProblem021.ipynb +++ b/ipython/EulerProblem021.ipynb @@ -18,64 +18,129 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Reuse factorization function from problem 12." + "We reuse the sieve of eratosthenes (which we called get_primes_smaller previously). What annoys me about the implementation is that we do not stop scanning for prospects once the current prime squared is greater than the limit. Let's test if it returns the same result and compare the execution time." ] }, { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5.759276217543773\n", + "0.3214989060045452\n" + ] + } + ], "source": [ - "def get_primes_smaller(number):\n", + "def get_primes_smaller(limit):\n", " primes = []\n", - " prospects = [n for n in range(2, number)]\n", + " prospects = [n for n in range(2, limit)]\n", " while prospects:\n", " p = prospects[0]\n", " prospects = [x for x in prospects if x % p != 0]\n", " primes.append(p)\n", " return primes\n", "\n", - "primes_smaller_10000 = get_primes_smaller(10000)\n", - "\n", - "def get_prime_factors(number):\n", - " remainder = number\n", - " factors = []\n", - " for p in primes_smaller_10000:\n", - " while remainder % p == 0:\n", - " remainder /= p\n", - " factors.append(p)\n", - " if remainder == 1 or p * p > number:\n", + "def sieve_of_eratosthenes(limit):\n", + " primes = []\n", + " prospects = [n for n in range(2, limit)]\n", + " while prospects:\n", + " p = prospects[0]\n", + " prospects = [x for x in prospects if x % p != 0]\n", + " primes.append(p)\n", + " if p * p > limit:\n", " break\n", - " if remainder != 1:\n", - " factors.append(remainder)\n", - " return factors" + " primes += prospects\n", + " return primes \n", + "\n", + "import timeit\n", + "import functools\n", + "print(timeit.timeit(functools.partial(get_primes_smaller, 10000), number=100))\n", + "print(timeit.timeit(functools.partial(sieve_of_eratosthenes, 10000), number=100))\n", + "assert(get_primes_smaller(10000) == sieve_of_eratosthenes(10000))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "If we have a list of all factors we group them. To get all potential divisors for each group we may use each factor 0 to len(factor_group). We then combine the potential divisors of all factor groups. " + "Okay, this difference is sick actually. Interesting to see what the difference between O(sqrt(n)) and O(n) can mean. Now we can use the primes to do the factorization. I will again apply some optimizations and see how it turns out" ] }, { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": false - }, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6.244833890762658\n", + "5.688215703110259\n" + ] + } + ], + "source": [ + "def get_prime_factors_old(number):\n", + " prime_generator = sieve_of_eratosthenes(number // 2)\n", + " remainder = number\n", + " factors = []\n", + " for p in prime_generator:\n", + " while remainder % p == 0:\n", + " remainder //= p\n", + " factors.append(p)\n", + " if remainder == 1 or p * p > number:\n", + " break\n", + " if remainder != 1:\n", + " factors.append(remainder)\n", + " return factors\n", + "\n", + "def get_prime_factors(number):\n", + " remainder = number\n", + " factors = []\n", + " for p in sieve_of_eratosthenes(number // 2):\n", + " while remainder % p == 0:\n", + " remainder //= p\n", + " factors.append(p)\n", + " if remainder == 1:\n", + " break\n", + " if p > remainder or p * p > number:\n", + " factors.append(remainder)\n", + " break\n", + " return factors\n", + "\n", + "import timeit\n", + "import functools\n", + "print(timeit.timeit(functools.partial(get_prime_factors_old, 1000000), number=10))\n", + "print(timeit.timeit(functools.partial(get_prime_factors, 1000000), number=10))\n", + "assert(get_prime_factors_old(100000) == get_prime_factors(100000))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Okay, actually no big difference. Kind of expected if you check how similar the implementations are. I though stopping if p is greater than the remainder may be a common case.\n", + "\n", + "Now we can use the prime factors to get all factors. For this each prime number can be used 0 to n times where n is the number how often the prime occurs in the prime factorization. For example, all possible fators for the primes $2, 2, 3$ are $2^0 \\times 3^0, 2^1 \\times 3^0, 2^2 \\times 3^0, 2^0 \\times 3^1, 2^1 \\times 3^1, 2^2 \\times 3^1$ which results in $1, 2, 4, 3, 6, 12$.\n", + "\n", + "So we proceed in the following way. Once we have the prime factors we group then. Then we calculate the factors for each group. For example for $2,2$ the factors would be $1, 2, 4$ as explained in the previous paragraph. Then we calculate the combinations of all factor groups." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, "outputs": [], "source": [ - "def product(xs):\n", - " from operator import mul\n", - " from functools import reduce\n", - " return reduce(mul, xs, 1)\n", - "\n", "def group(xs):\n", " from functools import reduce\n", + " rs = []\n", " def f(xss, x):\n", " if xss and x in xss[-1]:\n", " xss[-1].append(x)\n", @@ -84,107 +149,161 @@ " return xss\n", " return reduce(f, xs, [])\n", "\n", - "def combinations(xss):\n", - " from functools import reduce\n", - " def f(xs, ys):\n", - " return [x + [y]\n", - " for y in ys\n", - " for x in xs\n", - " ] \n", - " return reduce(f, list(xss), [[]])\n", + "assert(group([1, 2, 2, 2, 4, 4, 5]) == [[1], [2, 2, 2], [4, 4], [5]])\n", "\n", - "def get_divisors_for_factor_group(factor_group):\n", - " factor = factor_group[0]\n", - " return [factor**i for i in range(len(factor_group) + 1)]\n", + "def group(xs):\n", + " r = []\n", + " for x in xs:\n", + " if r and r[-1][0] == x:\n", + " r[-1].append(x)\n", + " else:\n", + " r.append([x])\n", + " return r\n", "\n", - "def get_proper_divisors(number):\n", - " factors = get_prime_factors(number)\n", - " factor_groups = map(get_divisors_for_factor_group, group(factors))\n", - " factor_combinations = combinations(factor_groups)\n", - " divisors = map(product, factor_combinations)\n", - " factors = list(divisors)\n", - " factors.sort()\n", - " return factors[:-1] # last value is not a proper divisor\n", - "\n", - "assert(get_proper_divisors(220) == [1, 2, 4, 5, 10, 11, 20, 22, 44, 55, 110])" + "assert(group([1, 2, 2, 2, 4, 4, 5]) == [[1], [2, 2, 2], [4, 4], [5]])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can finally start to get the solution." + "As you can see I had a fancy version with reduce first, but the loop is way more readable. We iterate over the input list. If the current item is in the last sublist of the return list we add it to the list. Otherwise we append a new sublist with the item.\n", + "\n", + "Next we write a function that calculates the resulting factors for a factor group." ] }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def prime_group_to_factors(group):\n", + " f = group[0]\n", + " n = len(group)\n", + " return [f**i for i in range(n + 1)]\n", + "\n", + "assert(prime_group_to_factors([2, 2, 2]) == [1, 2, 4, 8])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "No we only have to get all factors by calculating the combination of two factor lists." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def combine_factors(xs, ys):\n", + " return [x * y for y in ys for x in xs]\n", + "\n", + "assert(combine_factors([1, 2, 4], [1, 3]) == [1, 2, 4, 3, 6, 12])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now actually we want to combine an arbitrary number of factor lists." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def combine_factors(xss):\n", + " ys = [1]\n", + " for xs in xss:\n", + " ys = [x * y for y in ys for x in xs]\n", + " return ys" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def get_divisors(n):\n", + " prime_factors = get_prime_factors(n)\n", + " prime_groups = group(prime_factors)\n", + " factor_groups = list(map(prime_group_to_factors, prime_groups))\n", + " divisors = combine_factors(factor_groups)\n", + " divisors.sort()\n", + " return divisors\n", + " \n", + "assert(get_divisors(220) == [1, 2, 4, 5, 10, 11, 20, 22, 44, 55, 110, 220])\n", + "assert(get_divisors(13) == [1, 13])\n", + "assert(get_divisors(26) == [1, 2, 13, 26])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can finally start to get the solution. Remember that we only want to get the sum of proper divisors which are the divisors withouth the number itself." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, "outputs": [], "source": [ "def sum_of_proper_divisors(number):\n", - " return sum(get_proper_divisors(number))\n", + " return sum(get_divisors(number)[:-1])\n", "\n", "assert(sum_of_proper_divisors(220) == 284)\n", "assert(sum_of_proper_divisors(284) == 220)\n", "\n", "def is_amicable(number):\n", " s = sum_of_proper_divisors(number)\n", + " if s == number:\n", + " return False\n", " return number == sum_of_proper_divisors(s)\n", "\n", "assert(is_amicable(220))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The if statement is very import because a number pair $a, b$ is only amicable if $a \\neq b$." + ] + }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, + "execution_count": 9, + "metadata": {}, "outputs": [ { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mamicable_till_10000\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10000\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_amicable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mamicable_till_10000\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10000\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_amicable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m\u001b[0m in \u001b[0;36mis_amicable\u001b[1;34m(number)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mis_amicable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0ms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msum_of_proper_divisors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mnumber\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0msum_of_proper_divisors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;32massert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mis_amicable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m220\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m\u001b[0m in \u001b[0;36msum_of_proper_divisors\u001b[1;34m(number)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0msum_of_proper_divisors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mget_proper_divisors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32massert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msum_of_proper_divisors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m220\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m284\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32massert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msum_of_proper_divisors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m284\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m220\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m\u001b[0m in \u001b[0;36mget_proper_divisors\u001b[1;34m(number)\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 29\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mget_proper_divisors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 30\u001b[1;33m \u001b[0mfactors\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_prime_factors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 31\u001b[0m \u001b[0mfactor_groups\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mget_divisors_for_factor_group\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgroup\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfactors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[0mfactor_combinations\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcombinations\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfactor_groups\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m\u001b[0m in \u001b[0;36mget_prime_factors\u001b[1;34m(number)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mprimes_smaller_10000\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[0mremainder\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[0mremainder\u001b[0m \u001b[1;33m/=\u001b[0m \u001b[0mp\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 18\u001b[0m \u001b[0mfactors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mremainder\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mnumber\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + "name": "stdout", + "output_type": "stream", + "text": [ + "31626\n" ] } ], "source": [ - "amicable_till_10000 = [i for i in range(1, 10000) if is_amicable(i)]" + "amicable_till_10000 = [i for i in range(1, 10000) if is_amicable(i)]\n", + "assert(sum(amicable_till_10000) == 31626)\n", + "print(sum(amicable_till_10000))" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "for a in amicable_till_10000:\n", - " print(a)" + "There is probably a much faster algorithm which works by caching the factors for previous numbers. Let's say we got the factors for $f(4) = (1, 2, 4)$. And our number new number is $28$. Ah, that does not really work as I thought. I am sure there is a smarter way." ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -204,16 +323,19 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.4" + "version": "3.6.3" }, "tags": [ "amicable", "factors", "combinations", "reduce", - "prime" + "prime", + "sieve of eratosthenes", + "partial", + "timeit" ] }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 }