I am working on a coding challenge from the Hackerrank site. Given two equal-length arrays of integers, with values from 2 to 109, find the maximum number of times we can remove a pair (Ai, Bj) where Ai and Bj are not co-prime.
The programming language of my choice is Python2. Hackerrank has timeout of 10 secs for the Python2 submissions and it is noteworthy that Hackerrank doesn't provide the numpy
package for the standard challenges.
My current approach is to reduce this problem to well-known Max-Flow problem and solve it using the Dinic algorithm.
My Python code:
# -*- coding: utf-8 -*- #!/bin/python from collections import * def gen_primes(): D = {} q = 2 while True: if q not in D: yield q D[q * q] = [q] else: for p in D[q]: D.setdefault(p + q, []).append(p) del D[q] q += 1 # precompute prime numbers smaller than sqrt(10**9) prime_gen = gen_primes() primes = [next(prime_gen) for _ in xrange(3500)] # enhanced trial division method using precomputed prime numbers def trial_division(n): a = set() while n % 2 == 0: a.add(2) n /= 2 i = 1 f = primes[i] while f * f <= n: if n % f == 0: a.add(f) n /= f else: i += 1 f = primes[i] if n != 1: a.add(n) return a def bfs(graph, cap_edges, level, s, t): queue = deque() queue.append(s) level[s] = 1 while queue: u = queue.popleft() for v, eid, _ in graph[u]: if level[v] == 0 and cap_edges[eid] > 0: level[v] = level[u] + 1 queue.append(v) return level[t] > 0 def dfs(graph, ptr, cap_edges, level, u, s, t): if u == t: return 1 adj = graph[u] ind = ptr[u] i = ind n = len(adj) while i < n: v, eid, eid_b = adj[i] ptr[u] = i i += 1 if (level[v] == level[u] + 1) and cap_edges[eid] > 0: f = dfs(graph, ptr, cap_edges, level, v, s, t) if f > 0: cap_edges[eid] -= 1 cap_edges[eid_b] += 1 return f return 0 # solve the max-flow problem using the Dinic algorithm def max_flow(graph, cap_edges, s, t): n = len(graph) level = [0] * n ptr = [0] * n flow = 0 while bfs(graph, cap_edges, level, s, t): f = dfs(graph, ptr, cap_edges, level, s, s, t) while f > 0: flow += f f = dfs(graph, ptr, cap_edges, level, s, s, t) level = [0] * n ptr = [0] * n return flow def computer_game(n, A, B): start_node = 0 end_node = 1 graph = defaultdict(list) cap_edges = [] node_count = 2 edges_count = 0 prime_nodes_map = {} for value in A: current_node = node_count graph[start_node].append((current_node, edges_count, edges_count+1)) cap_edges.append(1) graph[current_node].append((start_node, edges_count+1, edges_count)) cap_edges.append(0) edges_count += 2 node_count += 1 factors = trial_division(value) for p in factors: if p not in prime_nodes_map: prime_nodes_map[p] = node_count node_count += 1 p_node = prime_nodes_map[p] graph[current_node].append((p_node, edges_count, edges_count+1)) cap_edges.append(1) graph[p_node].append((current_node, edges_count+1, edges_count)) cap_edges.append(0) edges_count += 2 for value in B: current_node = node_count graph[current_node].append((end_node, edges_count, edges_count+1)) cap_edges.append(1) graph[end_node].append((current_node, edges_count+1, edges_count)) cap_edges.append(0) edges_count += 2 node_count += 1 factors = trial_division(value) for p in factors: if p not in prime_nodes_map: prime_nodes_map[p] = node_count node_count += 1 p_node = prime_nodes_map[p] graph[p_node].append((current_node, edges_count, edges_count+1)) cap_edges.append(1) graph[current_node].append((p_node, edges_count+1, edges_count)) cap_edges.append(0) edges_count += 2 result = max_flow(graph, cap_edges, start_node, end_node) print(result) if __name__ == '__main__': n = int(raw_input()) a = map(int, raw_input().rstrip().split()) b = map(int, raw_input().rstrip().split()) computer_game(n, a, b)
This Python code gets timeout for the last 2 biggest test instances, but I know in fact that I am using the right algorithm because I submitted a C++ solution implementing the exact same algorithm, which passes all the test cases with flying colors.
So my question is, can my Python code above be optimized any further? Did I miss some obvious optimizations? At this point I am wondering if this challenge can be solved with Python at all...
Here is a large test-case pair which causes timeout: input.txtoutput.txt
For the comparison, here is my C++ solution which passes all test cases:
#include <bits/stdc++.h> using namespace std; struct Entry { int node; int eid; int eid_r; }; vector<int> gen_primes(int n) { vector<int> result; bool prime[n+1]; memset(prime, true, sizeof(prime)); for (int p=2; p*p<=n; p++) { if (prime[p]) { for (int i=p*2; i<=n; i += p) prime[i] = false; } } for (int p=2; p<=n; p++) { if (prime[p]) { result.push_back(p); } } return result; } vector<int> factorization(int n, const vector<int>& primes) { vector<int> a; if (!(n&1)) { a.push_back(2); n >>= 1; while (!(n&1)) { n >>= 1; } } int i = 1; int f = primes[i]; while (f*f <= n) { if (n % f == 0) { a.push_back(f); n /= f; while (n % f == 0) { n /= f; } } f = primes[i++]; } if (n != 1) { a.push_back(n); } return a; } bool bfs(const vector<vector<Entry>> &graph, vector<int>& cap_edges, vector<int>& level, int s, int t) { queue<int> q; q.push(s); level[s] = 1; while (!q.empty()) { int u = q.front(); q.pop(); for (const Entry &e : graph[u]) { if ((level[e.node] == 0) && (cap_edges[e.eid] > 0)) { level[e.node] = level[u] + 1; q.push(e.node); } } } return level[t] > 0; } int dfs(const vector<vector<Entry>> &graph, vector<int>& ptr, vector<int>& cap_edges, vector<int>& level, int u, int s, int t) { if (u == t) { return 1; } for (int i = ptr[u]; i < graph[u].size(); i++) { const Entry& e = graph[u][i]; ptr[u] = i; if ((level[e.node] == level[u]+1) && (cap_edges[e.eid] > 0)) { int f = dfs(graph, ptr, cap_edges, level, e.node, s, t); if (f > 0) { cap_edges[e.eid] -= f; cap_edges[e.eid_r] += f; return f; } } } return 0; } int max_flow(const vector<vector<Entry>> &graph, vector<int>& cap_edges, int s, int t) { int n = graph.size(); vector<int> level(n, 0); vector<int> ptr(n, 0); int flow = 0; while (bfs(graph, cap_edges, level, s, t)) { int f; do { f = dfs(graph, ptr, cap_edges, level, s, s, t); flow += f; } while(f > 0); fill(level.begin(), level.end(), 0); fill(ptr.begin(), ptr.end(), 0); } return flow; } void solve(int n, const vector<int>& A, const vector<int>& B) { vector<int> primes = gen_primes(32000); int start_node = 0; int end_node = 1; vector<vector<Entry>> graph(310000); vector<int> cap_edges; int node_count = 2; int edge_count = 0; map<int, int> prime_nodes_map; for (int value : A) { int current_node = node_count; graph[start_node].push_back({current_node, edge_count, edge_count+1}); cap_edges.push_back(1); graph[current_node].push_back({start_node, edge_count+1, edge_count}); cap_edges.push_back(0); edge_count += 2; node_count += 1; vector<int> prime_factors = factorization(value, primes); for (int p : prime_factors) { auto it = prime_nodes_map.find(p); if (it == prime_nodes_map.end()) { prime_nodes_map[p] = node_count; node_count += 1; } int p_node = prime_nodes_map[p]; graph[current_node].push_back({p_node, edge_count, edge_count+1}); cap_edges.push_back(1); graph[p_node].push_back({current_node, edge_count+1, edge_count}); cap_edges.push_back(0); edge_count += 2; } } for (int value : B) { int current_node = node_count; graph[current_node].push_back({end_node, edge_count, edge_count+1}); cap_edges.push_back(1); graph[end_node].push_back({current_node, edge_count+1, edge_count}); cap_edges.push_back(0); edge_count += 2; node_count += 1; vector<int> prime_factors = factorization(value, primes); for (int p : prime_factors) { auto it = prime_nodes_map.find(p); if (it == prime_nodes_map.end()) { prime_nodes_map[p] = node_count; node_count += 1; } int p_node = prime_nodes_map[p]; graph[p_node].push_back({current_node, edge_count, edge_count+1}); cap_edges.push_back(1); graph[current_node].push_back({p_node, edge_count+1, edge_count}); cap_edges.push_back(0); edge_count += 2; } } cout << max_flow(graph, cap_edges, start_node, end_node) << endl; } int main(int argc, char **argv) { int n; cin >> n; vector<int> a; for (int i = 0; i < n; i++) { int val; cin >> val; a.push_back(val); } vector<int> b; for (int i = 0; i < n; i++) { int val; cin >> val; b.push_back(val); } solve(n, a, b); }
trial_division
function? And checking the cache inside yourn
loop? It seems likely that you would see some hits, but I'm not sure how many.\$\endgroup\$trial_division
function but the hits were negligible since the input data are uniformly random generated between 2 and 10^9...\$\endgroup\$n
. Too bad.\$\endgroup\$