I tried to create a piece of code that helps me remove duplicates from my SQL database and reset the IDs as well. I am not sure that it's error-proof.
"""Remove duplicate rows from the `articles` table and renumber its IDs.

Two rows are duplicates when headline, description, link, date and hour all
match; of each duplicate group only the row with the highest ID is kept.
Afterwards the surviving rows are renumbered 1..N in ascending ID order and
the table's AUTO_INCREMENT counter is reset.

NOTE(review): renumbering a primary key breaks any other table that stores
`articles.ID` as a reference -- confirm nothing depends on these IDs.
"""
import mysql.connector

# Database connection parameters (placeholders -- replace with real values).
host = 'host'
database = 'database'
user = 'user'
password = 'password'
port = 3306  # MySQL's default port; substitute the server's actual port

# `<=>` is MySQL's NULL-safe equality: plain `=` yields NULL when either side
# is NULL, so rows that share a NULL column would never be seen as duplicates.
DELETE_DUPLICATES_QUERY = """
    DELETE t1
    FROM articles AS t1
    INNER JOIN articles AS t2
        ON  t1.ID < t2.ID
        AND t1.headline    <=> t2.headline
        AND t1.description <=> t2.description
        AND t1.link        <=> t2.link
        AND t1.date        <=> t2.date
        AND t1.hour        <=> t2.hour
"""

# Sent one statement per execute() call: a single execute() is not a reliable
# way to run several `;`-separated statements with Connector/Python.
# ORDER BY ID makes the renumbering walk rows in ascending ID order, so each
# new ID is <= the row's old ID and can never collide with a row that has not
# been renumbered yet.
RENUMBER_STATEMENTS = (
    "SET @new_id = 0",
    "UPDATE articles SET ID = (@new_id := @new_id + 1) ORDER BY ID",
)

# Asking for 1 is a request for "the lowest free value"; per the MySQL manual
# the counter ends up at MAX(ID) + 1 when rows already exist.
RESET_AUTO_INCREMENT_QUERY = "ALTER TABLE articles AUTO_INCREMENT = 1"


def remove_duplicates(conn, cursor):
    """Delete duplicate articles, keeping the highest-ID row, and commit."""
    cursor.execute(DELETE_DUPLICATES_QUERY)
    conn.commit()


def reset_primary_key(conn, cursor):
    """Renumber IDs to 1..N and reset the AUTO_INCREMENT counter."""
    for statement in RENUMBER_STATEMENTS:
        cursor.execute(statement)
    # Commit the renumbering explicitly: ALTER TABLE is DDL and causes an
    # implicit commit anyway, so make the transaction boundary visible.
    conn.commit()
    cursor.execute(RESET_AUTO_INCREMENT_QUERY)


def main():
    """Connect, deduplicate, renumber, and always release the connection."""
    # Bound up front so the cleanup code is safe even when connect() raises.
    conn = None
    cursor = None
    try:
        # Establishing the connection
        conn = mysql.connector.connect(
            host=host,
            database=database,
            user=user,
            password=password,
            port=port,
        )
        if not conn.is_connected():
            print('Could not connect to the database')
            return
        print('Connected to the database')
        cursor = conn.cursor()

        remove_duplicates(conn, cursor)
        print('Duplicates removed')

        reset_primary_key(conn, cursor)
        print('Primary key reset')
    except mysql.connector.Error as e:
        # Discard any half-finished DELETE/UPDATE instead of leaving it
        # pending on the connection.
        if conn is not None and conn.is_connected():
            conn.rollback()
        print(f"Error: {e}")
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None and conn.is_connected():
            conn.close()
            print('Database connection closed')


if __name__ == "__main__":
    main()
For reference
Table name : articles
ID
headline
description
link
date
hour