Jump to content

Go Programming Language Cookbook/Bit map

From Wikibooks, open books for an open world

Given a big file of IPv4 addresses (more than 100 GB) — we need to count unique addresses. If we use generic map[string]bool — we will need more than 64 GB of RAM, so lets use the bit map:

packagemainimport("bufio""fmt""math/bits""os")// bitsetSize is the number of bytes needed for 2^32 bits (512 MiB)constbitsetSize=1<<29funcmain(){file,err:=os.Open("ip_addresses")iferr!=nil{fmt.Println("Error opening file:",err)return}deferfile.Close()bitset:=[bitsetSize]byte{}// Use a buffered scanner with a larger bufferscanner:=bufio.NewScanner(file)constmaxBuffer=64*1024// 64 KB bufferbuf:=make([]byte,0,maxBuffer)scanner.Buffer(buf,maxBuffer)// Process each lineforscanner.Scan(){line:=scanner.Bytes()// Parse the IP address manually from bytesip:=parseIPv4(line)// Set the bitbyteIndex:=ip>>3// Divide by 8bitIndex:=ip&7// Bit position 0-7bitset[byteIndex]|=1<<bitIndex}// Check for scanning errorsiferr:=scanner.Err();err!=nil{fmt.Println("Error reading file:",err)return}varcountuint64fori:=0;i<bitsetSize;i++{count+=uint64(bits.OnesCount8(bitset[i]))}fmt.Println("Number of unique IPv4 addresses:",count)}funcparseIPv4(line[]byte)(ipuint32){i:=0// Octet 1n:=uint32(line[i]-'0')fori=1;line[i]!='.';i++{n=n*10+uint32(line[i]-'0')}ip|=n<<24i++// Skip the dot// Octet 2n=uint32(line[i]-'0')i++for;line[i]!='.';i++{n=n*10+uint32(line[i]-'0')}ip|=n<<16i++// Skip the dot// Octet 3n=uint32(line[i]-'0')i++for;line[i]!='.';i++{n=n*10+uint32(line[i]-'0')}ip|=n<<8i++// Skip the dot// Octet 4n=uint32(line[i]-'0')i++for;i<len(line);i++{n=n*10+uint32(line[i]-'0')}ip|=nreturnip}
close