#!/usr/bin/perl

# Parse companies from
# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
#
# Reads the saved HTML of that page from the files named on the command line
# (or from STDIN when none are given) and prints, for every company
# identifier found, a C fragment of the form
#
#     case <decimal id>:
#         return "<company name>";
#
# on STDOUT.

use strict;
use warnings;
# use URI::Encode qw(uri_decode);

# HTML entities we know how to translate.  The script works on raw bytes
# (no "use utf8", no I/O layers), so the non-ASCII replacements are spelled
# as explicit UTF-8 byte sequences.
my %known_entities = (
    'nbsp'   => ' ',
    'eacute' => "\xc3\xa9",    # e with acute accent
    'auml'   => "\xc3\xa4",    # a with diaeresis
);

# Decode the HTML entities in a company name.
# better to use URI::Encode if you have it
#
# Parameter: the raw text of a table cell.
# Returns:   the text with every entity from %known_entities and every
#            "&amp;" (any letter case) replaced by the character it names.
# Any other "&...;" sequence is reported on STDERR and terminates the
# program with exit status 1, so an unknown entity can never slip into the
# generated code unnoticed.
sub uri_decode {
    my ($name) = @_;

    # Replace all known entities in one pass (names are case-sensitive).
    my $known = join '|', map { quotemeta } sort keys %known_entities;
    $name =~ s/&($known);/$known_entities{$1}/g;

    # Whatever still looks like an entity must be "&amp;".
    for my $entity (map { lc } $name =~ /&([^;]+);/g) {
        next if $entity eq 'amp';

        # STDOUT carries the generated C code, so diagnostics go to STDERR.
        print STDERR "Unable to convert &$entity;, giving up\n";
        exit 1;
    }

    # "&amp;" is decoded last so that "&amp;nbsp;" yields the literal text
    # "&nbsp;" instead of being decoded a second time.
    $name =~ s/&amp;/&/ig;

    return $name;
}

# Build the C "case" fragment for one company.
#
# Parameters: $identifier - company identifier as a hex string ("0x000F")
#             $name       - already entity-decoded company name
# Returns:    the two-line C fragment, or the empty string for identifier
#             0xFFFF, which is deliberately left out of the output
#             (NOTE(review): presumably covered by the consumer's default
#             case - confirm against the file this output is pasted into).
sub format_case {
    my ($identifier, $name) = @_;

    $name =~ s/^\s+//;    # kill leading
    $name =~ s/\s+$//;    # and trailing space

    my $id = hex($identifier);
    return '' if $id == 0xFFFF;

    # The name ends up inside a C string literal: protect quotes and
    # backslashes so the generated code always compiles.
    $name =~ s/(["\\])/\\$1/g;

    return "\tcase $id:\n\t\treturn \"$name\";\n";
}

# never parse HTML with regex!
# except when you should
#
# Line-oriented scan of the input: a line holding a table cell with a
# 0xNNNN value sets the current identifier, and the next line holding a
# complete <td>...</td> cell is taken as that company's name.
# NOTE(review): the two patterns were restored from a copy of this script
# whose markup had been stripped - confirm them against the current page.
sub main {
    my $identifier;
    my $next_is_name = 0;

    while (my $line = <>) {
        $line =~ s/\xe2\x80\x8b//g;    # kill zero width space

        # grab identifier (in hex)
        if ($line =~ /<td.*(0x[0-9A-F]{4})/i) {
            $identifier   = $1;
            $next_is_name = 1;
        }
        # next <td> should be company name
        elsif ($next_is_name && $line =~ m{<td.*>(.*)</td>}) {
            print format_case($identifier, uri_decode($1));
            $next_is_name = 0;
        }
    }

    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without consuming STDIN.
main() unless caller;