summary | refs | log | tree | commit | diff
path: root/tools/parse_companies.pl
diff options
context:
space:
mode:
Diffstat (limited to 'tools/parse_companies.pl')
-rwxr-xr-x tools/parse_companies.pl 59
1 files changed, 59 insertions, 0 deletions
diff --git a/tools/parse_companies.pl b/tools/parse_companies.pl
new file mode 100755
index 000000000..6dc358ee0
--- /dev/null
+++ b/tools/parse_companies.pl
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+
+# parse companies from
+# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
+
+use strict;
+# use URI::Encode qw(uri_decode);
+
+# Named HTML entities (written without the leading '&' and trailing ';')
+# that uri_decode() replaces, each mapped to its replacement text.
+my %known_entities = (
+    nbsp   => ' ',
+    eacute => 'é',
+    auml   => 'ä',
+);
+
+# better to use URI::Encode if you have it
+# NOTE(review): this routine decodes HTML character entities rather than
+# URI escapes; HTML::Entities looks like the closer fit -- confirm.
+#
+# Replace the HTML entities found in a scraped company name.
+#
+# Argument: the raw text of the table cell.
+# Returns:  the text with every entity listed in %known_entities, and
+#           every &amp; (in any letter case), replaced by its character.
+# Exits with status 1 when any other entity is present, so an unknown
+# entity never reaches the generated output.
+sub uri_decode {
+    my ($name) = @_;
+
+    # Table-driven replacements; entity names are matched case-sensitively.
+    foreach my $entity (keys %known_entities) {
+        my $to = $known_entities{$entity};
+        $name =~ s/&\Q$entity\E;/$to/g;
+    }
+
+    # Only &amp; may remain now; anything else is an entity we cannot map.
+    foreach my $entity (map { lc $_ } $name =~ /&([^;]+);/g) {
+        next if $entity eq 'amp';
+
+        # Report on STDERR: STDOUT carries the generated case statements.
+        print STDERR "Unable to convert &$entity;, giving up\n";
+        exit 1;
+    }
+
+    # Decode &amp; last, so the '&' it yields is never taken for the start
+    # of another entity (e.g. "&amp;eacute;" must become "&eacute;").
+    $name =~ s/&amp;/&/ig;
+    return $name;
+}
+
+# never parse HTML with regex!
+# except when you should
+
+# State carried from one input line to the next: the identifier seen most
+# recently, and whether the cell holding its company name is still awaited.
+my $identifier;
+my $next_is_name = 0;
+
+# Read the input line by line (files named on the command line, else STDIN)
+# and print a C-style "case <id>:" / "return <name>;" pair for each company.
+while (<>) {
+    s/\xe2\x80\x8b//g;    # kill zero width space (UTF-8 bytes of U+200B)
+
+    # grab identifier (in hex)
+    if (/\<td.*(0x[0-9A-F]{4})/i) {
+        $identifier = $1;
+        $next_is_name = 1;
+
+    # next <td> should be company name
+    } elsif ($next_is_name && m|\<td.*\>(.*)\</td\>|) {
+        my $name = uri_decode($1);
+        $name =~ s/^\s+//;    # kill leading
+        $name =~ s/\s+$//;    # and trailing space
+
+        # The name is printed inside a double-quoted string literal, so
+        # escape the two characters that would end or corrupt that literal.
+        $name =~ s/(["\\])/\\$1/g;
+
+        my $id = hex($identifier);
+
+        # no case is generated for identifier 0xFFFF
+        if ($id != 0xFFFF) {
+            print "\tcase $id:\n";
+            print "\t\treturn \"$name\";\n";
+        }
+        $next_is_name = 0;
+    }
+}