From 70faf81a82eca517faf82c8d6898109a637eeb68 Mon Sep 17 00:00:00 2001 From: Sean Callan Date: Mon, 23 Jan 2017 16:14:43 -0700 Subject: [PATCH] Remove GenServer dependency & update pattern file (#10) * Latest patterns.yml and remove GenServer dep * Clean-up adqio changes --- lib/ua_parser.ex | 18 +-------- lib/ua_parser/device.ex | 2 +- lib/ua_parser/parsers/device.ex | 23 ++++++----- lib/ua_parser/parsers/operating_system.ex | 5 +-- lib/ua_parser/parsers/version.ex | 21 +++++----- lib/ua_parser/storage.ex | 47 ++++++----------------- mix.exs | 23 ++--------- mix.lock | 3 +- priv/patterns.yml | 40 ++++++++++++++----- test/ua_parser/parsers/device_test.exs | 2 +- 10 files changed, 80 insertions(+), 104 deletions(-) diff --git a/lib/ua_parser.ex b/lib/ua_parser.ex index 76570a5..d615c1b 100644 --- a/lib/ua_parser.ex +++ b/lib/ua_parser.ex @@ -1,24 +1,10 @@ defmodule UAParser do @moduledoc """ - A fast User Agent parser with a simple API. + A fast User Agent parser with a widely used API. """ - use Application - alias UAParser.{Parser, Storage} - @doc false - def start(_type, _args) do - import Supervisor.Spec, warn: false - - children = [ - worker(Storage, []), - ] - - opts = [strategy: :one_for_one, name: UAParser.Supervisor] - Supervisor.start_link(children, opts) - end - @doc """ Parse a user-agent string into structs @@ -33,7 +19,7 @@ defmodule UAParser do iex> to_string(ua.device) "Other" """ - def parse(ua), do: Parser.parse(pattern, ua) + def parse(ua), do: Parser.parse(pattern(), ua) defp pattern, do: Storage.list end diff --git a/lib/ua_parser/device.ex b/lib/ua_parser/device.ex index 6ca7aae..6fd8e2b 100644 --- a/lib/ua_parser/device.ex +++ b/lib/ua_parser/device.ex @@ -14,7 +14,7 @@ defmodule UAParser.Device do iex> to_string(device) "Other" """ - defstruct [:family] + defstruct [:brand, :family, :model] end defimpl String.Chars, for: UAParser.Device do diff --git a/lib/ua_parser/parsers/device.ex b/lib/ua_parser/parsers/device.ex index 3b3d018..a14c6db 100644 --- a/lib/ua_parser/parsers/device.ex +++ b/lib/ua_parser/parsers/device.ex @@ -4,22 +4,27 @@ defmodule UAParser.Parsers.Device do of a user agent. """ - alias UAParser.{Device, Parsers.Base} + @behaviour UAParser.Parsers.Base - import Base - @behaviour Base + import UAParser.Parsers.Base + + alias UAParser.Device + + @replacements [brand: :brand_replacement, family: :device_replacement, model: :model_replacement] def parse(nil), do: %Device{} - def parse({group, match}) do - family = Keyword.get(group, :device_replacement) + def parse({group, match}), + do: do_replacement(@replacements, {group, match}, %Device{}) - family = + def do_replacement([], _, device), do: device + def do_replacement([{key, replacement}| replacements], {group, match}, device) do + replace = Keyword.get(group, replacement) + replace = match |> Enum.with_index - |> Enum.reduce(family, fn({_, index}, acc) -> + |> Enum.reduce(replace, fn({_, index}, acc) -> replace(acc, index, match) end) - - %Device{family: family} + do_replacement(replacements, {group, match}, Map.put(device, key, replace)) end end diff --git a/lib/ua_parser/parsers/operating_system.ex b/lib/ua_parser/parsers/operating_system.ex index 2b6583b..d006d15 100644 --- a/lib/ua_parser/parsers/operating_system.ex +++ b/lib/ua_parser/parsers/operating_system.ex @@ -3,10 +3,9 @@ defmodule UAParser.Parsers.OperatingSystem do A parser module representing the operating system derived from a user agent. """ + import UAParser.Parsers.Base - alias UAParser.{OperatingSystem, Parsers.Base} - - import Base + alias UAParser.OperatingSystem replacement_parser struct: OperatingSystem, keys: [:os_replacement, diff --git a/lib/ua_parser/parsers/version.ex b/lib/ua_parser/parsers/version.ex index 5cab942..774618b 100644 --- a/lib/ua_parser/parsers/version.ex +++ b/lib/ua_parser/parsers/version.ex @@ -4,23 +4,26 @@ defmodule UAParser.Parsers.Version do browser derived from the user agent. """ - alias UAParser.{Version, Parsers.Base} + @behaviour UAParser.Parsers.Base - import Base - @behaviour Base + import UAParser.Parsers.Base + + alias UAParser.Version def parse(nil), do: %Version{} - def parse({group, match}, keys \\ []) do + def parse(grouping, keys \\ []) do keys |> Enum.with_index - |> Enum.map(fn({key, index}) -> - group - |> Keyword.get(key) - |> replace(index + 1, match) - end) + |> Enum.map(&parse_version(grouping, &1)) |> version end + defp parse_version({group, match}, {key, index}) do + group + |> Keyword.get(key) + |> replace(index + 1, match) + end + defp version([major, minor, patch, patch_minor]), do: %Version{major: major, minor: minor, patch: patch, patch_minor: patch_minor} end diff --git a/lib/ua_parser/storage.ex b/lib/ua_parser/storage.ex index db88b53..026ae12 100644 --- a/lib/ua_parser/storage.ex +++ b/lib/ua_parser/storage.ex @@ -1,46 +1,21 @@ defmodule UAParser.Storage do @moduledoc """ - Storage of User-Agent regular expressions. + Load pattern data at compile time. Recompiling the application is necessary after updating the pattern file. """ - use GenServer - - alias __MODULE__, as: Storage alias UAParser.Processor - @doc """ - Start our GenServer. - """ - def start_link(opts \\ []) do - GenServer.start_link(Storage, opts, name: Storage) - end - - @doc """ - Initialize storage - """ - def init(opts), do: load_patterns(opts) - - @doc """ - Look for a matching User-Agent - """ - def handle_call(:list, _from, opts), - do: {:reply, opts[:data], opts} - - @doc """ - """ - def list, do: GenServer.call(Storage, :list) + Application.start(:yamerl) - defp load_patterns(opts) do - data = - :ua_parser - |> Application.get_env(:patterns) - |> :yamerl_constr.file - |> Processor.process + data = + :ua_parser + |> :code.priv_dir + |> Kernel.++('/patterns.yml') + |> to_string + |> :yamerl_constr.file([]) + |> Processor.process - opts = - opts - |> Keyword.put(:data, data) + @data data - {:ok, opts} - end + def list, do: @data end diff --git a/mix.exs b/mix.exs index c9a5886..c8ca713 100644 --- a/mix.exs +++ b/mix.exs @@ -1,7 +1,7 @@ defmodule UAParser.Mixfile do use Mix.Project - @version "1.1.1" + @version "1.2.0" def project do [ @@ -18,15 +18,15 @@ defmodule UAParser.Mixfile do def application do [ - applications: [:logger, :yamerl], - env: UAParser.Mixfile.env(), - mod: {UAParser, []}, + applications: [:logger, :yamerl] ] end defp deps do [ {:yamerl, "~> 0.4.0"}, + + # Development & Test dependencies {:credo, "~> 0.5", only: [:dev, :test]}, {:ex_doc, ">= 0.0.0", only: :dev}, ] @@ -41,20 +41,5 @@ defmodule UAParser.Mixfile do ] end - def env do - [ - patterns: get_patterns_filename, - ] - end - - @spec get_patterns_filename() :: String.t - def get_patterns_filename do - priv_path = - :ua_parser - |> :code.priv_dir() - |> IO.chardata_to_string() - default_path = priv_path <> "/patterns.yml" - Application.get_env(:ua_parser, :patterns, default_path) - end end diff --git a/mix.lock b/mix.lock index 48875bc..783374f 100644 --- a/mix.lock +++ b/mix.lock @@ -2,4 +2,5 @@ "credo": {:hex, :credo, "0.5.3", "0c405b36e7651245a8ed63c09e2d52c2e2b89b6d02b1570c4d611e0fcbecf4a2", [:mix], [{:bunt, "~> 0.1.6", [hex: :bunt, optional: false]}]}, "earmark": {:hex, :earmark, "1.0.3", "89bdbaf2aca8bbb5c97d8b3b55c5dd0cff517ecc78d417e87f1d0982e514557b", [:mix], []}, "ex_doc": {:hex, :ex_doc, "0.14.4", "a0a79a6896075814f4bc6802b74ccbed6549f47cc5ab34c71eaee2303170b8ef", [:mix], [{:earmark, "~> 1.0", [hex: :earmark, optional: false]}]}, - "yamerl": {:hex, :yamerl, "0.4.0", "ae215b1242810a9bc07716b88062f1bfe06f6bc7cf68372091f630baa536df79", [:rebar3], []}} + "yamerl": {:hex, :yamerl, "0.4.0", "ae215b1242810a9bc07716b88062f1bfe06f6bc7cf68372091f630baa536df79", [:rebar3], []}, + "yomel": {:hex, :yomel, "0.5.0", "c5a42d1818deda3f85ae14b1f01f6ece22b9ed8e8087012359fc04b59d85f621", [:make, :mix], []}} diff --git a/priv/patterns.yml b/priv/patterns.yml index 4168091..f710127 100644 --- a/priv/patterns.yml +++ b/priv/patterns.yml @@ -35,7 +35,7 @@ user_agent_parsers: family_replacement: 'MSIECrawler' # Downloader ... - - regex: '(Google-HTTP-Java-Client|Apache-HttpClient|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP)(?:[ /](\d+)(?:\.(\d+)(?:\.(\d+))?)?)?' + - regex: '(Google-HTTP-Java-Client|Apache-HttpClient|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP|okhttp)(?:[ /](\d+)(?:\.(\d+)(?:\.(\d+))?)?)?' # Bots - regex: '(1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]+-Agent|AdsBot-Google(?:-[a-z]+)?|altavista|AppEngine-Google|archive.*?\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]+)*|bingbot|BingPreview|blitzbot|BlogBridge|BoardReader(?: [A-Za-z]+)*|boitho.com-dc|BotSeer|\b\w*favicon\w*\b|\bYeti(?:-[a-z]+)?|Catchpoint bot|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher)?|Feed Seeker Bot|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]+-)?Googlebot(?:-[a-zA-Z]+)?|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile)?|IconSurf|IlTrovatore(?:-Setaccio)?|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]+Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masidani_bot|Mediapartners-Google|Microsoft .*? Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media *)?|msrbot|netresearch|Netvibes|NewsGator[^/]*|^NING|Nutch[^/]*|Nymesis|ObjectsSearch|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PlantyNet_WebRobot|Pompos|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl Java|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|TwitterBot|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]+|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s)? Link Sleuth|Xerka [A-z]+Bot|yacy(?:bot)?|Yahoo[a-z]*Seeker|Yahoo! Slurp|Yandex\w+|YodaoBot(?:-[A-z]+)?|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+))?)?)?' @@ -160,7 +160,7 @@ user_agent_parsers: # Lightning (for Thunderbird) # http://www.mozilla.org/projects/calendar/lightning/ - - regex: '(Lightning)/(\d+)\.(\d+)\.?((?:[ab]?\d+[a-z]*)|(?:\d*))' + - regex: 'Gecko/\d+ (Lightning)/(\d+)\.(\d+)\.?((?:[ab]?\d+[a-z]*)|(?:\d*))' # Swiftfox - regex: '(Firefox)/(\d+)\.(\d+)\.(\d+(?:pre)?) \(Swiftfox\)' @@ -276,12 +276,16 @@ user_agent_parsers: # AOL Browser (IE-based) - regex: '(AOL) (\d+)\.(\d+); AOLBuild (\d+)' + # MxBrowser is Maxthon + - regex: '(MxBrowser)/(\d+)\.(\d+)(?:\.(\d+))?' + family_replacement: 'Maxthon' + #### END SPECIAL CASES TOP #### #### MAIN CASES - this catches > 50% of all browsers #### # Browser/major_version.minor_version.beta_version - - regex: '\b(MobileIron|Crosswalk|AdobeAIR|FireWeb|Jasmine|ANTGalio|Midori|Fresco|Lobo|PaleMoon|Maxthon|Lynx|OmniWeb|Dillo|Camino|Demeter|Fluid|Fennec|Epiphany|Shiira|Sunrise|Spotify|Flock|Netscape|Lunascape|WebPilot|NetFront|Netfront|Konqueror|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|Opera Mini|iCab|NetNewsWire|ThunderBrowse|Iris|UP\.Browser|Bunjalloo|Google Earth|Raven for Mac|Openwave|MacOutlook)/(\d+)\.(\d+)\.(\d+)' + - regex: '\b(MobileIron|Crosswalk|FireWeb|Jasmine|ANTGalio|Midori|Fresco|Lobo|PaleMoon|Maxthon|Lynx|OmniWeb|Dillo|Camino|Demeter|Fluid|Fennec|Epiphany|Shiira|Sunrise|Spotify|Flock|Netscape|Lunascape|WebPilot|NetFront|Netfront|Konqueror|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|Opera Mini|iCab|NetNewsWire|ThunderBrowse|Iris|UP\.Browser|Bunjalloo|Google Earth|Raven for Mac|Openwave|MacOutlook|Electron)/(\d+)\.(\d+)\.(\d+)' # Outlook 2007 - regex: 'Microsoft Office Outlook 12\.\d+\.\d+|MSOffice 12' @@ -336,18 +340,19 @@ user_agent_parsers: - regex: '(brave)/(\d+)\.(\d+)\.(\d+) Chrome' family_replacement: 'Brave' - # Chrome/Chromium/major_version.minor_version.beta_version - - regex: '(Chromium|Chrome)/(\d+)\.(\d+)\.(\d+)' + # Iron Browser ~since version 50 + - regex: '(Chrome)/(\d+)\.(\d+)\.(\d+)[\d.]* Iron[^/]' + family_replacement: 'Iron' # Dolphin Browser # @ref: http://www.dolphin.com - regex: '\b(Dolphin)(?: |HDCN/|/INT\-)(\d+)\.(\d+)\.?(\d+)?' # Browser/major_version.minor_version - - regex: '(bingbot|Bolt|Jasmine|IceCat|Skyfire|Midori|Maxthon|Lynx|Arora|IBrowse|Dillo|Camino|Shiira|Fennec|Phoenix|Chrome|Flock|Netscape|Lunascape|Epiphany|WebPilot|Opera Mini|Opera|NetFront|Netfront|Konqueror|Googlebot|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|iCab|iTunes|MacAppStore|NetNewsWire|Space Bison|Stainless|Orca|Dolfin|BOLT|Minimo|Tizen Browser|Polaris|Abrowser|Planetweb|ICE Browser|mDolphin|qutebrowser|Otter|QupZilla|MailBar|kmail2|YahooMobileMail|ExchangeWebServices|ExchangeServicesClient|Microsoft-CryptoAPI)/(\d+)\.(\d+)\.?(\d+)?' + - regex: '(bingbot|Bolt|AdobeAIR|Jasmine|IceCat|Skyfire|Midori|Maxthon|Lynx|Arora|IBrowse|Dillo|Camino|Shiira|Fennec|Phoenix|Flock|Netscape|Lunascape|Epiphany|WebPilot|Opera Mini|Opera|NetFront|Netfront|Konqueror|Googlebot|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|iCab|iTunes|MacAppStore|NetNewsWire|Space Bison|Stainless|Orca|Dolfin|BOLT|Minimo|Tizen Browser|Polaris|Abrowser|Planetweb|ICE Browser|mDolphin|qutebrowser|Otter|QupZilla|MailBar|kmail2|YahooMobileMail|ExchangeWebServices|ExchangeServicesClient)/(\d+)\.(\d+)(?:\.(\d+))?' # Chrome/Chromium/major_version.minor_version - - regex: '(Chromium|Chrome)/(\d+)\.(\d+)' + - regex: '(Chromium|Chrome)/(\d+)\.(\d+)(?:\.(\d+))?' ########## # IE Mobile needs to happen before Android to catch cases such as: @@ -361,6 +366,9 @@ user_agent_parsers: - regex: '(IEMobile)[ /](\d+)\.(\d+)' family_replacement: 'IE Mobile' + # Baca Berita App News Reader + - regex: '(BacaBerita App)\/(\d+)\.(\d+)\.(\d+)' + # Browser major_version.minor_version.beta_version (space instead of slash) - regex: '(iRider|Crazy Browser|SkipStone|iCab|Lunascape|Sleipnir|Maemo Browser) (\d+)\.(\d+)\.(\d+)' # Browser major_version.minor_version (space instead of slash) @@ -553,11 +561,19 @@ user_agent_parsers: - regex: '(python-requests)/(\d+)\.(\d+)' family_replacement: 'Python Requests' + # headless user-agents + - regex: '\b(Windows-Update-Agent|Microsoft-CryptoAPI|SophosUpdateManager|SophosAgent|Debian APT-HTTP|Ubuntu APT-HTTP|libcurl-agent|libwww-perl|urlgrabber|curl|Wget|OpenBSD ftp|jupdate)(?:[ /](\d+)(?:\.(\d+)(?:\.(\d+))?)?)?' + - regex: '(Java)[/ ]{0,1}\d+\.(\d+)\.(\d+)[_-]*([a-zA-Z0-9]+)*' # Roku Digital-Video-Players https://www.roku.com/ - regex: '^(Roku)/DVP-(\d+)\.(\d+)' + # Kurio App News Reader https://kurio.co.id/ + - regex: '(Kurio)\/(\d+)\.(\d+)\.(\d+)' + family_replacement: 'Kurio App' + + os_parsers: ########## # HbbTV vendors @@ -655,7 +671,7 @@ os_parsers: # UCWEB - regex: '^UCWEB.*; (Adr) (\d+)\.(\d+)(?:[.\-]([a-z0-9]+))?;' os_replacement: 'Android' - - regex: '^UCWEB.*; (iPad OS|iPh OS) (\d+)_(\d+)(?:_(\d+))?;' + - regex: '^UCWEB.*; (iPad|iPh|iPd) OS (\d+)_(\d+)(?:_(\d+))?;' os_replacement: 'iOS' - regex: '^UCWEB.*; (wds) (\d+)\.(\d+)(?:\.(\d+))?;' os_replacement: 'Windows Phone' @@ -875,8 +891,9 @@ os_parsers: os_v1_replacement: '10' os_v2_replacement: '0' # iOS Apps - - regex: '\b(iOS[ /]|iPhone(?:/| v|[ _]OS[/,]|; | OS : |\d,\d/|\d,\d; )|iPad/)(\d{1,2})[_\.](\d{1,2})(?:[_\.](\d+))?' + - regex: '\b(iOS[ /]|iOS; |iPhone(?:/| v|[ _]OS[/,]|; | OS : |\d,\d/|\d,\d; )|iPad/)(\d{1,2})[_\.](\d{1,2})(?:[_\.](\d+))?' os_replacement: 'iOS' + - regex: '\((iOS);' ########## # Apple TV @@ -4628,6 +4645,11 @@ device_parsers: ########## # Samsung ########## + # Samsung Smart-TV + - regex: '(SMART-TV); .* Tizen ' + device_replacement: 'Samsung $1' + brand_replacement: 'Samsung' + model_replacement: '$1' # Samsung Symbian Devices - regex: 'SymbianOS/9\.\d.* Samsung[/\-]([A-Za-z0-9 \-]+)' device_replacement: 'Samsung $1' diff --git a/test/ua_parser/parsers/device_test.exs b/test/ua_parser/parsers/device_test.exs index 03ea620..ea04dab 100644 --- a/test/ua_parser/parsers/device_test.exs +++ b/test/ua_parser/parsers/device_test.exs @@ -10,6 +10,6 @@ defmodule UAParser.Parsers.DeviceTest do {_, _, [pattern|_]} = Storage.list result = Parser.parse({pattern, ["iPod;", "iPod"]}) - assert %Device{family: "Spider"} = result + assert %Device{family: "Spider", brand: "Spider", model: "Smartphone"} == result end end