-- Example: call the function deployed as `justfunctions.eu.extract_url_language`.
-- The statement is terminated with `;` so it can run in the same script as the
-- CREATE FUNCTION statement that follows.
SELECT
  `justfunctions.eu.extract_url_language`('www.justdataplease.com/en-US/') AS url_language;
/*--Output--
english
*/
-- Maps a language code that appears as a path segment of a URL to a lowercase
-- English language name.
--
-- Args:
--   url: URL (or path) to inspect. A language code is only recognised when it
--        is preceded by '/' and, except for the Swedish prefix form, followed
--        by '/'.
-- Returns:
--   The language name of the first WHEN branch (in the order written below)
--   whose pattern matches, or 'unknown' when none matches. A NULL url also
--   yields 'unknown', because every REGEXP_CONTAINS call then evaluates to
--   NULL and the CASE falls through to ELSE.
-- Notes:
--   * Language codes are matched case-sensitively in lowercase ('/en/', not
--     '/EN/'); the optional region suffix accepts either case ('/en-US/').
--   * Precedence is branch order, not position in the URL: '/de/en/' returns
--     'english' because the English branch is tested first.
CREATE OR REPLACE FUNCTION `your_project_id.your_dataset_id.extract_url_language`(`url` STRING)
RETURNS STRING AS (
  CASE
    -- North America
    WHEN REGEXP_CONTAINS(url, '/en(-[A-Za-z]{2})?/') THEN 'english'
    -- Accept regional Spanish too (es-MX, es-AR, ...) and the Latin America
    -- locale es-419, so the South America note below actually holds.
    WHEN REGEXP_CONTAINS(url, '/es(-([A-Za-z]{2}|419))?/') THEN 'spanish'
    -- Europe
    WHEN REGEXP_CONTAINS(url, '/fr/') THEN 'french'
    WHEN REGEXP_CONTAINS(url, '/de/') THEN 'german'
    WHEN REGEXP_CONTAINS(url, '/it/') THEN 'italian'
    WHEN REGEXP_CONTAINS(url, '/ru/') THEN 'russian'
    -- Bare '/pt/' plus any regional form ('/pt-pt/', '/pt-PT/', '/pt-br/').
    WHEN REGEXP_CONTAINS(url, '/pt(-[A-Za-z]{2})?/') THEN 'portuguese'
    WHEN REGEXP_CONTAINS(url, '/el/') THEN 'greek'
    -- Keeps the original '/sv-' prefix match (e.g. '/sv-se') and additionally
    -- recognises the bare '/sv/' segment, which previously fell to 'unknown'.
    WHEN REGEXP_CONTAINS(url, '/sv[-/]') THEN 'swedish'
    WHEN REGEXP_CONTAINS(url, '/pl/') THEN 'polish'
    WHEN REGEXP_CONTAINS(url, '/tr/') THEN 'turkish'
    WHEN REGEXP_CONTAINS(url, '/nl/') THEN 'dutch'
    WHEN REGEXP_CONTAINS(url, '/uk/') THEN 'ukrainian'
    WHEN REGEXP_CONTAINS(url, '/cs/') THEN 'czech'
    WHEN REGEXP_CONTAINS(url, '/da/') THEN 'danish'
    WHEN REGEXP_CONTAINS(url, '/fi/') THEN 'finnish'
    WHEN REGEXP_CONTAINS(url, '/hu/') THEN 'hungarian'
    WHEN REGEXP_CONTAINS(url, '/ro/') THEN 'romanian'
    WHEN REGEXP_CONTAINS(url, '/sk/') THEN 'slovak'
    -- Asia
    WHEN REGEXP_CONTAINS(url, '/ar/') THEN 'arabic'
    WHEN REGEXP_CONTAINS(url, '/zh(-[A-Za-z]{2,5})?/') THEN 'chinese'
    WHEN REGEXP_CONTAINS(url, '/ja/') THEN 'japanese'
    WHEN REGEXP_CONTAINS(url, '/ko/') THEN 'korean'
    WHEN REGEXP_CONTAINS(url, '/hi/') THEN 'hindi'
    WHEN REGEXP_CONTAINS(url, '/ur/') THEN 'urdu'
    WHEN REGEXP_CONTAINS(url, '/bn/') THEN 'bengali'
    WHEN REGEXP_CONTAINS(url, '/fa/') THEN 'persian'
    WHEN REGEXP_CONTAINS(url, '/ta/') THEN 'tamil'
    WHEN REGEXP_CONTAINS(url, '/vi/') THEN 'vietnamese'
    WHEN REGEXP_CONTAINS(url, '/th/') THEN 'thai'
    WHEN REGEXP_CONTAINS(url, '/id/') THEN 'indonesian'
    WHEN REGEXP_CONTAINS(url, '/ms/') THEN 'malay'
    -- Africa
    -- Arabic is also spoken widely in North Africa; it is already handled by
    -- the '/ar/' branch above, so no second (unreachable) branch is needed.
    -- South America
    -- Spanish and Portuguese are majorly spoken here and are already added above
    -- Oceania
    -- English is a major language here and is already added above
    ELSE 'unknown'
  END
)
OPTIONS ( description = '''Extracts the language from a <url>.''');
SQL User Defined Function (SQL UDF)
See something wrong? Contact us or report an issue on GitHub.