Appendices

9. Appendices: all information considered relevant to the understanding and clarification of the work carried out.

A. Source code of the connectors for Hive and Impala

# encoding: utf-8

require_relative './odbc'

module Carto
  class Connector

    # Hive (HiveServer2) provider using Hortonworks driver
    # (http://public-repo-1.hortonworks.com/HDP/hive-odbc/2.1.2.1002/debian/hive-odbc-native_2.1.2.1002-2_amd64.deb)
    #
    # For all supported attributes, see https://hortonworks.com/wp-content/uploads/2015/10/Hortonworks-Hive-ODBC-Driver-User-Guide.pdf
    #
    # Another driver compatible with this one is [Cloudera's](http://www.cloudera.com/downloads/connectors/hive/odbc/2-5-12.html)
    #
    # The schema acts as a database name here, and can be omitted (default schema 'default' or '')
    # So we'll use a `database` connection parameter for the schema for consistency with other providers.
    # The schema parameter should not be directly used by the user.
    class HiveProvider < OdbcProvider

      private

      DEFAULT_SCHEMA = 'default'.freeze # '' would also be OK

      def fixed_connection_attributes
        {
          Driver: 'Hortonworks Hive ODBC Driver 64-bit'
        }
      end

      def required_connection_attributes
        {
          server:   :HOST
        }
      end

      def optional_connection_attributes
        {
          database: { Schema: DEFAULT_SCHEMA },
          port: { PORT: 10000 },
          username: { UID: nil },
          password: { PWD: nil }
        }
      end

      def non_connection_parameters
        super.reverse_merge(schema: @connection[:database] || DEFAULT_SCHEMA)
      end

    end
  end
end
# encoding: utf-8

require_relative './odbc'

module Carto
  class Connector

    class ImpalaProvider < OdbcProvider

      def initialize(context, params)
        super
        if @connection
          @dsn        = @connection[:dsn]
          @driver     = @connection[:driver]
        end
      end

      def errors(only: nil)
        errors = super
        if @connection.blank?
          errors << "Missing 'connection' parameters"
        elsif @dsn.blank? && @driver.blank?
          errors << "Must define either 'dsn' or 'driver' in 'connection'"
        end
        errors
      end

      private

      def connection_attributes
        @connection
      end
    end

  end
end

B. Configuration of the connector for Apache Hive

The configuration consists of adding a new entry to the PROVIDERS object defined in the file cartodb/lib/carto/connector/providers.rb of the https://github.com/CartoDB/cartodb repository. An example of the import parameters accepted by this provider is sketched after the listing.

# encoding: utf-8

require_relative 'providers/generic_odbc'
require_relative 'providers/mysql'
require_relative 'providers/postgresql'
require_relative 'providers/sqlserver'
require_relative 'providers/hive'
require_relative 'providers/pg_fdw'

module Carto
  class Connector

    # Here we map provider identifiers (as used in APIs, etc.) to the Provider class and basic attributes.
    # `name` is the human-readable name
    # `public` means that the provider is publicly announced (so it is accessible through UI, visible in lists of
    # providers, etc.) A provider may be available or not (see Connector.limits) independently of its public status,
    # so that a public provider may not be available for all users, and non-public providers may be available to
    # some users (e.g. 'odbc' provider for tests)
    PROVIDERS = {
      'odbc' => {
        name: 'ODBC',
        class: GenericOdbcProvider,
        public: false # Intended for internal development/tests
      },
      'postgres' => {
        name: 'PostgreSQL',
        class: PostgreSQLProvider,
        public: true
      },
      'mysql' => {
        name: 'MySQL',
        class: MySqlProvider,
        public: true
      },
      'sqlserver' => {
        name: 'Microsoft SQL Server',
        class: SqlServerProvider,
        public: true
      },
      'hive' => {
        name: 'Hive',
        class: HiveProvider,
        public: true
      }
    }
    ...
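
As a reference for using this provider, the following is a minimal sketch of the connector parameters for a Hive import, following the request format shown in the header comments of Appendices E and G. The host, credentials and table name are placeholders, not values taken from this work; the attribute names come from HiveProvider (Appendix A).

# Hypothetical import parameters for the 'hive' provider (all values are placeholders).
hive_params = {
  provider: 'hive',
  connection: {
    server:   'hive.example.com',  # required, mapped to the HOST ODBC attribute
    port:     10000,               # optional, defaults to 10000
    database: 'default',           # optional, used as the Hive schema
    username: 'hive_user',         # optional
    password: 'secret'             # optional
  },
  table: 'source_table'
}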

C. Configuration of the connector for Apache Impala

The configuration consists of adding a new entry to the PROVIDERS object defined in the file cartodb/lib/carto/connector/providers.rb of the https://github.com/CartoDB/cartodb repository. An example of the import parameters accepted by this provider is sketched after the listing.

# encoding: utf-8

require_relative 'providers/generic_odbc'
require_relative 'providers/mysql'
require_relative 'providers/postgresql'
require_relative 'providers/sqlserver'
require_relative 'providers/hive'
require_relative 'providers/pg_fdw'

module Carto
  class Connector

    # Here we map provider identifiers (as used in APIs, etc.) to the Provider class and basic attributes.
    # `name` is the human-readable name
    # `public` means that the provider is publicly announced (so it is accessible through UI, visible in lists of
    # providers, etc.) A provider may be available or not (see Connector.limits) independently of its public status,
    # so that a public provider may not be available for all users, and non-public providers may be available to
    # some users (e.g. 'odbc' provider for tests)
    PROVIDERS = {
      'odbc' => {
        name: 'ODBC',
        class: GenericOdbcProvider,
        public: false # Intended for internal development/tests
      },
      'postgres' => {
        name: 'PostgreSQL',
        class: PostgreSQLProvider,
        public: true
      },
      'mysql' => {
        name: 'MySQL',
        class: MySqlProvider,
        public: true
      },
      'sqlserver' => {
        name: 'Microsoft SQL Server',
        class: SqlServerProvider,
        public: true
      },
      'hive' => {
        name: 'Hive',
        class: HiveProvider,
        public: true
      },
      'impala' => {
        name: 'Impala',
        class: GenericOdbcProvider,
        public: true
      }
    }
    ...
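
Since the 'impala' entry maps to GenericOdbcProvider, the connection attributes are passed through to the ODBC driver, so either a dsn or a driver must be supplied (the same requirement that ImpalaProvider enforces in Appendix A). A minimal sketch follows, with placeholder values and a driver name that must match the one registered in odbcinst.ini.

# Hypothetical import parameters for the 'impala' provider (all values are placeholders).
impala_params = {
  provider: 'impala',
  connection: {
    driver:   'Cloudera ODBC Driver for Impala 64-bit',  # or a preconfigured dsn
    host:     'impala.example.com',
    port:     21050,
    database: 'default'
  },
  table: 'source_table'
}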

D. Configuration of the connector for Amazon Redshift

The configuration consists of adding a new entry to the PROVIDERS object defined in the file cartodb/lib/carto/connector/providers.rb of the https://github.com/CartoDB/cartodb repository. An example of the import parameters accepted by this provider is sketched after the listing.

# encoding: utf-8

require_relative 'providers/generic_odbc'
require_relative 'providers/mysql'
require_relative 'providers/postgresql'
require_relative 'providers/sqlserver'
require_relative 'providers/hive'
require_relative 'providers/pg_fdw'

module Carto
  class Connector

    # Here we map provider identifiers (as used in APIs, etc.) to the Provider class and basic attributes.
    # `name` is the human-readable name
    # `public` means that the provider is publicly announced (so it is accessible through UI, visible in lists of
    # providers, etc.) A provider may be available or not (see Connector.limits) independently of its public status,
    # so that a public provider may not be available for all users, and non-public providers may be available to
    # some users (e.g. 'odbc' provider for tests)
    PROVIDERS = {
      'odbc' => {
        name: 'ODBC',
        class: GenericOdbcProvider,
        public: false # Intended for internal development/tests
      },
      'postgres' => {
        name: 'PostgreSQL',
        class: PostgreSQLProvider,
        public: true
      },
      'mysql' => {
        name: 'MySQL',
        class: MySqlProvider,
        public: true
      },
      'sqlserver' => {
        name: 'Microsoft SQL Server',
        class: SqlServerProvider,
        public: true
      },
      'hive' => {
        name: 'Hive',
        class: HiveProvider,
        public: true
      },
      'impala' => {
        name: 'Impala',
        class: GenericOdbcProvider,
        public: true
      },
      'redshift' => {
        name: 'Redshift',
        class: GenericOdbcProvider,
        public: true
      }
    }
    ...
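
As with Impala, the 'redshift' entry relies on GenericOdbcProvider, so the connection attributes are passed straight to the ODBC driver. A minimal sketch with placeholder values; the driver name must match the one registered in odbcinst.ini and 5439 is Redshift's default port.

# Hypothetical import parameters for the 'redshift' provider (all values are placeholders).
redshift_params = {
  provider: 'redshift',
  connection: {
    driver:   'Amazon Redshift (x64)',  # or a preconfigured dsn
    server:   'examplecluster.abc123.us-east-1.redshift.amazonaws.com',
    port:     5439,
    database: 'dev',
    uid:      'redshift_user',
    pwd:      'secret'
  },
  table: 'source_table'
}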

E. Source code of the MongoDB connector

# encoding: utf-8

require_relative './odbc'

module Carto
  class Connector

    # Class for ODBC-based provider using mongo_fdw
    #
    # Requirements:
    #   * mongo_fdw extension must be installed in the user database
    # "connector": {
    #    "provider": "mongo",
    #    "connection": {
    #      "username":"THE_MONGO_USER_NAME",
    #      "password":"THE_MONGO_PASSWORD",
    #      "server": "THE_MONGO_SERVER",
    #      "database": "THE_MONGO_DATABASE",
    #      "port":"THE_MONGO_PORT",
    #      "schema": "THE_MONGO_COLLECTION"
    #    },
    #    "table": "THE_MONGO_TABLE",
    #    "columns": "THE_COLUMNS_OF_THE_MONGO_TABLE"
    #  }
    #   "columns" is a comma separated list of column-name type.
    #   As an example "columns" can be "_id NAME,   warehouse_id int,   warehouse_name text,   warehouse_created timestamptz
    #
    class MongoProvider < OdbcProvider

      def initialize(context, params)
        super
      end

      # Required connection attributes: { name: :internal_name }
      # The :internal_name is what is passed to the driver (through odbc_fdw 'odbc_' options)
      # The :name is the case-insensitive parameter received here through the API
      # This can be redefined as needed in derived classes.
      def required_connection_attributes
        {
          server:         :address
        }
      end

      # Connection attributes that are optional: { name: { internal_name: default_value } }
      # Those with non-nil default values will always be set.
      # name/internal_name as in `required_connection_attributes`
      # This can be redefined as needed in derived classes.
      def optional_connection_attributes
        {
          database: { database: "admin" },
          port: { port: 27017 },
          username: { username: nil },
          password: { password: nil },
          schema:   { collection: nil }
        }
      end

      def fdw_create_server(server_name)
        sql = fdw_create_server_sql 'mongo_fdw', server_name, server_options_m
        execute_as_superuser sql
      end

      def fdw_list_tables(server_name, limit)
        # TODO not tested in MongoDB
        execute %{
          SELECT * FROM ODBCTablesList('#{server_name}',#{limit.to_i});
        }
      end

      def features_information
        {
          "sql_queries":    true,
          "list_databases": false,
          "list_tables":    false,
          "preview_table":  false
        }
      end

      def fdw_create_foreign_table(server_name)
        cmds = []
        foreign_table_name = foreign_table_name_for(server_name)
        if @columns.present?
          cmds << fdw_create_foreign_table_sql(
            server_name, foreign_table_schema, foreign_table_name, @columns, table_options_m
          )
        else
          options = table_options.merge(prefix: unique_prefix_for(server_name))
          cmds << fdw_import_foreign_schema_sql(server_name, remote_schema_name, foreign_table_schema, options)
        end
        cmds << fdw_grant_select_sql(foreign_table_schema, foreign_table_name, @connector_context.database_username)
        execute_as_superuser cmds.join("\n")
        foreign_table_name
      end

      SERVER_OPTIONS = %w(dsn driver host server address port).freeze
      USER_OPTIONS   = %w(uid pwd user username password).freeze
      TABLE_OPTIONS  = %w(database collection).freeze

      def connection_options(parameters)
        parameters.map { |option_name, option_value| ["#{option_name}", quoted_value(option_value)] }
      end

      def server_options_m
        connection_options(connection_attributes.slice(*SERVER_OPTIONS)).parameters
      end

      def table_options_m
        connection_options(connection_attributes.slice(*TABLE_OPTIONS)).parameters
      end
    end
  end
end

F. Configuration of the connector for MongoDB

The configuration consists of adding a new entry to the PROVIDERS object defined in the file cartodb/lib/carto/connector/providers.rb of the https://github.com/CartoDB/cartodb repository. An example of the import parameters accepted by this provider, mirroring the format documented in Appendix E, is sketched after the listing.

# encoding: utf-8

require_relative 'providers/generic_odbc'
require_relative 'providers/mysql'
require_relative 'providers/postgresql'
require_relative 'providers/sqlserver'
require_relative 'providers/hive'
require_relative 'providers/pg_fdw'

module Carto
  class Connector

    # Here we map provider identifiers (as used in APIs, etc.) to the Provider class and basic attributes.
    # `name` is the human-readable name
    # `public` means that the provider is publicly announced (so it is accessible through UI, visible in lists of
    # providers, etc.) A provider may be available or not (see Connector.limits) independently of its public status,
    # so that a public provider may not be available for all users, and non-public providers may be available to
    # some users (e.g. 'odbc' provider for tests)
    PROVIDERS = {
      'odbc' => {
        name: 'ODBC',
        class: GenericOdbcProvider,
        public: false # Intended for internal development/tests
      },
      'postgres' => {
        name: 'PostgreSQL',
        class: PostgreSQLProvider,
        public: true
      },
      'mysql' => {
        name: 'MySQL',
        class: MySqlProvider,
        public: true
      },
      'sqlserver' => {
        name: 'Microsoft SQL Server',
        class: SqlServerProvider,
        public: true
      },
      'hive' => {
        name: 'Hive',
        class: HiveProvider,
        public: true
      },
      'impala' => {
        name: 'Impala',
        class: GenericOdbcProvider,
        public: true
      },
      'redshift' => {
        name: 'Redshift',
        class: GenericOdbcProvider,
        public: true
      },
      'mongo' => {
        name: 'mongo',
        class: MongoProvider,
        public: true
      }
    }
    ...
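
For completeness, the request format documented in the header comment of MongoProvider (Appendix E), expressed here as a parameter hash; all values are placeholders.

# Hypothetical import parameters for the 'mongo' provider, following the format
# documented in the MongoProvider header comment (Appendix E).
mongo_params = {
  provider: 'mongo',
  connection: {
    server:   'mongo.example.com',   # required, mapped to the address attribute
    port:     27017,                 # optional, defaults to 27017
    database: 'admin',               # optional, defaults to 'admin'
    username: 'mongo_user',          # optional
    password: 'secret',              # optional
    schema:   'warehouses'           # the MongoDB collection
  },
  table: 'warehouses',
  columns: '_id NAME, warehouse_id int, warehouse_name text, warehouse_created timestamptz'
}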

G. Source code of the BigQuery connector

# encoding: utf-8

require 'uri'
require_relative './odbc'

module Carto
  class Connector

    # {
    #   "provider": "bigquery",
    #   "connection": {
    #     "Driver": "Google BigQuery 64",
    #     "OAuthMechanism": 1,
    #     "Catalog": "eternal-ship-170218",
    #     "SQLDialect": 1,
    #     "RefreshToken": "1/FyCbmKonlYAwx7FMjfow9QO5mdiOG3u9dfpi0ktYxOux_fFDF6ip-PERQkXYKiDc"
    #   },
    #   "table": "destination_table",
    #   "sql_query": "select * from `eternal-ship-170218.test.test` limit 1;"
    # }
    class BigQueryProvider < OdbcProvider

      private

      DRIVER_NAME       = 'Google BigQuery 64'
      SQL_DIALECT       = 1
      USER_AUTH         = 1
      SERVICE_AUTH      = 0

      def initialize(context, params)
        super
        @oauth_config = Cartodb.get_config(:oauth, 'bigquery')
        validate_config!(context)
      end

      def validate_config!(context)
        raise 'OAuth configuration not found for bigquery provider' if @oauth_config.nil?

        if @oauth_config['oauth_mechanism'] == SERVICE_AUTH &&
           (@oauth_config['email'].nil? || @oauth_config['key'].nil?)
          raise 'bigquery provider configured in SERVICE_AUTH mode but email or key not present'
        else
          begin
            @token = context.user.oauths.select('bigquery').token
            raise 'OAuth Token not found for bigquery provider' if @token.nil?
          rescue => e
            CartoDB::Logger.error(exception: e,
                                  message: 'OAuth Token not found for "bigquery" provider',
                                  user_id: context.user.id)
          end
        end
      end

      def fixed_connection_attributes
        oauth_mechanism = @oauth_config['oauth_mechanism']
        proxy_conf = create_proxy_conf

        if oauth_mechanism === SERVICE_AUTH
          conf = {
            Driver:         DRIVER_NAME,
            SQLDialect:     SQL_DIALECT,
            OAuthMechanism: oauth_mechanism,
            Email:          @oauth_config['email'],
            KeyFilePath:    @oauth_config['key']
          }
        else
          conf = {
            Driver:         DRIVER_NAME,
            SQLDialect:     SQL_DIALECT,
            OAuthMechanism: oauth_mechanism,
            RefreshToken:   @token
          }
        end

        if !proxy_conf.nil?
          conf = conf.merge(proxy_conf)
        end

        return conf
      end

      def required_connection_attributes
        {
          database:       :Catalog
        }
      end

      def create_proxy_conf
        proxy = ENV['HTTP_PROXY'] || ENV['http_proxy']
        if !proxy.nil?
          proxy = URI.parse(proxy)
          {
            ProxyHost: proxy.host,
            ProxyPort: proxy.port
          }
        end
      end

    end
  end
end

H. Configuration of the connector for BigQuery

The configuration consists of adding a new entry to the PROVIDERS object defined in the file cartodb/lib/carto/connector/providers.rb of the https://github.com/CartoDB/cartodb repository. A sketch of the OAuth configuration read by this provider is given after the listing.

# encoding: utf-8

require_relative 'providers/generic_odbc'
require_relative 'providers/mysql'
require_relative 'providers/postgresql'
require_relative 'providers/sqlserver'
require_relative 'providers/hive'
require_relative 'providers/pg_fdw'

module Carto
  class Connector

    # Here we map provider identifiers (as used in APIs, etc.) to the Provider class and basic attributes.
    # `name` is the human-readable name
    # `public` means that the provider is publicly announced (so it is accessible through UI, visible in lists of
    # providers, etc.) A provider may be available or not (see Connector.limits) independently of its public status,
    # so that a public provider may not be available for all users, and non-public providers may be available to
    # some users (e.g. 'odbc' provider for tests)
    PROVIDERS = {
      'odbc' => {
        name: 'ODBC',
        class: GenericOdbcProvider,
        public: false # Intended for internal development/tests
      },
      'postgres' => {
        name: 'PostgreSQL',
        class: PostgreSQLProvider,
        public: true
      },
      'mysql' => {
        name: 'MySQL',
        class: MySqlProvider,
        public: true
      },
      'sqlserver' => {
        name: 'Microsoft SQL Server',
        class: SqlServerProvider,
        public: true
      },
      'hive' => {
        name: 'Hive',
        class: HiveProvider,
        public: true
      },
      'impala' => {
        name: 'Impala',
        class: GenericOdbcProvider,
        public: true
      },
      'redshift' => {
        name: 'Redshift',
        class: GenericOdbcProvider,
        public: true
      },
      'mongo' => {
        name: 'mongo',
        class: MongoProvider,
        public: true
      },
      'bigquery' => {
        name: 'Google BigQuery 64',
        class: BigQueryProvider,
        public: true
      }
    }
    ...
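
Besides this entry, BigQueryProvider reads an OAuth configuration through Cartodb.get_config(:oauth, 'bigquery') (Appendix G). The following is an illustrative sketch of the keys that code expects, written as a Ruby hash; the concrete values are placeholders. With oauth_mechanism 0 (SERVICE_AUTH) both email and key are required, while with 1 (USER_AUTH) the per-user refresh token is used instead.

# Hypothetical 'bigquery' OAuth configuration consumed by
# BigQueryProvider#validate_config! and #fixed_connection_attributes
# (all values are placeholders).
bigquery_oauth_config = {
  'oauth_mechanism' => 0,  # SERVICE_AUTH
  'email'           => 'service-account@example-project.iam.gserviceaccount.com',
  'key'             => '/etc/carto/bigquery/service_account_key.p12'
}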

I. Analysis to find loops in point sequences

/*
DEP_EXT_findloops
From a points dataset representing positions of a moving object along a track,
this function finds the loops in the track

Inputs managed by CARTO, common to all DEP_EXT functions:
    operation               text: 'create' or 'populate'
    table_name              text: the name of the previous node table
    primary_source_query    text: the query on the previous node table
    primary_source_columns  text: the columns of the previous node table
User input:
    cat_column      text: column that identifies each track when the dataset contains more than one track ('0' if there is a single track)
    temp_column     text: sorting column, usually a timestamp ('0' to sort by cartodb_id)
Output:
* cartodb_id bigint
* track_id text: the track identifier, equal to cat_column of the input
* loop_id integer: ordinal of the loop within each track
* the_geom geometry(Geometry,4326): the polygon that represents the loop
* radius numeric: radius of the circle with the same area as the loop, in meters
*/
CREATE OR REPLACE FUNCTION DEP_EXT_findloops(
        operation text,
        table_name text,
        primary_source_query text,
        primary_source_columns text[],
        cat_column text,
        temp_column text
    )
    RETURNS VOID AS $$
        DECLARE
            tail text;
            categorized text;
            cat_string text;
            cat_string2 text;
            sub_q text;
            s record;
            gSegment            geometry = NULL;
            gLastPoint          geometry = NULL;
            gLastTrackID        text = NULL;
            gLoopPolygon        geometry = NULL;
            gRadius             numeric;
            iLoops              integer := 0;
            cdbi                bigint := 0 ;
        BEGIN

            IF operation = 'create' THEN

                EXECUTE 'DROP TABLE IF EXISTS ' || table_name;

                EXECUTE 'CREATE TABLE ' || table_name || '(cartodb_id bigint, track_id text, loop_id integer, the_geom geometry(Geometry,4326), radius numeric)';

            ELSEIF operation = 'populate' THEN

                -- DEFAULTS
                -- -- no temporal column, then use cartodb_id
                IF  trim(temp_column) = '0' THEN
                   temp_column := 'cartodb_id';
                END IF;

                -- no category, no partition
                IF  trim(cat_column) = '0' THEN
                    categorized := ' order by ' || temp_column;
                    cat_string := '';
                ELSE
                    categorized := 'partition by ' || cat_column || ' order by ' || temp_column;
                    cat_string := cat_column;
                END IF;

                -- partition and sorting of the input
                sub_q := 'WITH '
                    ||  'prequery as('
                    ||      'SELECT '
                    ||      cat_string || ' as cat,'
                    ||      temp_column || ' as temp_column,'
                    ||      'the_geom as point'
                    ||      ' FROM ('
                    ||          primary_source_query
                    ||      ') _q'
                    ||   '),'
                    ||  'pts as('
                    ||      'SELECT '
                    ||      ' cat'
                    ||      ', temp_column'
                    ||      ', point'
                    ||      ', row_number() over(partition by cat order by temp_column) as index'
                    ||      ' FROM prequery'
                    ||      ' ORDER BY cat, temp_column'
                    ||  ')'
                    ||      'SELECT '
                    ||      ' b.cat::text as track_id'
                    ||      ', ST_MakeLine(ARRAY[a.point, b.point]) AS geom'
                    ||      ' FROM pts as a, pts as b'
                    ||      ' WHERE b.index > 1'
                    ||      ' AND a.index = b.index - 1'
                    ||      ' AND a.cat = b.cat '
                    ||      ' ORDER BY b.cat, b.temp_column';

                FOR s IN EXECUTE sub_q
                LOOP

                    -- restart when new track
                    if gLastTrackID <> s.track_id then
                        gSegment := null;
                        gLastPoint := null;
                        iLoops := 0;
                    end if;

                    -- build segments
                    if gSegment is null then
                        gSegment := s.geom;
                    elseif ST_equals(s.geom, gLastPoint) = false then
                        gSegment := ST_Makeline(gSegment, s.geom);
                    end if;

                    gLoopPolygon := ST_BuildArea(ST_Node(ST_Force2D(gSegment)));


                    if gLoopPolygon is not NULL and ST_Numpoints(gSegment) > 3 then

                        iLoops := iLoops + 1;
                        gRadius := (|/ ST_area(gLoopPolygon::geography)/PI());
                        gSegment := null;

                        EXECUTE
                        'INSERT INTO '
                        || quote_ident(table_name)
                        || ' VALUES('
                        || cdbi || ', '
                        || quote_literal(s.track_id) || ', '
                        || iLoops ||', '
                        || 'ST_GeomFromText(' || quote_literal(ST_astext(gLoopPolygon)) || ', 4326), '
                        || gRadius || ')';

                        cdbi := cdbi +1;

                    end if;

                    IF  trim(cat_column) <> '0' THEN
                        gLastTrackID = s.track_id;
                    END IF;

                    gLastPoint := s.geom;

                END LOOP;


            END IF;

        END;
$$ LANGUAGE plpgsql;
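
The function can be exercised manually as follows (CARTO Builder normally supplies the first four arguments). The source table tracked_points, its vehicle_id category column and its recorded_at timestamp column are hypothetical; the second argument names the table that the function creates and then fills with the detected loops.

-- Hypothetical invocation of DEP_EXT_findloops (table and column names are placeholders).
SELECT DEP_EXT_findloops(
    'create',
    'tracked_points_loops',
    'SELECT * FROM tracked_points',
    ARRAY['cartodb_id', 'the_geom', 'vehicle_id', 'recorded_at'],
    'vehicle_id',
    'recorded_at'
);

SELECT DEP_EXT_findloops(
    'populate',
    'tracked_points_loops',
    'SELECT * FROM tracked_points',
    ARRAY['cartodb_id', 'the_geom', 'vehicle_id', 'recorded_at'],
    'vehicle_id',
    'recorded_at'
);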