#!/usr/bin/env python3
#
# Copyright (c) 2022 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Common queries on a size database."""

import datetime
import logging
import sys
from typing import Dict, List, Mapping, Optional, Tuple, cast

import memdf.report
import memdf.util.config
import memdf.util.sqlite
import pandas as pd  # type: ignore
from memdf import Config
from memdf.sizedb import SizeDatabase

QUERY_CONFIG = {
    Config.group_map('query'): {
        'group': 'output'
    },
    'report.increases': {
        'help': 'Highlight large increases',
        'metavar': 'PERCENT',
        'default': 0.0,
        'argparse': {
            'alias': ['--threshold'],
            'type': float,
        },
    },
    'query.where': {
        'help': 'SQL filter',
        'metavar': 'SQL-EXPR',
        'default': '',
        'argparse': {
            'alias': ['--where'],
        },
    },
    'query.order': {
        'help': 'sort order',
        'metavar': 'COLUMN[,COLUMN]*',
        'default': '',
        'argparse': {
            'alias': ['--order'],
        },
    },
    'query.limit': {
        'help': 'limit result size',
        'metavar': 'ROWS',
        'default': 0,
        'argparse': {
            'alias': ['--limit'],
        },
    },
}


def argsplit(metavar: str, value: str) -> Tuple[Optional[Tuple], Dict]:
    """Given comma-separated metavar and values, match them up."""
    values = tuple(value.split(','))
    names = metavar.split(',')
    if len(names) < len(values):
        logging.error('Too many values for %s', metavar)
        return (None, {})
    if len(names) > len(values):
        logging.error('Missing %s for %s', ','.join(names[len(values):]),
                      metavar)
        return (None, {})
    return (values, dict(zip(names, values)))


def postprocess_canned_sql_option(config: Config, key: str,
                                  info: Mapping) -> None:
    """Record information from simple SQL query options in one place."""
    value = config[key]
    if not value:
        return
    title = info['sql']['title']
    if isinstance(value, str):
        metavar = info.get('metavar', 'VALUE')
        if ',' in metavar:
            values, args = argsplit(metavar, value)
            if not values:
                return
        else:
            values = (value,)
            args = {metavar: value}
        title = title.format(**args)
    else:
        values = tuple()

    if config['queries'] is None:
        config['queries'] = []
    cast(list, config['queries']).append((title, key, values, info))


def make_query(config: Config, info: Mapping) -> str:
    """Construct an SQL query string for a simple SQL query option."""
    args = {'where': '', 'order': '', 'limit': ''}
    if where := config.get('query.where'):
        if kw := info['sql'].get('where'):
            args['where'] = f'{kw} {where}'
    if order := (config.get('query.order') or info['sql'].get('order')):
        args['order'] = f'ORDER BY {order}'
    if limit := config.get('query.limit'):
        args['limit'] = f'LIMIT {limit}'
    return info['sql']['query'].format(**args)


def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame:
    """Convert a DataFrame ‘time’ column from Unix timestamp to ISO."""
    df['time'] = df['time'].map(lambda t: datetime.datetime.utcfromtimestamp(t)
                                .isoformat())
    return df


def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame:
    """Given ‘parent_size’and ‘commit_size’ columns, add change columns."""
    df['change'] = df.apply(lambda row: row.commit_size - row.parent_size,
                            axis=1)
    df['% change'] = df.apply(lambda row: SizeDatabase.percent_change(
        row.parent_size, row.commit_size),
        axis=1)
    if threshold := config['report.increases']:
        df = df[df['% change'] > threshold]
    return df


QUERY_CONFIG |= {
    'query.platforms': {
        'help': 'List known platforms',
        'default': False,
        'postprocess': postprocess_canned_sql_option,
        'sql': {
            'title': 'Platforms',
            'query': '''
                SELECT DISTINCT platform FROM thing {where} {order} {limit}
                ''',
            'where': 'WHERE',
            'order': 'platform',
        },
        'argparse': {
            'alias': ['--platforms'],
        },
    },
    'query.platform-targets': {
        'help': 'List known targets for the given platform',
        'metavar': 'PLATFORM',
        'default': '',
        'postprocess': postprocess_canned_sql_option,
        'sql': {
            'title': 'Platform Targets',
            'query': '''
                SELECT DISTINCT platform, config, target
                  FROM thing
                  WHERE platform=? {where}
                  {order} {limit}
                ''',
                'where': 'AND',
                'order': 'platform, config, target',
        },
        'argparse': {
            'alias': ['--platform-targets'],
        },
    },
    'query.platform-sections': {
        'help': 'List known sections for the given platform',
        'metavar': 'PLATFORM',
        'default': '',
        'postprocess': postprocess_canned_sql_option,
        'sql': {
            'title': 'Platform Sections',
            'query': '''
                SELECT DISTINCT platform, s.name AS section
                  FROM thing t
                  INNER JOIN build b ON t.id == b.thing_id
                  INNER JOIN size s ON b.id == s.build_id
                  WHERE platform=? {where}
                  {order} {limit}
                ''',
            'where': 'AND',
            'order': 'platform, section',
        },
        'argparse': {
            'alias': ['--platform-sections'],
        },
    },
    'query.section-sizes': {
        'help': 'List size data for a given build section',
        'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
        'default': '',
        'postprocess': postprocess_canned_sql_option,
        'sql': {
            'title': 'Sizes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
            'query': '''
                SELECT DISTINCT time, hash, pr, size
                  FROM build b
                  INNER JOIN size s ON b.id == s.build_id
                  WHERE b.thing_id == (SELECT id FROM thing
                                       WHERE platform == ?
                                       AND config == ?
                                       AND target == ?)
                    AND name == ?
                    {where}
                  {order} {limit}
                ''',
            'where': 'AND',
            'order': 'time',
            'postprocess': [postprocess_df_time],
        },
    },
    'query.section-changes': {
        'help': 'List size changes for a given build section',
        'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
        'default': '',
        'postprocess': postprocess_canned_sql_option,
        'sql': {
            'title': 'Changes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
            'query': '''
                WITH builds (bid, pid, time, pr, hash) AS (
                SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash
                  FROM build b
                  INNER JOIN build p
                  ON p.hash = b.parent AND p.thing_id == b.thing_id
                  WHERE b.thing_id == (SELECT id FROM thing
                                       WHERE platform == ?
                                       AND config == ?
                                       AND target == ?)
                )
                SELECT DISTINCT
                  time, hash, pr,
                  ps.size as parent_size,
                  bs.size as commit_size
                FROM builds
                INNER JOIN size bs ON builds.bid == bs.build_id
                INNER JOIN size ps ON builds.pid == ps.build_id
                WHERE bs.name == ? AND ps.name == bs.name
                  {where}
                {order} {limit}
                ''',
            'where': 'AND',
            'order': 'time',
            'postprocess': [postprocess_df_time, postprocess_df_changes],
        },
    },
    'query.all-changes': {
        'help': 'List all size changes',
        'default': False,
        'postprocess': postprocess_canned_sql_option,
        'sql': {
            'title': 'Size Changes',
            'query': '''
                WITH
                builds (bid, pid, time, pr, hash, thing_id) AS (
                  SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash, b.thing_id
                  FROM build b
                  INNER JOIN build p
                    ON p.hash = b.parent AND p.thing_id == b.thing_id
                ),
                changes (bid, tid, name, parent_size, commit_size, change) AS (
                  SELECT DISTINCT
                    bs.build_id,
                    thing_id,
                    bs.name,
                    ps.size as parent_size,
                    bs.size as commit_size,
                    bs.size - ps.size as change
                  FROM builds
                  INNER JOIN size bs ON builds.bid == bs.build_id
                  INNER JOIN size ps ON builds.pid == ps.build_id
                  WHERE bs.name == ps.name
                )
                SELECT
                  time, hash,
                  platform, config, target, name,
                  parent_size, commit_size, change
                FROM changes
                INNER JOIN build ON bid == build.id
                INNER JOIN thing ON tid == thing.id
                {where} {order} {limit}
                ''',
            'where': 'AND',
            'order': 'time',
            'postprocess': [postprocess_df_time, postprocess_df_changes],
        },
    },
    'query.build-sizes': {
        # SQLite doesn't have PIVOT so we have to script this.
        'help': 'List size changes for a given build',
        'metavar': 'PLATFORM,CONFIG,TARGET',
        'default': '',
    },
}


def get_build_sections(db: SizeDatabase, build: str) -> Optional[Tuple]:
    """Split a build arg and get its thing_id and sections."""
    values, args = argsplit('PLATFORM,CONFIG,TARGET', build)
    if not values:
        return None

    platform = args['PLATFORM']
    pconfig = args['CONFIG']
    ptarget = args['TARGET']
    thing_id = db.select_thing_id(platform, pconfig, ptarget)
    if not thing_id:
        logging.error('No match for %s,%s,%s', platform, pconfig, ptarget)
        return None

    sections = db.select_sections_for_thing(thing_id)
    if not sections:
        logging.warning('No sections for %s,%s,%s', platform, pconfig, ptarget)
        return None

    return (platform, pconfig, ptarget, thing_id, sections)


def make_build_sizes_query(config: Config, thing_id: str,
                           sections: List[str]) -> Tuple[List[str], str]:
    """Construct and SQL query for all section sizes for a given thing."""
    # SQLite doesn't have PIVOT so we need to construct a query with
    # a column for each section.
    columns = ['time', 'hash', 'pr']
    cols = ', '.join(columns)
    joins = ''
    where = f' WHERE b.thing_id == {thing_id}'
    for i, s in enumerate(sections):
        columns.append(s)
        cols += f', s{i}.size AS s{i}z'
        joins += f' INNER JOIN size s{i} ON b.id == s{i}.build_id'
        where += f' AND s{i}.name == "{s}"'
    if qw := config['query.where']:
        where += f' AND {qw}'
    query = f'''SELECT {cols}
                  FROM build b
                  {joins}
                  {where}
                  ORDER BY {config.get('query.order') or 'time'}'''
    if limit := config['query.limit']:
        query += f' LIMIT {limit}'
    return (columns, query)


def query_build_sizes(config: Config, db: SizeDatabase,
                      build: str) -> Optional[pd.DataFrame]:
    """Get all sizes for the given build."""
    t = get_build_sections(db, build)
    if not t:
        return None
    platform, pconfig, ptarget, thing_id, sections = t

    columns, query = make_build_sizes_query(config, thing_id, sections)
    logging.debug('Query: %s', query)

    cur = db.execute(query)
    rows = cur.fetchall()
    if rows:
        df = pd.DataFrame(rows, columns=columns)
        df.attrs = {
            'name': f'qbs-{build}',
            'title': f'Sizes for {platform} {pconfig} {ptarget}',
        }
        return postprocess_df_time(config, df)

    return None


def main(argv):
    status = 0
    try:
        cfg = {
            **memdf.util.config.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **QUERY_CONFIG,
        }
        cfg['database.file']['argparse']['required'] = True

        config = Config().init(cfg)
        config.parse(argv)

        db = SizeDatabase(config['database.file'], writable=False)
        db.open()

        dfs = {}

        q = 0
        for title, key, values, info in config.get('queries', []):
            q += 1
            query = make_query(config, info)
            logging.debug('Option: %s', key)
            logging.debug('Title: %s', title)
            logging.debug('Query: %s', query.strip())
            logging.debug('With: %s', values)
            cur = db.execute(query, values)
            columns = [i[0] for i in cur.description]
            rows = cur.fetchall()
            if rows:
                df = pd.DataFrame(rows, columns=columns)
                df.attrs = {'name': f'query{q}', 'title': title}
                for f in info['sql'].get('postprocess', []):
                    df = f(config, df)
                dfs[df.attrs['name']] = df

        if build := config['query.build-sizes']:
            q += 1
            if (df := query_build_sizes(config, db, build)) is not None:
                dfs[df.attrs['name']] = df

        if q == 0:
            config.argparse.print_help()
            return 1

        memdf.report.write_dfs(config,
                               dfs,
                               hierify=config['hierify'],
                               title=True,
                               floatfmt='5.1f')

    except Exception as exception:
        raise exception

    return status


if __name__ == '__main__':
    sys.exit(main(sys.argv))