#
# Copyright (C) 2019 Intel Corporation
#
# This software and the related documents are Intel copyrighted materials, and your use of them
# is governed by the express license under which they were provided to you ("License"). Unless
# the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose
# or transmit this software or the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express or implied
# warranties, other than those that are expressly stated in the License.
#


# ------------------------------------------------------------------------------
# This example shows how to obtain the contents of a GPU data set (only
# possible if GPU profile was collected).
#
# Trace OpenCL and Intel Media SDK programs (Intel Graphics Driver only):
# > ./advixe-cl --collect survey --enable-gpu-profiling --project-dir /my_proj --search-dir src:r=/test_dir -- /test_dir/test_app
#
# Expected output looks like:
#
# ============================================================
# Main GPU Dataset
# ============================================================
#carm_l3_cache_line_utilization_______________: 1
#carm_slm_cache_line_utilization______________: 0
#carm_traffic_gb______________________________: 25.7824
#computing_task_______________________________: GEMM
#computing_task_average_time__________________: 0.0676286
#computing_task_id____________________________: 3
#computing_task_instance_count________________: 3
#computing_task_purpose_______________________: Compute
#computing_task_simd_width____________________: 32
#computing_task_svm_usage_type________________:
#computing_task_total_time____________________: 0.202886
#computing_threads_started____________________: 124076
#data_transferred_device_to_host_size_________: 0
#data_transferred_device_to_host_time_________: 0
#data_transferred_host_to_device_size_________: 0
#data_transferred_host_to_device_time_________: 0
#data_transferred_synchronization_time________: 0
#data_transferred_total_gb_sec________________:
#data_transferred_total_size__________________:
#data_transferred_total_time__________________: 0
#elapsed_time_________________________________: 0.202886
#eu_array_active______________________________: 0.855657
#eu_array_idle________________________________: 0.000618341
#eu_array_stalled_____________________________: 0.143725
#eu_instructions_2_fpus_active________________: 0.431467
#eu_instructions_ipc_rate_____________________: 1.52334
#eu_instructions_send_active__________________: 0.143569
#eu_threads_occupancy_________________________: 0.998176
#gpu_compute_performance_fp_ai________________: 2.5662
#gpu_compute_performance_gflop________________: 6.44245
#gpu_compute_performance_gflops_______________: 31.7541
#gpu_compute_performance_gintop_______________: 19.5541
#gpu_compute_performance_gintops______________: 96.38
#gpu_compute_performance_gmixop_______________: 25.9966
#gpu_compute_performance_gmixops______________: 128.134
#gpu_compute_performance_int_ai_______________: 7.78892
#gpu_compute_performance_mix_ai_______________: 10.3551
#gpu_memory_bandwidth_gb_sec__________________: 12.374
#gpu_memory_bandwidth_gb_sec_read_____________: 12.2303
#gpu_memory_bandwidth_gb_sec_write____________: 0.143698
#gpu_memory_data_transferred_gb_______________: 2.51051
#gpu_memory_data_transferred_gb_read__________: 2.48135
#gpu_memory_data_transferred_gb_write_________: 0.0291544
#gpu_shader_atomics___________________________: 0
#gpu_shader_barriers__________________________: 0
#l3_shader_bandwidth_gb_sec___________________: 126.968
#l3_shader_data_transferred_gb________________: 25.7601
#module_architecture_id_______________________: 8
#module_binary_file_path______________________: c:\work\prj\ze_gemm\e000\hs000\archive\binaries\47c1543410d09163.clbin\50ed5a987f52c3a3b4b8d32b6739b672\47c1543410d09163.clbin
#module_checksum______________________________: 50ed5a987f52c3a3b4b8d32b6739b672
#module_kernel_name___________________________: GEMM
#module_module________________________________: 47c1543410d09163.clbin
#module_module_path___________________________: 47c1543410d09163.clbin
#module_offset________________________________: 0
#module_segment_type__________________________: compute
#module_size__________________________________: 960
#module_symbol_file_path______________________: c:\work\prj\ze_gemm\e000\hs000\archive\symbols\47c1543410d09163.clpdb\4120491ec961650a07dbc85a244c597c\47c1543410d09163.clpdb
#shared_local_memory_bandwidth_gb_sec_________: 0
#shared_local_memory_bandwidth_gb_sec_read____: 0
#shared_local_memory_bandwidth_gb_sec_write___: 0
#shared_local_memory_data_transferred_gb______: 0
#shared_local_memory_data_transferred_gb_read_: 0
#shared_local_memory_data_transferred_gb_write: 0
#source_size__________________________________: 10
#source_source_file___________________________: gemm.cl
#source_source_file_path______________________: .////////////////////////////////gemm.cl
#source_source_line___________________________: 1
#typed_memory_bandwidth_gb_sec________________: 0
#typed_memory_bandwidth_gb_sec_read___________: 0
#typed_memory_bandwidth_gb_sec_write__________: 0
#typed_memory_data_transferred_gb_____________: 0
#typed_memory_data_transferred_gb_read________: 0
#typed_memory_data_transferred_gb_write_______: 0
#untyped_memory_bandwidth_gb_sec______________: 126.968
#untyped_memory_bandwidth_gb_sec_read_________: 126.84
#untyped_memory_bandwidth_gb_sec_write________: 0.128446
#untyped_memory_data_transferred_gb___________: 25.7601
#untyped_memory_data_transferred_gb_read______: 25.734
#untyped_memory_data_transferred_gb_write_____: 0.0260599
#work_size_global_____________________________: 1024 x 1024
#work_size_local______________________________: 256 x 1
# ============================================================
# Instruction Mix Dataset
# ============================================================
# gemm_nn: 76
#    Type: Size: Op Type   : Callcount          : Exec Count         : Dynamic Count
#    INT : 32  : MOVE      : 3,145,728          : 17,825,792         : 3,145,728
#    INT : 32  : BIT       : 4,298,113,024      : 68,754,079,744     : 2,149,580,800
#    INT : 32  : BASIC     : 10,754,195,456     : 139,823,415,296    : 6,451,888,128
#        :     : OTHER     : 2,149,580,800      : 34,393,292,800     : 1,074,790,400
#        :     : CONTROL   : 1,075,838,976      : 1,083,179,008      : 1,074,790,400
#    FP  : 32  : MOVE      : 2,097,152          : 33,554,432         : 1,048,576
#    INT : 16  : LOAD      : 4,294,967,296      : 68,719,476,736     : 2,147,483,648
#    FP  : 32  : FMA       : 2,147,483,648      : 34,359,738,368     : 1,073,741,824
#    INT : 16  : STORE     : 2,097,152          : 33,554,432         : 1,048,576
# ============================================================
#
#Exec Count - sum of instruction call counts multiplied by execution size
#Dynamic Count - sum of instruction call counts multiplied by execution size with mask utilization applied
#
# ------------------------------------------------------------------------------

import sys

try:

    import advisor

except ImportError:

    print(
        """Import error: Python could not resolve path to Advisor's pythonapi directory.
        To fix, either manually add path to the pythonapi directory into PYTHONPATH environment
        variable, or use advixe-vars.* scripts to set up product environment variables automatically."""
    )
    sys.exit(1)

# The project directory must be supplied as the first command-line argument;
# when it is missing, print the usage string and exit with status 2.
try:
    project_dir = sys.argv[1]
except IndexError:
    print('Usage: "python {} path_to_project_dir"'.format(__file__))
    sys.exit(2)

# Open the Advisor project and load the data selected by advisor.SURVEY.
project = advisor.open_project(project_dir)
data = project.load(advisor.SURVEY)

# Display label for each operand_type value read from instruction-mix rows.
# Keys are strings; values missing from this table are handled by the caller.
data_type = {
    "2": "FP",
    "3": "INT",
}

# Display labels for instruction_class values. The label at position i of this
# tuple (counting from 1) is stored in op_type under the string key str(i).
_OP_TYPE_LABELS = (
    "OTHER",
    "BASIC",
    "FMA",
    "MATH",
    "DIV",
    "POW",
    "STORE",
    "LOAD",
    "SLM_STORE",
    "SLM_LOAD",
    "MOVE",
    "BIT",
    "CONTROL",
    "SYNC",
    "ATOMIC",
    "SLM_ATOMIC",
    "VECTOR",
)
op_type = {str(code): label for code, label in enumerate(_OP_TYPE_LABELS, start=1)}

print("=" * 60)
print("Main GPU Dataset")
print("=" * 60)

# Walk every top-level GPU row and all of its descendants with an explicit
# stack, dumping each row's key/value pairs followed by a separator line.
for top_level_row in data.gpu:
    stack = [top_level_row]
    while stack:
        row = stack.pop()
        # Children are pushed in iteration order and popped last-in-first-out,
        # so siblings are printed in reverse of their order in row.children.
        stack.extend(row.children)
        # Iterating a row yields its keys; each value is looked up by key.
        # Keys are left-aligned and padded with "_" to 45 characters.
        for key in row:
            print("{:_<45}: {}".format(key, row[key]))
        print("=" * 60)

# Report the instruction mix: one table per row returned for the
# INSTRUCTION_MIX GPU data type.
print("Instruction Mix Dataset")
print("=" * 60)
for task_row in data.get_gpu_rows(advisor.GpuDataType.INSTRUCTION_MIX):
    # Title line: the computing_task value followed by its computing_task_id.
    print("{}: {: <9}".format(task_row["computing_task"], task_row["computing_task_id"]))

    # The column header is printed only when there is at least one child row;
    # any() stops after the first child, so nothing more is consumed here.
    if any(True for _ in task_row.children):
        print(
            "    {: <4}: {: <4}: {: <10}: {: <19}: {: <19}: {: <19}".format(
                "Type", "Size", "Op Type",
                "Callcount", "Exec Count", "Dynamic Count",
            )
        )

    for mix_row in task_row.children:
        # Unknown operand types print as blank, unknown instruction classes as "?".
        type_label = data_type.get(mix_row["operand_type"], "")
        class_label = op_type.get(mix_row["instruction_class"], "?")
        # The counters are converted to int, as the ",d" format spec requires,
        # so they are printed with thousands separators.
        counters = [
            int(mix_row[field])
            for field in ("callcount", "executed_instruction_count", "dynamic_instruction_count")
        ]
        print(
            "    {: <4}: {: <4}: {: <10}: {: <19,d}: {: <19,d}: {:<19,d}".format(
                type_label, mix_row["operand_size"], class_label, *counters
            )
        )
    print("=" * 60)
