From c41f94ac4eb1f012c701821dda320cc1d81ef481 Mon Sep 17 00:00:00 2001
From: Satvik Vemuganti <>
Date: Tue, 23 Aug 2022 02:06:29 +0530
Subject: [PATCH] metadata_dictionary: Add mappings for "*.podspec" files

 swh/indexer/data/podspec.csv                  |  68 +++++++++++
 swh/indexer/metadata_dictionary/__init__.py   |  18 ++-
 swh/indexer/metadata_dictionary/podspec.py    | 113 ++++++++++++++++++
 .../tests/metadata_dictionary/test_podspec.py |  65 ++++++++++
 swh/indexer/tests/test_cli.py                 |   3 +
 5 files changed, 266 insertions(+), 1 deletion(-)
 create mode 100644 swh/indexer/data/podspec.csv
 create mode 100644 swh/indexer/metadata_dictionary/podspec.py
 create mode 100644 swh/indexer/tests/metadata_dictionary/test_podspec.py

diff --git a/swh/indexer/data/podspec.csv b/swh/indexer/data/podspec.csv
new file mode 100644
index 00000000..08293602
--- /dev/null
+++ b/swh/indexer/data/podspec.csv
@@ -0,0 +1,68 @@
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
index 99c2504c..5ad49e0e 100644
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -8,9 +8,24 @@ from typing import Dict, Type
 import click
-from . import cff, codemeta, composer, dart, github, maven, npm, nuget, python, ruby
+from . import (
+    cff,
+    codemeta,
+    composer,
+    dart,
+    github,
+    maven,
+    npm,
+    nuget,
+    podspec,
+    python,
+    ruby,
+)
 from .base import BaseExtrinsicMapping, BaseIntrinsicMapping, BaseMapping
+# podspec,
 INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
     "CffMapping": cff.CffMapping,
     "CodemetaMapping": codemeta.CodemetaMapping,
@@ -21,6 +36,7 @@ INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
     "PythonPkginfoMapping": python.PythonPkginfoMapping,
     "ComposerMapping": composer.ComposerMapping,
     "NuGetMapping": nuget.NuGetMapping,
+    "PodspecMapping": podspec.PodspecMapping,
 EXTRINSIC_MAPPINGS: Dict[str, Type[BaseExtrinsicMapping]] = {
diff --git a/swh/indexer/metadata_dictionary/podspec.py b/swh/indexer/metadata_dictionary/podspec.py
new file mode 100644
index 00000000..6e302f03
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/podspec.py
@@ -0,0 +1,113 @@
+import ast
+import itertools
+import os.path
+import re
+from typing import List
+
+from rdflib import Graph, Literal, URIRef
+
+from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+from swh.indexer.metadata_dictionary.base import DirectoryLsEntry
+from swh.indexer.namespaces import CODEMETA, SCHEMA
+
+# NOTE(review): the module path of this import was missing from the patch
+# ("from import Sha1"); swh.indexer.storage.interface is assumed -- confirm.
+from swh.indexer.storage.interface import Sha1
+
+from .base import DictMapping, SingleFileIntrinsicMapping
+
+# Crosswalk table for podspec fields, loaded once when the module is imported.
+PODSPEC_TABLE_PATH = os.path.join(_DATA_DIR, "podspec.csv")
+
+with open(PODSPEC_TABLE_PATH) as fd:
+    (CODEMETA_TERMS, PODSPEC_TABLE) = _read_crosstable(fd)
+class PodspecMapping(DictMapping, SingleFileIntrinsicMapping):
+    """Translate CocoaPods ``*.podspec`` files.
+
+    A podspec is Ruby code and is never executed here.  Lines of the form
+    ``<receiver>.<key> = <expression>`` that follow the line opening the
+    specification block are matched with regular expressions, and only
+    expressions that are plain string literals (or lists of them) are
+    evaluated, through Python's :mod:`ast`.
+    """
+
+    name = "podspec"
+    mapping = PODSPEC_TABLE["Podspec"]
+    string_fields = [
+        "description",
+        "name",
+        "softwareVersion",
+    ]
+
+    # Line opening the specification block: "... do |spec|" or "... { |spec|".
+    _re_spec_new = re.compile(r".* +(do|\{) +\|.*\|.*")
+    # Attribute assignment inside the block: "<receiver>.<key> = <expr>".
+    _re_spec_entry = re.compile(r"\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)")
+
+    @classmethod
+    def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
+        """Return the sha1 of the first entry whose name ends with
+        ``.podspec`` (as a one-element list), or an empty list if none does."""
+        for entry in file_entries:
+            if entry["name"].endswith(b".podspec"):
+                return [entry["sha1"]]
+        return []
+
+    def translate(self, raw_content):
+        """Collect the ``key = value`` entries of a podspec and translate them.
+
+        Args:
+            raw_content: bytes of the ``*.podspec`` file.
+
+        Returns:
+            The result of ``_translate_dict`` on the collected entries, or
+            ``None`` when the content cannot be decoded or contains no line
+            opening a specification block.
+
+        NOTE(review): Ruby hash literals (``{ :git => '...' }``) are not
+        valid Python expressions, so ``eval_podspec_expression`` returns
+        ``None`` for them and such entries are dropped here.
+        """
+        try:
+            raw_content = raw_content.decode()
+        except UnicodeDecodeError:
+            self.log.warning("Error unidecoding from %s", self.log_suffix)
+            return
+
+        # Skip everything up to (and including) the block-opening line.
+        lines = itertools.dropwhile(
+            lambda x: not self._re_spec_new.match(x), raw_content.split("\n")
+        )
+        try:
+            next(lines)
+        except StopIteration:
+            self.log.warning("Could not find Pod::Specification in %s", self.log_suffix)
+            return
+
+        content_dict = {}
+        for line in lines:
+            match = self._re_spec_entry.match(line)
+            if match:
+                value = self.eval_podspec_expression(match.group("expr"))
+                if value:
+                    content_dict[match.group("key")] = value
+        return self._translate_dict(content_dict)
+
+    def eval_podspec_expression(self, expr):
+        """Evaluate a Ruby expression, provided it is also a Python string
+        literal or a list of non-empty string literals.
+
+        Returns:
+            The string or list of strings, or ``None`` for anything else
+            (unparsable text, other node types, lists holding an empty or
+            non-string element).
+        """
+
+        def evaluator(node):
+            # ast.Str was removed in Python 3.12; string literals are
+            # ast.Constant nodes holding a str value.
+            if isinstance(node, ast.Constant) and isinstance(node.value, str):
+                return node.value
+            if isinstance(node, ast.List):
+                res = []
+                for element in node.elts:
+                    val = evaluator(element)
+                    if not val:
+                        return None
+                    res.append(val)
+                return res
+            return None
+
+        # Ruby's ".freeze" suffix has no Python counterpart; drop it before parsing.
+        expr = expr.replace(".freeze", "")
+        try:
+            tree = ast.parse(expr, mode="eval")
+        except (SyntaxError, ValueError):
+            return None
+        if isinstance(tree, ast.Expression):
+            return evaluator(tree.body)
+        return None
+
+    def translate_summary(self, graph: Graph, root, s):
+        """Add ``s`` to ``graph`` as the ``schema:description`` of ``root``
+        when it is a string."""
+        if isinstance(s, str):
+            graph.add((root, SCHEMA.description, Literal(s)))
+
+    def parse_enum(self, enum_string):
+        """Parse the text of a flat Ruby hash literal into a dict.
+
+        Args:
+            enum_string: text such as ``{ :git => 'url', :tag => 'v1' }``.
+
+        Returns:
+            A dict mapping each stripped key (e.g. ``":git"``) to its value
+            with the first and last character (the quotes) removed, or
+            ``None`` when the text does not start with ``{``.  Items without
+            a ``=>`` separator are skipped.
+        """
+        if not enum_string.startswith("{"):
+            return None
+        parsed = {}
+        for item in enum_string.strip("{ }\n").split(","):
+            parts = item.split("=>")
+            if len(parts) < 2:
+                # Empty hash, trailing comma, or a fragment of a value that
+                # itself contained a comma: nothing to record.
+                continue
+            parsed[parts[0].strip("\n ")] = parts[1].strip("\n ")[1:-1]
+        return parsed
+
+    def translate_source(self, graph: Graph, root, s):
+        """Add the repository given by ``s`` as ``codemeta:codeRepository``.
+
+        ``s`` is either a plain URL string, or the text of a Ruby hash
+        literal, in which case its ``:git`` entry is used.  Nothing is added
+        when ``s`` is not a string or when the hash has no ``:git`` entry.
+        """
+        if not isinstance(s, str):
+            return
+        parsed = self.parse_enum(s)
+        if parsed is not None:
+            # Never emit the raw hash text as a URI.
+            s = parsed.get(":git")
+            if not s:
+                return
+        graph.add((root, CODEMETA.codeRepository, URIRef(s)))
diff --git a/swh/indexer/tests/metadata_dictionary/test_podspec.py b/swh/indexer/tests/metadata_dictionary/test_podspec.py
new file mode 100644
index 00000000..40412d24
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_podspec.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+from swh.indexer.metadata_dictionary import MAPPINGS
+def test_compute_metadata_podspec():
+    """Translate a representative podspec and compare the whole document."""
+    # NOTE(review): the block-opening call, the ``spec.name`` receiver, the
+    # closing ``end`` and every URL / e-mail value were missing from the
+    # submitted patch.  The URL and e-mail values below are placeholders,
+    # kept consistent between the podspec and the expected document --
+    # confirm against the intended fixture.
+    raw_content = b"""Pod::Spec.new do |spec|
+  spec.name          = 'Reachability'
+  spec.version       = '3.1.0'
+  spec.license       = { :type => 'BSD' }
+  spec.homepage      = 'https://example.org/Reachability'
+  spec.authors       = { 'Tony Million' => 'tony.million@example.org' }
+  spec.summary       = 'ARC and GCD Compatible Reachability Class for iOS and OS X.'
+  spec.source        = { :git => 'https://example.org/Reachability.git' }
+  spec.module_name   = 'Rich'
+  spec.swift_version = '4.0'
+  spec.ios.deployment_target  = '9.0'
+  spec.osx.deployment_target  = '10.10'
+  spec.source_files       = 'Reachability/common/*.swift'
+  spec.ios.source_files   = 'Reachability/ios/*.swift', 'Reachability/extensions/*.swift'
+  spec.osx.source_files   = 'Reachability/osx/*.swift'
+  spec.framework      = 'SystemConfiguration'
+  spec.ios.framework  = 'UIKit'
+  spec.osx.framework  = 'AppKit'
+  spec.dependency 'SomeOtherPod'
+end"""
+    result = MAPPINGS["PodspecMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "author": [
+            {
+                "type": "Person",
+                "name": "Tony Million",
+                "email": "tony.million@example.org",
+            }
+        ],
+        "description": "ARC and GCD Compatible Reachability Class for iOS and OS X.",
+        "url": "https://example.org/Reachability",
+        "codeRepository": "https://example.org/Reachability.git",
+        "name": "Reachability",
+        "softwareVersion": "3.1.0",
+    }
+    assert result == expected
+def test_parse_enum():
+    """``parse_enum`` turns a multi-line Ruby hash literal into a dict whose
+    values have their surrounding quotes removed."""
+    podspec_mapping = MAPPINGS["PodspecMapping"]()
+    ruby_hash = """{
+        :git => '',
+        :commit => 'd8ce9f9c301d021a69953134185ab728c1c248d3'
+        }
+    """
+    assert podspec_mapping.parse_enum(ruby_hash) == {
+        ":git": "",
+        ":commit": "d8ce9f9c301d021a69953134185ab728c1c248d3",
+    }
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
index 6bbab408..242be195 100644
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -117,10 +117,13 @@ def test_cli_mapping_list(cli_runner, swh_config):
+            "podspec",
         ]  # must be sorted for test to pass
+    # "podspec",
     assert result.exit_code == 0, result.output
     assert result.output == expected_output